cbz2ebook: Replace with python implementation
It has multiple advantages:
* It processes the images in memory, without writing intermediary files
* It supports multithreading (though zlib fails non-reproducibly with some files, in which case it falls back to single-threading)
* It does not spawn an army of subprocesses
* It shows a better progress bar
This commit is contained in:
parent
1cb0ab9cf9
commit
2b5735a380
97
cbz2ebook.py
Executable file
97
cbz2ebook.py
Executable file
|
@ -0,0 +1,97 @@
|
||||||
|
#!/usr/bin/env cached-nix-shell
|
||||||
|
#!nix-shell -i python3 -p python3 python3Packages.pillow python3Packages.tqdm
|
||||||
|
from PIL import Image
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from tqdm import tqdm
|
||||||
|
from zipfile import ZipFile
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
import zlib
|
||||||
|
|
||||||
|
|
||||||
|
def should_scale(in_width, in_height, out_width, out_height):
    """Tell whether an image has to be resized to fit the output box.

    An image is left untouched only when it already touches the box
    exactly along one axis and does not exceed it along the other.
    Everything else (too large, or smaller than the box on both axes)
    is reported as needing a resize.

    Args:
        in_width: Width of the source image.
        in_height: Height of the source image.
        out_width: Width of the target box.
        out_height: Height of the target box.

    Returns:
        ``False`` if the image already fits snugly, ``True`` otherwise.
    """
    # Exactly as wide as the box and not taller than it.
    if in_width == out_width and in_height <= out_height:
        return False
    # Exactly as tall as the box and not wider than it.
    if in_height == out_height and in_width <= out_width:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def scaled_size(in_width, in_height, out_width, out_height):
    """Compute the largest size that fits the output box, keeping aspect ratio.

    The image is scaled so that it touches the box along its limiting axis:
    images relatively wider than the box are fitted to the box width, all
    others to the box height.

    Args:
        in_width: Width of the source image (must be non-zero).
        in_height: Height of the source image (must be non-zero).
        out_width: Width of the target box.
        out_height: Height of the target box.

    Returns:
        A ``(width, height)`` tuple of integers, each at least 1.

    Raises:
        ZeroDivisionError: If ``in_height`` or ``out_height`` is zero.
    """
    in_aspect_ratio = float(in_width) / float(in_height)
    out_aspect_ratio = float(out_width) / float(out_height)

    if in_aspect_ratio > out_aspect_ratio:
        # Source is relatively wider than the box: width is the limit.
        width = out_width
        height = out_width / in_aspect_ratio
    else:
        # Source is relatively taller (or equal): height is the limit.
        width = out_height * in_aspect_ratio
        height = out_height

    # Round first, then convert: the previous ``round(int(height))``
    # truncated the fractional part before rounding, so the height was
    # always floored while the width was rounded.  Clamp to one pixel so
    # extreme aspect ratios never produce a zero-sized dimension.
    return max(1, int(round(width))), max(1, int(round(height)))
|
||||||
|
|
||||||
|
|
||||||
|
def process_image(im):
    """Prepare a single page: rotate if landscape, greyscale, fit to box.

    The target box is read from the module-level ``args`` namespace
    (``args.width`` / ``args.height``).

    Args:
        im: The page image to process.

    Returns:
        The processed image, converted to mode ``"L"``.
    """
    # Double pages (wider than high) are rotated by 90 degrees.
    is_double_page = im.width > im.height
    page = im.transpose(Image.ROTATE_90) if is_double_page else im

    # Convert to greyscale.
    grey = page.convert("L")

    # Pages that already fit the box are returned without resampling.
    if not should_scale(grey.width, grey.height, args.width, args.height):
        return grey

    target = scaled_size(grey.width, grey.height, args.width, args.height)
    return grey.resize(target, resample=Image.LANCZOS)
|
||||||
|
|
||||||
|
|
||||||
|
def process_archive(srczip, dstzip, multithread=True):
    """Convert every file of ``srczip`` into a numbered JPEG page in ``dstzip``.

    Source members are processed in sorted filename order and written as
    ``0001.jpg``, ``0002.jpg``, ... while a tqdm progress bar is shown.

    Args:
        srczip: Open ``ZipFile`` to read the source pages from.
        dstzip: ``ZipFile`` opened for writing that receives the pages.
        multithread: When true, opening and processing are dispatched to a
            thread pool; otherwise everything runs in the calling thread.

    Raises:
        Whatever reading, decoding or writing a page raises (the caller
        handles ``zlib.error`` to retry single-threaded).
    """
    srcfiles = sorted(
        zi.filename for zi in srczip.infolist() if not zi.is_dir()
    )

    # Only create a pool when it is actually used.
    executor = ThreadPoolExecutor() if multithread else None
    mapper = executor.map if executor is not None else map

    try:
        pages = mapper(
            process_image, mapper(Image.open, mapper(srczip.open, srcfiles))
        )
        for page_no, im in enumerate(tqdm(pages, total=len(srcfiles)), start=1):
            with dstzip.open(f"{page_no:04d}.jpg", "w") as outpagefile:
                im.save(outpagefile, format="JPEG", quality=92)
    finally:
        # Always drain and release the pool.  Previously it was never shut
        # down, so after a failure its workers kept reading from ``srczip``
        # in the background while the caller retried on the same archive.
        if executor is not None:
            executor.shutdown()
|
||||||
|
|
||||||
|
|
||||||
|
# Command line interface.  ``args`` stays a module-level name because
# process_image() reads the target size from it.
parser = argparse.ArgumentParser()
parser.add_argument("infile")
parser.add_argument("outfile")
# type=int: without it, user-supplied sizes arrive as strings and break the
# size arithmetic (only the integer defaults used to work).
parser.add_argument("-W", "--width", type=int, default=1440)
parser.add_argument("-H", "--height", type=int, default=1920)
parser.add_argument("-s", "--singlethreaded", action="store_true")
args = parser.parse_args()

with ZipFile(args.infile) as srczip:
    # "x" refuses to overwrite an existing output file.
    dstzip = ZipFile(args.outfile, "x")
    try:
        if args.singlethreaded:
            process_archive(srczip, dstzip, multithread=False)
        else:
            try:
                process_archive(srczip, dstzip, multithread=True)
            except zlib.error as e:
                print(
                    f"zlib choked ({e}), retrying in single threaded mode",
                    file=sys.stderr,
                )
                # replace partial archive with new empty one
                dstzip.close()
                dstzip = ZipFile(args.outfile, "w")
                process_archive(srczip, dstzip, multithread=False)
    finally:
        # Closing writes the central directory; do it explicitly instead of
        # relying on garbage collection at interpreter exit.
        dstzip.close()
|
45
cbz2ebook.sh
45
cbz2ebook.sh
|
@ -1,45 +0,0 @@
|
||||||
#!/usr/bin/env cached-nix-shell
#!nix-shell -i zsh -p imagemagick unzip zip zsh
#
# Usage: cbz2ebook.sh INFILE OUTFILE
#
# Unpacks INFILE into a temporary directory, renames the pages to plain
# 4 digit names, resizes them in place with ImageMagick (rotating pages
# that are wider than high) and zips the result into OUTFILE.
set -e

size="1440x1920" # Kobo Forma
#size="768x1024" # Amazon Kindle Paperwhite

# Same box with width and height swapped, used for pages that get rotated.
vertical_size="${size#*x}x${size%x*}"

infile="$(realpath $1)"
outfile="$(realpath $2)"
tmpdir=$(mktemp -d)

# Remove the scratch directory on exit or interruption.
cleanup() {
    rm -rf "$tmpdir"
}

trap cleanup EXIT INT SIGTERM

cd "$tmpdir"
unzip "$infile"

# rename files to have a plain 4 digit filename
i=1
for file in **/*.???; do
    mv -n "$file" "$(dirname $file)/$(printf %04d $i).${file##*.}"
    i=$((i + 1))
done

length=$(ls -1 **/*.??? | wc -l)
position=0
for image in **/*.???; do
    width=$(identify -format "%W" "$image")
    height=$(identify -format "%H" "$image")
    if (($width > $height)); then
        # double page: fit into the swapped box, then rotate
        mogrify -resize "$vertical_size" -rotate 270 "$image"
    else
        mogrify -resize "$size" "$image"
    fi
    position=$((position + 1))

    # progress: percentage followed by a bar of up to 72 '#' characters
    percent=$((100 * $position / $length))
    filled=$((72 * $position / $length))
    echo -ne "$(printf '%3s' $percent) % "$(printf "%0*d" "$filled" 0 | tr '0' '#')'\r'
done
echo

zip "$outfile" **/*.???
cd -
|
|
Loading…
Reference in a new issue