cbz2ebook: Replace with Python implementation

It has multiple advantages:

 * It processes the images in memory, without writing intermediary files
 * It supports multithreading (zlib fails non-reproducibly on some files,
 in which case it falls back to single-threaded processing)
 * It does not spawn an army of subprocesses
 * It shows a better progress bar
This commit is contained in:
Simon Bruder 2021-07-11 11:08:00 +02:00
parent 1cb0ab9cf9
commit 2b5735a380
Signed by: simon
GPG key ID: 8D3C82F9F309F8EC
2 changed files with 97 additions and 45 deletions

97
cbz2ebook.py Executable file
View file

@ -0,0 +1,97 @@
#!/usr/bin/env cached-nix-shell
#!nix-shell -i python3 -p python3 python3Packages.pillow python3Packages.tqdm
from PIL import Image
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
from zipfile import ZipFile
import argparse
import sys
import zlib
def should_scale(in_width, in_height, out_width, out_height):
    """Return True when an image of the input size has to be resized.

    No resize is needed when one dimension already equals the target
    dimension exactly and the other one does not exceed its target.
    Every other combination (larger or smaller) is reported as needing
    a resize.
    """
    width_matches = in_width == out_width and in_height <= out_height
    height_matches = in_height == out_height and in_width <= out_width
    if width_matches or height_matches:
        return False
    return True
def scaled_size(in_width, in_height, out_width, out_height):
    """Return the largest size that fits the output box at the input's aspect ratio.

    Args:
        in_width: Width of the source image in pixels.
        in_height: Height of the source image in pixels (must be non-zero).
        out_width: Width of the target box in pixels.
        out_height: Height of the target box in pixels (must be non-zero).

    Returns:
        A ``(width, height)`` tuple of ints. Both values are rounded to the
        nearest integer and are never smaller than 1.
    """
    in_aspect_ratio = float(in_width) / float(in_height)
    out_aspect_ratio = float(out_width) / float(out_height)

    if in_aspect_ratio > out_aspect_ratio:
        # Input is relatively wider than the box: width is the limiting side.
        width = out_width
        height = out_width / in_aspect_ratio
    else:
        # Input is relatively taller (or equal): height is the limiting side.
        width = out_height * in_aspect_ratio
        height = out_height

    # Round first, then convert: the reverse order silently truncates.
    # Clamp to 1 so extreme aspect ratios never yield a zero-sized dimension.
    return max(1, int(round(width))), max(1, int(round(height)))
def process_image(im):
    """Prepare a single page image for the target device.

    The page is rotated with ``Image.ROTATE_90`` if it is wider than high,
    converted to 8-bit greyscale (mode ``"L"``) and, if ``should_scale``
    says so, resized with Lanczos resampling to the size computed by
    ``scaled_size``. The target size is read from the module-level ``args``
    (``args.width`` / ``args.height``).

    Returns the processed image.
    """
    # double pages (wider than high) are turned so they fill a portrait screen
    page = im.transpose(Image.ROTATE_90) if im.width > im.height else im

    # greyscale conversion
    page = page.convert("L")

    # nothing more to do when the page already fits the target
    if not should_scale(page.width, page.height, args.width, args.height):
        return page

    target = scaled_size(page.width, page.height, args.width, args.height)
    return page.resize(target, resample=Image.LANCZOS)
def process_archive(srczip, dstzip, multithread=True):
    """Convert every file of *srczip* into a JPEG page inside *dstzip*.

    All non-directory members of ``srczip`` are taken in sorted filename
    order, opened as images, run through ``process_image`` and written to
    ``dstzip`` as ``0001.jpg``, ``0002.jpg``, ... with JPEG quality 92.
    Progress is displayed with ``tqdm``.

    Args:
        srczip: Open ``ZipFile`` to read the source pages from.
        dstzip: ``ZipFile`` opened for writing that receives the pages.
        multithread: Run the open/decode/process stages through a
            ``ThreadPoolExecutor`` when true, through the builtin ``map``
            otherwise.

    Raises:
        Whatever the underlying zip/image operations raise (the caller
        handles ``zlib.error`` to retry single-threaded).
    """
    srcfiles = sorted(
        zi.filename for zi in srczip.infolist() if not zi.is_dir()
    )

    def _convert(mapper):
        # Three chained stages: zip member -> file object -> image -> page.
        pages = mapper(
            process_image, mapper(Image.open, mapper(srczip.open, srcfiles))
        )
        for idx, im in enumerate(tqdm(pages, total=len(srcfiles)), start=1):
            with dstzip.open(f"{idx:04d}.jpg", "w") as outpagefile:
                im.save(outpagefile, format="JPEG", quality=92)

    if not multithread:
        # No thread pool is created at all in single-threaded mode.
        _convert(map)
        return

    # The context manager shuts the pool down (waiting for its workers) even
    # when an exception propagates, so no worker is still reading from srczip
    # when the caller retries in single-threaded mode.
    with ThreadPoolExecutor() as executor:
        _convert(executor.map)
# Command line interface: input archive, output archive, target screen size
# and an opt-out for multithreading.
parser = argparse.ArgumentParser()
parser.add_argument("infile")
parser.add_argument("outfile")
# type=int is required: without it, user-supplied values arrive as strings
# and break the size comparisons/arithmetic (only the defaults were ints).
parser.add_argument("-W", "--width", type=int, default=1440)
parser.add_argument("-H", "--height", type=int, default=1920)
parser.add_argument("-s", "--singlethreaded", action="store_true")
# args stays a module-level name because process_image reads it.
args = parser.parse_args()

with ZipFile(args.infile) as srczip:
    # "x" refuses to overwrite an already existing output file
    dstzip = ZipFile(args.outfile, "x")
    try:
        if args.singlethreaded:
            process_archive(srczip, dstzip, multithread=False)
        else:
            try:
                process_archive(srczip, dstzip, multithread=True)
            except zlib.error as e:
                print(
                    f"zlib choked ({e}), retrying in single threaded mode",
                    file=sys.stderr,
                )
                # replace partial archive with new empty one
                dstzip.close()
                dstzip = ZipFile(args.outfile, "w")
                process_archive(srczip, dstzip, multithread=False)
    finally:
        # Closing writes the central directory; do it explicitly instead of
        # relying on garbage collection. Closing an already closed ZipFile
        # is harmless.
        dstzip.close()

View file

@ -1,45 +0,0 @@
#!/usr/bin/env cached-nix-shell
#!nix-shell -i zsh -p imagemagick unzip zip zsh
# Usage: <script> INFILE OUTFILE
# Unpacks the zip archive INFILE into a temporary directory, renames and
# resizes the contained images with ImageMagick (rotating pages that are
# wider than high) and zips the result into OUTFILE.
# abort on the first failing command
set -e
# target size as WIDTHxHEIGHT
size="1440x1920" # Kobo Forma
#size="768x1024" # Amazon Kindle Paperwhite
# the same size with width and height swapped (HEIGHTxWIDTH); used for pages
# that get rotated after resizing
vertical_size="$(cut -dx -f2 <<< $size)x$(cut -dx -f1 <<< $size)"
# resolve both paths before changing into the temporary directory below
infile="$(realpath $1)"
outfile="$(realpath $2)"
tmpdir=$(mktemp -d)
# remove the temporary working directory
function cleanup {
rm -rf "$tmpdir"
}
# run cleanup when the script exits or receives INT/TERM
trap cleanup EXIT INT SIGTERM
cd "$tmpdir"
unzip "$infile"
# rename files to have a plain 4 digit filename
# (**/*.??? matches files with a three character extension at any depth;
# the extension and the containing directory are kept)
i=1
for file in **/*.???; do
mv -n "$file" "$(dirname $file)/$(printf %04d $i).${file##*.}"
i=$((i+1))
done
# number of images, used as the total for the progress display
length=$(ls -1 **/*.??? | wc -l)
position=0
for image in **/*.???; do
width=$(identify -format "%W" "$image")
height=$(identify -format "%H" "$image")
# pages wider than high are fitted into the swapped size and then rotated
if (($width > $height)); then
mogrify -resize "$vertical_size" -rotate 270 "$image"
else
mogrify -resize "$size" "$image"
fi
position=$(($position + 1))
# progress line: percentage followed by a bar of up to 72 '#' characters;
# the trailing \r moves the cursor back so the next iteration overwrites it
echo -ne "$(printf '%3s' $((100 * $position / $length))) % "$(printf "%0*d" "$((72 * $position / $length))" 0 | tr '0' '#')'\r'
done
# terminate the progress line
echo
zip "$outfile" **/*.???
# return to the previous working directory
cd -