cbz2ebook: Replace with python implementation
It has multiple advantages:
* It processes the images in memory, without writing intermediary files
* It supports multithreading (though zlib fails non-reproducibly with some files, in which case it falls back to single-threading)
* It does not spawn an army of subprocesses
* It shows a better progress bar
This commit is contained in:
parent
1cb0ab9cf9
commit
2b5735a380
97
cbz2ebook.py
Executable file
97
cbz2ebook.py
Executable file
|
@ -0,0 +1,97 @@
|
|||
#!/usr/bin/env cached-nix-shell
|
||||
#!nix-shell -i python3 -p python3 python3Packages.pillow python3Packages.tqdm
|
||||
from PIL import Image
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from tqdm import tqdm
|
||||
from zipfile import ZipFile
|
||||
import argparse
|
||||
import sys
|
||||
import zlib
|
||||
|
||||
|
||||
def should_scale(in_width, in_height, out_width, out_height):
    """Tell whether an image must be resized to fill the output box.

    No resize is needed when one dimension already equals its target
    exactly and the other dimension does not exceed its target. Every
    other combination returns True.
    """
    # Width already matches and the height fits: leave untouched.
    if in_width == out_width and in_height <= out_height:
        return False
    # Height already matches and the width fits: leave untouched.
    if in_height == out_height and in_width <= out_width:
        return False
    return True
|
||||
|
||||
|
||||
def scaled_size(in_width, in_height, out_width, out_height):
    """Compute the size of an image scaled to fit inside the output box.

    The aspect ratio of the input is preserved; the result touches the
    output box in at least one dimension.

    Args:
        in_width, in_height: size of the source image.
        out_width, out_height: size of the box to fit into.

    Returns:
        A ``(width, height)`` tuple of ints, each rounded to the nearest
        pixel.

    Raises:
        ZeroDivisionError: if ``in_height`` or ``out_height`` is zero.
    """
    in_aspect_ratio = float(in_width) / float(in_height)
    out_aspect_ratio = float(out_width) / float(out_height)

    if in_aspect_ratio > out_aspect_ratio:
        # Input is relatively wider than the box: width is the limit.
        width = out_width
        height = out_width / in_aspect_ratio
    else:
        # Input is relatively taller (or equal): height is the limit.
        width = out_height * in_aspect_ratio
        height = out_height

    # Round first, then convert: int() before round() would truncate the
    # fractional part and make the height one pixel too small.
    return int(round(width)), int(round(height))
|
||||
|
||||
|
||||
def process_image(im):
    """Prepare a single page image for the e-reader.

    Pages that are wider than high are rotated by 90 degrees, the image is
    converted to greyscale, and it is resized with Lanczos resampling when
    ``should_scale`` says so. The target size is read from the module-level
    ``args.width`` / ``args.height``.

    Returns the processed image.
    """
    # double pages (wider than high) are turned upright
    is_double_page = im.width > im.height
    if is_double_page:
        im = im.transpose(Image.ROTATE_90)

    # greyscale
    im = im.convert("L")

    # already the right size: nothing more to do
    if not should_scale(im.width, im.height, args.width, args.height):
        return im

    target_size = scaled_size(im.width, im.height, args.width, args.height)
    return im.resize(target_size, resample=Image.LANCZOS)
|
||||
|
||||
|
||||
def process_archive(srczip, dstzip, multithread=True):
    """Convert every page of ``srczip`` and write it to ``dstzip``.

    All non-directory members of ``srczip`` are taken in sorted filename
    order, opened as images, passed through ``process_image`` and stored in
    ``dstzip`` as ``0001.jpg``, ``0002.jpg``, ... (JPEG, quality 92). A tqdm
    progress bar tracks the pages.

    Args:
        srczip: open ``ZipFile`` to read the pages from.
        dstzip: open, writable ``ZipFile`` receiving the converted pages.
        multithread: when true, the pipeline stages are run through a
            ``ThreadPoolExecutor``; otherwise the builtin ``map`` is used.
    """
    srcfiles = sorted(zi.filename for zi in srczip.infolist() if not zi.is_dir())

    def _convert_with(mapper):
        # Build the open -> decode -> process pipeline and store the results.
        pages = mapper(process_image, mapper(Image.open, mapper(srczip.open, srcfiles)))
        for idx, im in enumerate(tqdm(pages, total=len(srcfiles))):
            with dstzip.open(f"{idx+1:04d}.jpg", "w") as outpagefile:
                im.save(outpagefile, format="JPEG", quality=92)

    if multithread:
        # The context manager shuts the pool down even when a worker raises,
        # so no thread keeps reading from srczip after this function exits.
        with ThreadPoolExecutor() as executor:
            _convert_with(executor.map)
    else:
        # No pool is created at all in single-threaded mode.
        _convert_with(map)
|
||||
|
||||
|
||||
# Command line: input archive, output archive, target size and threading mode.
parser = argparse.ArgumentParser()
parser.add_argument("infile")
parser.add_argument("outfile")
# type=int: without it, values given on the command line arrive as strings
# and break the size comparisons and arithmetic.
parser.add_argument("-W", "--width", type=int, default=1440)
parser.add_argument("-H", "--height", type=int, default=1920)
parser.add_argument("-s", "--singlethreaded", action="store_true")
args = parser.parse_args()

# Both archives are used as context managers so that they are always closed;
# closing the output archive is what writes its central directory.
with ZipFile(args.infile) as srczip:
    if args.singlethreaded:
        # "x": refuse to overwrite an existing output file
        with ZipFile(args.outfile, "x") as dstzip:
            process_archive(srczip, dstzip, multithread=False)
    else:
        try:
            with ZipFile(args.outfile, "x") as dstzip:
                process_archive(srczip, dstzip, multithread=True)
        except zlib.error as e:
            print(f"zlib choked ({e}), retrying in single threaded mode", file=sys.stderr)
            # replace partial archive with new empty one
            with ZipFile(args.outfile, "w") as dstzip:
                process_archive(srczip, dstzip, multithread=False)
|
45
cbz2ebook.sh
45
cbz2ebook.sh
|
@ -1,45 +0,0 @@
|
|||
#!/usr/bin/env cached-nix-shell
#!nix-shell -i zsh -p imagemagick unzip zip zsh
# Usage: cbz2ebook.sh INFILE OUTFILE
# Unpacks INFILE, resizes every page to the target screen size (rotating
# pages that are wider than high) and zips the result into OUTFILE.
set -e

# target screen size, WIDTHxHEIGHT
size="1440x1920" # Kobo Forma
#size="768x1024" # Amazon Kindle Paperwhite

# same size with both dimensions swapped, for pages that get rotated
vertical_size="${size#*x}x${size%x*}"

# resolve both paths before leaving the current directory
src_archive="$(realpath $1)"
dst_archive="$(realpath $2)"
tmpdir=$(mktemp -d)

# remove the scratch directory when the script ends or is interrupted
cleanup() {
    rm -rf "$tmpdir"
}
trap cleanup EXIT INT SIGTERM

cd "$tmpdir"
unzip "$src_archive"

# rename files to have a plain 4 digit filename
counter=1
for page in **/*.???; do
    mv -n "$page" "$(dirname $page)/$(printf %04d $counter).${page##*.}"
    counter=$((counter+1))
done

# resize (and rotate where needed) each page, drawing a progress bar
total=$(ls -1 **/*.??? | wc -l)
current=0
for page in **/*.???; do
    page_width=$(identify -format "%W" "$page")
    page_height=$(identify -format "%H" "$page")
    if (($page_width > $page_height)); then
        mogrify -resize "$vertical_size" -rotate 270 "$page"
    else
        mogrify -resize "$size" "$page"
    fi
    current=$(($current + 1))
    # percentage padded to 3 columns, then a bar of up to 72 '#' characters
    percent=$(printf '%3s' $((100 * $current / $total)))
    bar=$(printf "%0*d" "$((72 * $current / $total))" 0 | tr '0' '#')
    echo -ne "$percent % "$bar'\r'
done
echo

zip "$dst_archive" **/*.???
cd -
|
Loading…
Reference in a new issue