You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
98 lines
2.8 KiB
Python
98 lines
2.8 KiB
Python
#!/usr/bin/env cached-nix-shell
|
|
#!nix-shell -i python3 -p python3 python3Packages.pillow python3Packages.tqdm
|
|
from PIL import Image
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from tqdm import tqdm
|
|
from zipfile import ZipFile
|
|
import argparse
|
|
import sys
|
|
import zlib
|
|
|
|
|
|
def should_scale(in_width, in_height, out_width, out_height):
    """Tell whether an image has to be resized to fit the output box.

    No scaling is needed when one dimension already matches the output
    exactly and the other dimension does not exceed its output limit.
    Returns True when the image should be resized, False otherwise.
    """
    # Width already matches and the height fits inside the box.
    width_matches = in_width == out_width and in_height <= out_height
    if width_matches:
        return False

    # Height already matches and the width fits inside the box.
    height_matches = in_height == out_height and in_width <= out_width
    return not height_matches
|
|
|
|
|
|
def scaled_size(in_width, in_height, out_width, out_height):
    """Compute the largest size that fits the output box at the input's aspect ratio.

    Args:
        in_width: width of the source image.
        in_height: height of the source image.
        out_width: maximum width of the result.
        out_height: maximum height of the result.

    Returns:
        A ``(width, height)`` tuple of ints, each rounded to the nearest
        integer.
    """
    in_aspect_ratio = float(in_width) / float(in_height)
    out_aspect_ratio = float(out_width) / float(out_height)

    if in_aspect_ratio > out_aspect_ratio:
        # Source is relatively wider than the box: width is the limiting side.
        height = out_width / in_aspect_ratio
        width = out_width
    else:
        # Source is relatively taller (or equal): height is the limiting side.
        width = out_height * in_aspect_ratio
        height = out_height

    # Round first, then convert; int() before round() would truncate the
    # fractional part and make the rounding a no-op.
    return int(round(width)), int(round(height))
|
|
|
|
|
|
def process_image(im):
    """Prepare a single page image for output.

    The image is rotated by 90 degrees when it is wider than it is high,
    converted to greyscale, and resized to fit the ``args.width`` x
    ``args.height`` box (module-level ``args``) when ``should_scale`` says
    so. Returns the resulting image.
    """
    # Double pages (wider than high) get rotated.
    is_double_page = im.width > im.height
    if is_double_page:
        im = im.transpose(Image.Transpose.ROTATE_90)

    # Greyscale conversion.
    grey = im.convert("L")

    # Leave the image alone when it already fits the target box.
    target = (args.width, args.height)
    if not should_scale(grey.width, grey.height, *target):
        return grey

    new_size = scaled_size(grey.width, grey.height, *target)
    return grey.resize(new_size, resample=Image.Resampling.LANCZOS)
|
|
|
|
|
|
def process_archive(srczip, dstzip, multithread=True):
    """Convert every file in ``srczip`` and write the results into ``dstzip``.

    Non-directory entries of ``srczip`` are processed in sorted filename
    order: each is opened, loaded with ``Image.open``, passed through
    ``process_image`` and saved as a JPEG named ``0001.jpg``, ``0002.jpg``,
    ... in ``dstzip``. Progress is reported through ``tqdm``.

    Args:
        srczip: source archive, opened for reading.
        dstzip: destination archive, opened for writing.
        multithread: when true, the pipeline stages are mapped through a
            ``ThreadPoolExecutor``; otherwise the builtin ``map`` is used.
    """
    srcfiles = sorted(zi.filename for zi in srczip.infolist() if not zi.is_dir())

    # Only create a thread pool when it is actually going to be used.
    executor = ThreadPoolExecutor() if multithread else None
    mapper = executor.map if executor is not None else map

    try:
        pages = mapper(
            process_image, mapper(Image.open, mapper(srczip.open, srcfiles))
        )
        for idx, im in enumerate(tqdm(pages, total=len(srcfiles)), start=1):
            with dstzip.open(f"{idx:04d}.jpg", "w") as outpagefile:
                im.save(outpagefile, format="JPEG", quality=92)
    finally:
        # Always release the worker threads. Waiting here also means no
        # worker is still reading from srczip once this function returns or
        # raises, so a caller may safely retry with the same archive.
        if executor is not None:
            executor.shutdown()
|
|
|
|
|
|
# Command-line interface: input archive, output archive, target page size and
# an opt-out switch for multithreading.
parser = argparse.ArgumentParser()
parser.add_argument("infile")
parser.add_argument("outfile")
# type=int so values given on the command line are integers like the
# defaults; without it they arrive as strings and break the size arithmetic.
parser.add_argument("-W", "--width", type=int, default=1440)
parser.add_argument("-H", "--height", type=int, default=1920)
parser.add_argument("-s", "--singlethreaded", action="store_true")
args = parser.parse_args()

srczip = ZipFile(args.infile)
try:
    # Mode "x" refuses to overwrite an existing output file.
    dstzip = ZipFile(args.outfile, "x")
    try:
        if args.singlethreaded:
            process_archive(srczip, dstzip, multithread=False)
        else:
            try:
                process_archive(srczip, dstzip, multithread=True)
            except zlib.error as e:
                print(
                    f"zlib choked ({e}), retrying in single threaded mode",
                    file=sys.stderr,
                )
                # replace partial archive with new empty one
                dstzip.close()
                dstzip = ZipFile(args.outfile, "w")
                process_archive(srczip, dstzip, multithread=False)
    finally:
        # Closing explicitly finalizes the output archive instead of relying
        # on garbage collection at interpreter exit.
        dstzip.close()
finally:
    srczip.close()
|