#!/usr/bin/env cached-nix-shell
#!nix-shell -i python3 -p python3 python3Packages.pillow python3Packages.tqdm
"""Repack a zip archive of page images as greyscale JPEGs fitted to a screen.

Every non-directory entry of the input archive is opened as an image, rotated
if it is wider than high, converted to greyscale, resized to fit the target
width/height, and written to the output archive as ``0001.jpg``,
``0002.jpg``, ... in sorted filename order.
"""

import argparse
import sys
import zlib
from concurrent.futures import ThreadPoolExecutor
from zipfile import ZipFile

from PIL import Image
from tqdm import tqdm


def should_scale(in_width, in_height, out_width, out_height):
    """Return True unless the image already fits the target box exactly.

    An image is left alone only when one of its dimensions equals the
    corresponding target dimension and the other one does not exceed its
    target. Anything else (too large, or smaller in both dimensions) is scaled.
    """
    return not (
        (in_width == out_width and in_height <= out_height)
        or (in_height == out_height and in_width <= out_width)
    )


def scaled_size(in_width, in_height, out_width, out_height):
    """Return the ``(width, height)`` that fits the target box, keeping aspect.

    The result touches the target box on the limiting dimension; the other
    dimension is rounded to the nearest pixel and never drops below 1.
    """
    in_aspect_ratio = float(in_width) / float(in_height)
    out_aspect_ratio = float(out_width) / float(out_height)

    if in_aspect_ratio > out_aspect_ratio:
        # Input is relatively wider than the target: width is the limit.
        width = out_width
        height = out_width / in_aspect_ratio
    else:
        # Input is relatively taller (or equal): height is the limit.
        width = out_height * in_aspect_ratio
        height = out_height

    # Round both dimensions the same way (the height used to be truncated),
    # and clamp to 1 so extreme aspect ratios cannot produce an empty image.
    return max(1, round(width)), max(1, round(height))


def process_image(im):
    """Rotate, greyscale and resize one page image.

    Reads the target size from the module-level ``args`` (``args.width`` and
    ``args.height``) and returns the processed image.
    """
    # double pages (wider than high) should be rotated
    if im.width > im.height:
        im = im.transpose(Image.Transpose.ROTATE_90)

    # convert to greyscale
    im = im.convert("L")

    # resize if necessary
    if should_scale(im.width, im.height, args.width, args.height):
        im = im.resize(
            scaled_size(im.width, im.height, args.width, args.height),
            resample=Image.Resampling.LANCZOS,
        )

    return im


def process_archive(srczip, dstzip, multithread=True):
    """Convert every page of ``srczip`` and append it to ``dstzip``.

    Args:
        srczip: ZipFile opened for reading; its non-directory entries are
            processed in sorted filename order.
        dstzip: ZipFile opened for writing; pages are stored as
            ``NNNN.jpg`` (1-based, zero-padded to four digits).
        multithread: when true, opening and processing of the pages is
            dispatched through a thread pool; otherwise the builtin ``map``
            is used.
    """
    srcfiles = sorted(
        zi.filename for zi in srczip.infolist() if not zi.is_dir()
    )

    def write_pages(mapper):
        # Three-stage pipeline: open the zip entry, decode it, process it.
        pages = mapper(
            process_image, mapper(Image.open, mapper(srczip.open, srcfiles))
        )
        for idx, im in enumerate(tqdm(pages, total=len(srcfiles)), start=1):
            with dstzip.open(f"{idx:04d}.jpg", "w") as outpagefile:
                im.save(outpagefile, format="JPEG", quality=92)

    if multithread:
        # The context manager shuts the pool down on exit, so no worker is
        # still reading from srczip once this function returns or raises.
        with ThreadPoolExecutor() as executor:
            write_pages(executor.map)
    else:
        write_pages(map)


parser = argparse.ArgumentParser()
parser.add_argument("infile")
parser.add_argument("outfile")
# type=int keeps user-supplied sizes comparable with the integer image sizes.
parser.add_argument("-W", "--width", type=int, default=1440)
parser.add_argument("-H", "--height", type=int, default=1920)
parser.add_argument("-s", "--singlethreaded", action="store_true")
args = parser.parse_args()

with ZipFile(args.infile) as srczip:
    # "x" refuses to overwrite an existing output file.
    dstzip = ZipFile(args.outfile, "x")
    try:
        if args.singlethreaded:
            process_archive(srczip, dstzip, multithread=False)
        else:
            try:
                process_archive(srczip, dstzip, multithread=True)
            except zlib.error as e:
                print(
                    f"zlib choked ({e}), retrying in single threaded mode",
                    file=sys.stderr,
                )
                # replace partial archive with new empty one
                dstzip.close()
                dstzip = ZipFile(args.outfile, "w")
                process_archive(srczip, dstzip, multithread=False)
    finally:
        # Closing writes the central directory; dstzip may have been rebound
        # by the retry path, so close whichever archive is current.
        dstzip.close()