Compare commits

..

2 commits

Author SHA1 Message Date
Simon Bruder 2ad7f3a52e
cbz2ebook: Use mt map for opening image and zip
This apparently solves some weird issues with reading the zip file, such as:

zlib.error: Error -3 while decompressing data: invalid block type
zlib.error: Error -3 while decompressing data: invalid distance too far back

Though it reduced the number of times this happened, some files still
persistently refuse to be read by zipfile in parallel. In those cases,
using `-s` to force single-threading should be sufficient.
2021-07-11 11:27:57 +02:00
Simon Bruder bcfa304862
cbz2ebook: Replace with python implementation
It has multiple advantages:

 * It processes the images in memory, without writing intermediary files
 * It supports multithreading
 * It does not spawn an army of subprocesses
 * It shows a better progress bar
 * It is more robust
2021-07-11 11:12:28 +02:00

View file

@ -5,8 +5,6 @@ from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
from zipfile import ZipFile
import argparse
import sys
import zlib
def should_scale(in_width, in_height, out_width, out_height):
@ -48,31 +46,6 @@ def process_image(im):
return im
def process_archive(srczip, dstzip, multithread=True):
    """Convert every file in *srczip* to a numbered JPEG page in *dstzip*.

    The non-directory entries of *srczip* are processed in sorted filename
    order. Each one is opened, passed through ``Image.open`` and
    ``process_image``, and saved as ``0001.jpg``, ``0002.jpg``, ... with
    JPEG quality 92.

    Args:
        srczip: Archive to read from; must provide ``infolist()`` and
            ``open(name)``.
        dstzip: Archive to write to; must provide ``open(name, "w")``.
        multithread: When true, the open/decode/process stages run on a
            thread pool; otherwise everything runs in the calling thread.

    Raises:
        Any exception raised while opening, decoding, processing or saving
        a page is propagated to the caller.
    """
    srcfiles = sorted(zi.filename for zi in srczip.infolist() if not zi.is_dir())

    if not multithread:
        # No pool is created at all in single-threaded mode.
        _write_pages(map, srczip, dstzip, srcfiles)
        return

    # Leaving the ``with`` block shuts the pool down and waits for tasks that
    # were already submitted. This also holds when a worker raises, so no
    # background thread is still reading ``srczip`` once the exception
    # reaches a caller that wants to retry single-threaded.
    with ThreadPoolExecutor() as executor:
        _write_pages(executor.map, srczip, dstzip, srcfiles)


def _write_pages(mapper, srczip, dstzip, srcfiles):
    """Run the page pipeline through *mapper* and store the results in *dstzip*."""
    pages = mapper(process_image, mapper(Image.open, mapper(srczip.open, srcfiles)))
    for idx, im in enumerate(tqdm(pages, total=len(srcfiles))):
        # Pages are numbered from 1 and zero-padded to four digits.
        with dstzip.open(f"{idx+1:04d}.jpg", "w") as outpagefile:
            im.save(outpagefile, format="JPEG", quality=92)
parser = argparse.ArgumentParser()
parser.add_argument("infile")
parser.add_argument("outfile")
@ -84,14 +57,22 @@ args = parser.parse_args()
# Script body: open the source archive, create the output archive and convert
# every page.
# NOTE(review): this span looks like a diff hunk whose +/- markers and
# indentation were stripped, so statements from two revisions appear
# interleaved (calls to process_archive next to the inline mapper pipeline).
# Confirm against the repository before treating it as runnable code.
srczip = ZipFile(args.infile)
# Mode "x" creates the output archive exclusively, i.e. it must not exist yet.
dstzip = ZipFile(args.outfile, "x")
# Sorted names of all non-directory entries in the source archive.
srcfiles = sorted(
map(lambda zi: zi.filename, filter(lambda zi: not zi.is_dir(), srczip.infolist(),),)
)
executor = ThreadPoolExecutor()
if args.singlethreaded:
process_archive(srczip, dstzip, multithread=False)
# Builtin map: the whole pipeline runs in the calling thread.
mapper = map
else:
try:
process_archive(srczip, dstzip, multithread=True)
except zlib.error as e:
# Fall back to a single-threaded run when zlib fails during the threaded run.
print(f"zlib choked ({e}), retrying in single threaded mode", file=sys.stderr)
# replace partial archive with new empty one
dstzip.close()
dstzip = ZipFile(args.outfile, "w")
process_archive(srczip, dstzip, multithread=False)
# executor.map: each pipeline stage is dispatched to the thread pool.
mapper = executor.map
# Pipeline per entry: srczip.open -> Image.open -> process_image. Results are
# consumed in srcfiles order and written as 0001.jpg, 0002.jpg, ... at JPEG
# quality 92; tqdm wraps the iterator with total=len(srcfiles).
for idx, im in enumerate(
tqdm(
mapper(process_image, mapper(Image.open, mapper(srczip.open, srcfiles)),),
total=len(srcfiles),
)
):
with dstzip.open(f"{idx+1:04d}.jpg", "w") as outpagefile:
im.save(outpagefile, format="JPEG", quality=92)