scripts/downloaders/mangarock.py

54 lines
1.4 KiB
Python
Raw Normal View History

2020-01-16 19:25:19 +01:00
#!/usr/bin/env python3
from io import BytesIO
from tqdm import tqdm
import os
import requests
import sys
# Decoding thanks to https://github.com/bake/mri/blob/master/mri.go#L34
# Single-byte key: every byte of a downloaded image payload is XORed with
# this value (see decode_ciphertext / get_image) to recover the clear bytes.
XOR_KEY = 101
2020-02-26 00:37:11 +01:00
2020-01-16 19:25:19 +01:00
def get_pages(chapter, timeout=30):
    """Yield the image URL of every page listed for a chapter.

    This is a generator: the API request is only sent when iteration starts.

    Args:
        chapter: Chapter id as a string; ``"mrs-chapter-"`` is prepended to
            it to form the ``oid`` query parameter.
        timeout: Seconds to wait for the server to connect / send data before
            giving up. Passed straight to ``requests.get``.

    Yields:
        The ``"url"`` value of each entry in the response's ``"data"`` list,
        in the order the API returned them.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(
        "https://api.mangarockhd.com/query/web401/pagesv2",
        params={"oid": "mrs-chapter-" + chapter},
        # Without a timeout, requests waits forever on a stalled connection.
        timeout=timeout,
    )
    # Surface HTTP errors directly instead of failing later with an opaque
    # JSON-decoding error or KeyError on the error body.
    response.raise_for_status()
    for item in response.json()["data"]:
        yield item["url"]
2020-01-16 19:25:19 +01:00
def decode_ciphertext(byte):
    """Return ``byte`` XORed with the module-level ``XOR_KEY``."""
    plain = byte ^ XOR_KEY
    return plain
2020-02-26 00:37:11 +01:00
2020-01-16 19:25:19 +01:00
def get_image(url, timeout=30):
    """Download an XOR-obfuscated image and return it as a WebP byte stream.

    The downloaded payload is XORed byte-by-byte with ``XOR_KEY`` and a
    15-byte ``RIFF<size>WEBPVP8`` prefix is written in front of it.

    Args:
        url: Location of the obfuscated image.
        timeout: Seconds to wait for the server to connect / send data before
            giving up. Passed straight to ``requests.get``.

    Returns:
        A ``BytesIO`` holding the prefix plus the decoded payload, positioned
        at offset 0 so callers can ``read()`` it immediately.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Without this check an HTTP error page would be "decoded" and returned
    # as if it were image data, leaving a corrupt file for the caller to save.
    response.raise_for_status()
    ciphertext = response.content

    # RIFF size field counts the bytes after it: the 7-byte "WEBPVP8" tag
    # plus the payload.
    size = len(ciphertext) + 7

    # Precomputed 256-entry lookup so the whole payload is decoded in a
    # single translate() pass rather than one Python call per byte.
    xor_table = bytes(value ^ XOR_KEY for value in range(256))

    cleartext = BytesIO()
    cleartext.write(b"RIFF")
    # 32-bit little-endian size; the mask keeps only the low four bytes,
    # exactly as per-byte shifting and masking would.
    cleartext.write((size & 0xFFFFFFFF).to_bytes(4, "little"))
    cleartext.write(b"WEBPVP8")
    cleartext.write(ciphertext.translate(xor_table))
    cleartext.seek(0)
    return cleartext
2020-02-26 00:37:11 +01:00
2020-01-16 19:25:19 +01:00
# Script body: every command-line argument is a chapter id. Chapter N (in
# argument order, 1-based) is saved into directory "N", one zero-padded
# .webp file per page. Files that already exist are not downloaded again.
requested_chapters = sys.argv[1:]

# Lists are built up front so each progress bar knows its total.
chapter_jobs = list(enumerate(requested_chapters))
for chapter_idx, chapter in tqdm(chapter_jobs):
    chapter_dir = str(chapter_idx + 1)
    os.makedirs(chapter_dir, exist_ok=True)

    page_jobs = list(enumerate(get_pages(chapter)))
    for idx, page in tqdm(page_jobs):
        filename = os.path.join(chapter_dir, f"{idx+1:04}.webp")
        if not os.path.isfile(filename):
            # Fetch and decode before opening the file, so a failed download
            # does not leave an empty file behind.
            image = get_image(page)
            with open(filename, "wb") as f:
                f.write(image.read())