scripts/downloaders/mangarock.py

#!/usr/bin/env python3
from io import BytesIO
from tqdm import tqdm
import os
import requests
import sys

# Decoding thanks to https://github.com/bake/mri/blob/master/mri.go#L34

XOR_KEY = 101


def get_pages(chapter):
    """Yield the page image URLs of one chapter, in the order the API lists them.

    This is a generator, so the HTTP request is only sent when iteration
    starts.

    Args:
        chapter: Chapter id as a string; it is appended to ``"mrs-chapter-"``
            to build the ``oid`` query parameter.

    Yields:
        The ``"url"`` value of every entry in the response's ``"data"`` list.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
        requests.Timeout: The API did not respond within the timeout.
        KeyError: The JSON body has no ``"data"`` key, or an entry has no
            ``"url"`` key.
    """
    response = requests.get(
        "https://api.mangarockhd.com/query/web401/pagesv2",
        params={"oid": "mrs-chapter-" + chapter},
        # Without a timeout requests waits forever on a stalled connection.
        timeout=30,
    )
    # Surface HTTP failures directly instead of as a confusing JSON/KeyError.
    response.raise_for_status()
    for item in response.json()["data"]:
        yield item["url"]


def decode_ciphertext(byte):
    """Return ``byte`` XOR-ed with the module-level ``XOR_KEY``.

    Args:
        byte: An integer, e.g. one element obtained by iterating over a
            ``bytes`` object.

    Returns:
        The integer ``byte ^ XOR_KEY``.
    """
    decoded = byte ^ XOR_KEY
    return decoded


def get_image(url):
    """Download one obfuscated page and return it as an in-memory WebP file.

    Every byte of the downloaded payload is passed through
    ``decode_ciphertext`` and the result is prefixed with a rebuilt 15-byte
    header: ``b"RIFF"``, a 4-byte little-endian size, and ``b"WEBPVP8"``.

    Args:
        url: URL of the obfuscated image.

    Returns:
        A ``BytesIO`` holding the header followed by the decoded payload,
        positioned at offset 0.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: The server did not respond within the timeout.
    """
    # Without a timeout requests waits forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors: otherwise the error body would be "decoded"
    # and handed back as if it were image data.
    response.raise_for_status()
    ciphertext = response.content

    # Size field counts what follows it: the 7 bytes "WEBPVP8" plus payload.
    size = len(ciphertext) + 7

    # 256-entry lookup table built from decode_ciphertext, so the whole
    # payload is decoded in a single C-level pass instead of a per-byte
    # Python call.
    xor_table = bytes(map(decode_ciphertext, range(256)))

    cleartext = BytesIO()
    cleartext.write(b"RIFF")
    # Low 32 bits of the size, least-significant byte first.
    cleartext.write((size & 0xFFFFFFFF).to_bytes(4, "little"))
    cleartext.write(b"WEBPVP8")
    cleartext.write(ciphertext.translate(xor_table))
    cleartext.seek(0)
    return cleartext


# Every command-line argument is treated as one chapter id.
requested_chapters = sys.argv[1:]

# Chapters are saved into directories named after their 1-based position on
# the command line; pages are saved as zero-padded, 1-based "NNNN.webp" files.
for chapter_number, chapter in tqdm(list(enumerate(requested_chapters, start=1))):
    chapter_dir = str(chapter_number)
    os.makedirs(chapter_dir, exist_ok=True)
    # Materialised into a list before being handed to the progress bar.
    numbered_pages = list(enumerate(get_pages(chapter), start=1))
    for page_number, page_url in tqdm(numbered_pages):
        filename = os.path.join(chapter_dir, f"{page_number:04}.webp")
        # Pages that already exist on disk are not downloaded again.
        if not os.path.isfile(filename):
            image = get_image(page_url)
            with open(filename, "wb") as f:
                f.write(image.read())
init 2020-01-16 19:25:19 +01:00			`#!/usr/bin/env python3`
			`from io import BytesIO`
			`from tqdm import tqdm`
			`import os`
			`import requests`
			`import sys`

			`# Decoding thanks to https://github.com/bake/mri/blob/master/mri.go#L34`

			`XOR_KEY = 101`

Blacken python scripts 2020-02-26 00:37:11 +01:00
init 2020-01-16 19:25:19 +01:00			`def get_pages(chapter):`
			`api_response = requests.get(`
Blacken python scripts 2020-02-26 00:37:11 +01:00			`"https://api.mangarockhd.com/query/web401/pagesv2",`
			`params={"oid": "mrs-chapter-" + chapter},`
init 2020-01-16 19:25:19 +01:00			`).json()`
Blacken python scripts 2020-02-26 00:37:11 +01:00			`for item in api_response["data"]:`
			`yield item["url"]`

init 2020-01-16 19:25:19 +01:00
			`def decode_ciphertext(byte):`
			`return byte ^ XOR_KEY`

Blacken python scripts 2020-02-26 00:37:11 +01:00
init 2020-01-16 19:25:19 +01:00			`def get_image(url):`
			`ciphertext = requests.get(url).content`
			`size = len(ciphertext) + 7`
			`cleartext = BytesIO()`
Blacken python scripts 2020-02-26 00:37:11 +01:00			`cleartext.write("RIFF".encode("ascii"))`
			`cleartext.write(`
			`bytes([size >> 0 & 255, size >> 8 & 255, size >> 16 & 255, size >> 24 & 255])`
			`)`
			`cleartext.write("WEBPVP8".encode("ascii"))`
init 2020-01-16 19:25:19 +01:00			`cleartext.write(bytes(list(map(decode_ciphertext, ciphertext))))`
			`cleartext.seek(0)`
			`return cleartext`

Blacken python scripts 2020-02-26 00:37:11 +01:00
init 2020-01-16 19:25:19 +01:00			`requested_chapters = sys.argv[1:]`

			`for chapter_idx, chapter in tqdm(list(enumerate(requested_chapters))):`
Blacken python scripts 2020-02-26 00:37:11 +01:00			`chapter_dir = str(chapter_idx + 1)`
init 2020-01-16 19:25:19 +01:00			`os.makedirs(chapter_dir, exist_ok=True)`
			`pages = get_pages(chapter)`
			`for idx, page in tqdm(list(enumerate(pages))):`
Blacken python scripts 2020-02-26 00:37:11 +01:00			`filename = os.path.join(chapter_dir, f"{idx+1:04}.webp")`
init 2020-01-16 19:25:19 +01:00			`if os.path.isfile(filename):`
			`continue`

			`image = get_image(page)`
Blacken python scripts 2020-02-26 00:37:11 +01:00			`with open(filename, "wb") as f:`
init 2020-01-16 19:25:19 +01:00			`f.write(image.read())`