scripts/downloaders/comic-valkyrie.py

#!/usr/bin/env python3
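"""Download a chapter from comic-valkyrie.com and descramble its pages.

Each page is served as a scrambled JPEG plus a ``.ptimg.json`` file that
records where every tile belongs. Invoked with the chapter reader URL as its
only argument, the script fetches both files for each page, reassembles the
image, and saves it to the current directory under its original file name.
"""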
import re
import sys

import requests
from bs4 import BeautifulSoup
from PIL import Image
from tqdm import tqdm


def parse_filename(filename):
    # A coords entry has the form "<tag>:x,y+w,h>X,Y"; splitting on the
    # punctuation and dropping the tag yields the six integers
    # (from_x, from_y, width, height, to_x, to_y).
    split = re.split(r":|,|\+|>", filename)
    return map(int, split[1:])


def get_image_urls():
    # The chapter reader URL is the script's only argument; page paths are
    # concatenated onto it, so it is expected to end with a slash.
    url = sys.argv[1]
    soup = BeautifulSoup(requests.get(url).text, "html.parser")
    # Each page <div> under #content names its descrambling metadata in a
    # data-ptimg attribute; divs without one are skipped.
    for div in soup.find(id="content").find_all("div", attrs={"data-ptimg": True}):
        yield url + div["data-ptimg"]


pages = list(get_image_urls())
for metadata_url in tqdm(pages):
    # The scrambled JPEG sits at the same path as its .ptimg.json metadata,
    # with the extension swapped.
    image_url = re.sub(r"\.ptimg\.json$", ".jpg", metadata_url)
    ptimg_data = requests.get(metadata_url).json()
    image_data = requests.get(image_url, stream=True).raw
    scrambled_image = Image.open(image_data)
    # Start from a blank canvas at the page's final, descrambled dimensions.
    combined_image = Image.new(
        "RGB", (ptimg_data["views"][0]["width"], ptimg_data["views"][0]["height"])
    )
    # Move each tile from its position in the scrambled image to its true
    # position on the canvas.
    for from_x, from_y, width, height, to_x, to_y in map(
        parse_filename, ptimg_data["views"][0]["coords"]
    ):
        chunk_data = scrambled_image.crop(
            (from_x, from_y, from_x + width, from_y + height)
        )
        combined_image.paste(chunk_data, (to_x, to_y))
    # Write the reassembled page out under its original file name.
    combined_image.save(image_url.split("/")[-1])