scripts/downloaders/comic-valkyrie.py

#!/usr/bin/env python3
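"""Download a chapter from comic-valkyrie.com and descramble its pages.

Each page is served as a scrambled JPEG plus a ``.ptimg.json`` file that
records where every tile belongs. Invoked with the chapter reader URL as its
only argument, the script fetches both files for each page, reassembles the
image, and saves it to the current directory under its original file name.
"""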
import re
import sys

import requests
from bs4 import BeautifulSoup
from PIL import Image
from tqdm import tqdm


def parse_filename(filename):
    # A coords entry has the form "<tag>:x,y+w,h>X,Y"; splitting on the
    # punctuation and dropping the tag yields the six integers
    # (from_x, from_y, width, height, to_x, to_y).
    split = re.split(r":|,|\+|>", filename)
    return map(int, split[1:])


def get_image_urls():
    # The chapter reader URL is the script's only argument; page paths are
    # concatenated onto it, so it is expected to end with a slash.
    url = sys.argv[1]
    soup = BeautifulSoup(requests.get(url).text, "html.parser")
    # Each page <div> under #content names its descrambling metadata in a
    # data-ptimg attribute; divs without one are skipped.
    for div in soup.find(id="content").find_all("div", attrs={"data-ptimg": True}):
        yield url + div["data-ptimg"]


pages = list(get_image_urls())
for metadata_url in tqdm(pages):
    # The scrambled JPEG sits at the same path as its .ptimg.json metadata,
    # with the extension swapped.
    image_url = re.sub(r"\.ptimg\.json$", ".jpg", metadata_url)
    ptimg_data = requests.get(metadata_url).json()
    image_data = requests.get(image_url, stream=True).raw
    scrambled_image = Image.open(image_data)
    # Start from a blank canvas at the page's final, descrambled dimensions.
    combined_image = Image.new(
        "RGB", (ptimg_data["views"][0]["width"], ptimg_data["views"][0]["height"])
    )
    # Move each tile from its position in the scrambled image to its true
    # position on the canvas.
    for from_x, from_y, width, height, to_x, to_y in map(
        parse_filename, ptimg_data["views"][0]["coords"]
    ):
        chunk_data = scrambled_image.crop(
            (from_x, from_y, from_x + width, from_y + height)
        )
        combined_image.paste(chunk_data, (to_x, to_y))
    # Write the reassembled page out under its original file name.
    combined_image.save(image_url.split("/")[-1])