2020-01-16 19:25:19 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
from PIL import Image
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from tqdm import tqdm
|
|
|
|
import re
|
|
|
|
import requests
|
|
|
|
import sys
|
|
|
|
|
2020-02-26 00:37:11 +01:00
|
|
|
|
2020-01-16 19:25:19 +01:00
|
|
|
def parse_filename(filename):
    """Extract the six integers embedded in a ptimg chunk filename.

    The coords entries appear to look like
    ``"i:<from_x>,<from_y>+<width>,<height>><to_x>,<to_y>"`` (prefix token,
    then six numbers separated by ``:``, ``,``, ``+`` and ``>``) — the main
    loop unpacks exactly six values from the result.

    Returns an iterator (``map``) over the six ints, in source order:
    from_x, from_y, width, height, to_x, to_y.
    """
    # Raw string: in a plain literal "\+" is an invalid escape sequence
    # (SyntaxWarning since Python 3.6, scheduled to become an error).
    # A character class is equivalent to the alternation ":|,|\+|>".
    parts = re.split(r"[:,+>]", filename)
    # parts[0] is the non-numeric prefix; everything after it is numeric.
    return map(int, parts[1:])
|
|
|
|
|
2020-02-26 00:37:11 +01:00
|
|
|
|
2020-01-16 19:25:19 +01:00
|
|
|
def get_image_urls():
    """Yield the ptimg metadata URL for every page of the chapter.

    Reads the chapter base URL from ``sys.argv[1]``, fetches it, and yields
    ``base_url + data-ptimg`` for each <div> under the element with
    id="content" that carries a ``data-ptimg`` attribute.

    Raises:
        requests.HTTPError: if the chapter page request fails.
    """
    base_url = sys.argv[1]
    response = requests.get(base_url)
    # Fail loudly on HTTP errors instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    for div in soup.find(id="content").find_all("div"):
        ptimg_path = div.get("data-ptimg")
        # Not every <div> under #content is necessarily a page container;
        # div.get() returns None for those, and "url + None" would raise
        # TypeError — skip them instead.
        if ptimg_path:
            yield base_url + ptimg_path
|
|
|
|
|
2020-01-16 19:25:19 +01:00
|
|
|
|
|
|
|
# Descramble every page: fetch the per-page metadata, fetch the scrambled
# JPEG, and reassemble the chunks into the final image.
pages = list(get_image_urls())

for metadata_url in tqdm(pages):
    # The metadata URL ends in ".ptimg.json"; the scrambled JPEG lives at
    # the same path with a ".jpg" extension.  Raw string: "\." in a plain
    # literal is an invalid escape sequence (SyntaxWarning, future error).
    image_url = re.sub(r"\.ptimg\.json$", ".jpg", metadata_url)

    ptimg_data = requests.get(metadata_url).json()

    # stream=True + .raw hands PIL the undecoded transfer stream; without
    # decode_content=True the body may still be gzip/deflate-compressed,
    # in which case Image.open cannot recognize it as a JPEG.
    image_response = requests.get(image_url, stream=True)
    image_response.raw.decode_content = True
    scrambled_image = Image.open(image_response.raw)

    # The first "view" holds the final page dimensions plus the list of
    # chunk coordinate strings (hoisted out of the repeated subscripting).
    view = ptimg_data["views"][0]
    combined_image = Image.new("RGB", (view["width"], view["height"]))

    # Each coords entry encodes a source rectangle in the scrambled image
    # and the destination offset in the descrambled one.
    for from_x, from_y, width, height, to_x, to_y in map(
        parse_filename, view["coords"]
    ):
        chunk = scrambled_image.crop(
            (from_x, from_y, from_x + width, from_y + height)
        )
        combined_image.paste(chunk, (to_x, to_y))

    # Save into the current directory under the image's own filename.
    combined_image.save(image_url.split("/")[-1])
|