#!/usr/bin/env python3
"""Generate JSON metadata files for the episodes of the site.

Two outputs are produced when this script runs:

* ``static/episodes.json`` -- for every ``content/<ACRONYM>*.md`` file, the
  episode duration and the size of each encoded format.
* ``static/episodes/<episode>.podlove.json`` -- the JSON config embedded in
  each rendered ``public/<ACRONYM>*/index.html`` page.
"""
import json
import os.path
from glob import glob
from html.parser import HTMLParser

import common


class EpisodeHtmlParser(HTMLParser):
    """Extract the embedded JSON config of one episode from an HTML page.

    The parser looks for
    ``<script type="application/json" id="config-episode-<episode>">`` and
    stores the decoded content in ``data``. ``data`` stays an empty dict when
    no such tag (or only an empty one) is found.
    """

    def __init__(self, episode):
        """Create a parser for the page of *episode* (the episode identifier)."""
        super().__init__()
        self.episode = episode
        # Per-instance state, so parsers never share a mutable class attribute.
        self.current_tag_is_episode_json = False
        self.data = {}
        self._chunks = []

    def handle_starttag(self, tag, attrs):
        """Start capturing when the episode's JSON ``<script>`` tag opens."""
        attrs = dict(attrs)
        if (
            tag == "script"
            and attrs.get("type") == "application/json"
            # Use the episode given to this parser, not a module-level global.
            and attrs.get("id") == f"config-episode-{self.episode}"
        ):
            self.current_tag_is_episode_json = True
            self._chunks = []

    def handle_endtag(self, tag):
        """Stop capturing and decode the text collected since the start tag."""
        if not self.current_tag_is_episode_json:
            return
        self.current_tag_is_episode_json = False
        payload = "".join(self._chunks)
        self._chunks = []
        if payload:
            self.data = json.loads(payload)

    def handle_data(self, data):
        """Buffer text that belongs to the episode's JSON ``<script>`` tag."""
        # The text of one element can be delivered in more than one call, so
        # it is collected here and only decoded once the tag is closed.
        if self.current_tag_is_episode_json:
            self._chunks.append(data)


# Collect duration and encoded file sizes for every episode.
episodes = {}
for content_path in sorted(glob(f"content/{common.ACRONYM}*.md")):
    episode = os.path.splitext(os.path.basename(content_path))[0]

    # The duration is read from the "flac" episode info and converted by
    # common.sexagesimal (presumably to an H:M:S style string -- see common).
    duration = common.sexagesimal(
        float(common.get_episode_info(episode, "flac")["duration"])
    )

    formats = {}
    for fmt in common.FORMATS:
        try:
            size = os.path.getsize(common.path_to_episode(episode, fmt))
        except FileNotFoundError:
            # when bootstrapping for the first time the encoded files do not exist
            size = 0
        formats[fmt] = {"size": size}

    episodes[episode] = {"duration": duration, "formats": formats}

with open("static/episodes.json", "w", encoding="utf-8") as f:
    json.dump(episodes, f)

# extract podlove episode json
for page_path in sorted(glob(f"public/{common.ACRONYM}*/index.html")):
    # The episode identifier is the name of the directory holding index.html.
    episode = os.path.basename(os.path.dirname(page_path))

    parser = EpisodeHtmlParser(episode)
    # NOTE(review): assumes the rendered pages are UTF-8 encoded -- confirm
    # against the site generator configuration.
    with open(page_path, encoding="utf-8") as f:
        parser.feed(f.read())

    os.makedirs("static/episodes", exist_ok=True)
    with open(
        f"static/episodes/{episode}.podlove.json", "w", encoding="utf-8"
    ) as f:
        json.dump(parser.data, f)