2020-08-11 16:14:38 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
import json
|
|
|
|
import os.path
|
2021-11-30 19:57:59 +01:00
|
|
|
from glob import glob
|
|
|
|
from html.parser import HTMLParser
|
2020-08-11 16:14:38 +02:00
|
|
|
|
|
|
|
import common
|
|
|
|
|
|
|
|
|
|
|
|
class EpisodeHtmlParser(HTMLParser):
    """Pull the embedded JSON configuration of one episode out of an HTML page.

    The parser watches for a ``<script>`` element whose ``type`` is
    ``application/json`` and whose ``id`` is ``config-episode-<episode>``.
    The text of that element is decoded with ``json.loads`` and stored in
    ``data``. ``data`` stays an empty dict when no such element (or only an
    empty one) was seen.

    Args:
        episode: value interpolated into the expected element id.

    Raises:
        json.JSONDecodeError: from ``feed`` when the matching element does not
            contain valid JSON.
    """

    # Class-level defaults; every instance gets its own state in __init__.
    current_tag_is_episode_json = False
    data = {}

    def __init__(self, episode):
        super().__init__()
        self.episode = episode
        self.current_tag_is_episode_json = False
        # Per-instance dict so parsers that find nothing do not share one
        # mutable object through the class attribute.
        self.data = {}
        self._json_chunks = []

    def handle_starttag(self, tag, attrs):
        """Start collecting text when the episode's JSON script tag opens."""
        attrs = dict(attrs)
        if (
            tag == "script"
            and attrs.get("type") == "application/json"
            # Use the episode given to the constructor, not a module global.
            and attrs.get("id") == f"config-episode-{self.episode}"
        ):
            self.current_tag_is_episode_json = True
            self._json_chunks = []

    def handle_endtag(self, tag):
        """Decode the collected text once the episode's script tag closes."""
        if self.current_tag_is_episode_json:
            self.current_tag_is_episode_json = False
            if self._json_chunks:
                self.data = json.loads("".join(self._json_chunks))
                self._json_chunks = []

    def handle_data(self, data):
        """Buffer text that belongs to the episode's JSON script tag."""
        if self.current_tag_is_episode_json:
            # Decoding is deferred to handle_endtag so the result does not
            # depend on the text arriving in a single handle_data call.
            self._json_chunks.append(data)
|
|
|
|
|
|
|
|
|
|
|
|
# Collect duration and per-format file sizes for every episode that has a
# markdown file under content/, keyed by the file's base name.
metadata = {}

for markdown_path in sorted(glob(f"content/{common.ACRONYM}*.md")):
    episode, _extension = os.path.splitext(os.path.basename(markdown_path))

    original_info = common.get_episode_info(episode, "original")
    duration = common.sexagesimal(float(original_info["duration"]))

    formats = {}
    for fmt in common.FORMATS.keys():
        encoded_path = common.path_to_episode(episode, fmt)
        try:
            size = os.path.getsize(encoded_path)
        except FileNotFoundError:
            # on the very first bootstrap the encoded files are not there yet
            size = 0
        formats[fmt] = {"size": size}

    metadata[episode] = {"duration": duration, "formats": formats}

# Write the collected metadata as a single JSON document.
with open("static/episodes.json", "w") as episodes_file:
    json.dump(metadata, episodes_file)
|
|
|
|
|
|
|
|
# extract podlove episode json
# For each rendered episode page, parse out the embedded JSON and write it to
# static/episodes/<episode>.podlove.json.
for index_path in sorted(glob(f"public/{common.ACRONYM}*/index.html")):
    # The episode name is the directory that contains index.html.
    episode = os.path.basename(os.path.dirname(index_path))

    parser = EpisodeHtmlParser(episode)
    with open(index_path) as page:
        html = page.read()
    parser.feed(html)
    metadata = parser.data

    os.makedirs("static/episodes", exist_ok=True)

    target_path = f"static/episodes/{episode}.podlove.json"
    with open(target_path, "w") as target:
        json.dump(metadata, target)
|