site/metadata.py

#!/usr/bin/env python3
from glob import glob
from html.parser import HTMLParser
import json
import os.path

import common


class EpisodeHtmlParser(HTMLParser):
    """Extract the JSON payload of one episode's config ``<script>`` element.

    After feeding an HTML document, ``data`` holds the decoded content of the
    ``<script>`` element whose ``type`` is ``application/json`` and whose
    ``id`` is ``config-episode-<episode>``. It stays an empty dict when the
    document contains no such element.
    """

    # True while the parser is positioned inside the matching <script> element.
    current_tag_is_episode_json = False

    def __init__(self, episode):
        """Create a parser that looks for the config script of *episode*."""
        super().__init__()
        self.episode = episode
        # Per-instance result, so parsers that find nothing do not share one
        # mutable class-level dict.
        self.data = {}

    def handle_starttag(self, tag, attrs):
        """Start capturing when the episode's JSON ``<script>`` tag opens."""
        attrs = dict(attrs)
        if (
            tag == "script"
            and attrs.get("type") == "application/json"
            # Compare against the episode given to this instance rather than
            # whatever module-level ``episode`` variable happens to exist.
            and attrs.get("id") == f"config-episode-{self.episode}"
        ):
            self.current_tag_is_episode_json = True

    def handle_endtag(self, tag):
        """Stop capturing at the first end tag seen after the start tag."""
        if self.current_tag_is_episode_json:
            self.current_tag_is_episode_json = False

    def handle_data(self, data):
        """Decode the script content as JSON while capturing is active.

        Raises:
            json.JSONDecodeError: if the captured content is not valid JSON.
        """
        if self.current_tag_is_episode_json:
            self.data = json.loads(data)


def chapter_fix_timestamp(chapter):
    """Replace the chapter's ``start`` value with its sexagesimal form.

    The mapping is modified in place: ``chapter["start"]`` is passed through
    ``common.sexagesimal`` and the result is stored back under the same key.
    The same (mutated) mapping is returned so the function can be used
    with ``map``.
    """
    formatted_start = common.sexagesimal(chapter["start"])
    chapter["start"] = formatted_start
    return chapter


# Collect duration, per-format file sizes and chapters for every episode that
# has a markdown file under content/, then publish it as static/episodes.json.
metadata = {}

for file in sorted(glob(f"content/{common.ACRONYM}*.md")):
    # The episode identifier is the markdown file name without its extension.
    episode = os.path.splitext(os.path.basename(file))[0]

    duration = common.sexagesimal(
        float(common.get_episode_info(episode, "original")["duration"])
    )

    formats = {}
    # ``fmt`` rather than ``format`` so the builtin is not shadowed.
    for fmt in common.FORMATS.keys():
        try:
            size = os.path.getsize(common.path_to_episode(episode, fmt))
        except FileNotFoundError:
            # when bootstrapping for the first time the encoded files do not exist
            size = 0
        formats[fmt] = {"size": size}

    chapters = [
        chapter_fix_timestamp(chapter) for chapter in common.get_chapters(episode)
    ]

    # Key order is kept as duration, formats, chapters so the emitted JSON
    # keeps the same layout.
    metadata[episode] = {
        "duration": duration,
        "formats": formats,
        "chapters": chapters,
    }

with open("static/episodes.json", "w") as f:
    json.dump(metadata, f)

# extract podlove episode json
# For every rendered episode page under public/, pull the embedded episode
# JSON out of the HTML and write it to static/episodes/<episode>.podlove.json.
for file in sorted(glob(f"public/{common.ACRONYM}*/index.html")):
    # The episode identifier is the name of the directory holding index.html.
    episode = os.path.basename(os.path.dirname(file))
    parser = EpisodeHtmlParser(episode)
    with open(file) as f:
        parser.feed(f.read())
    metadata = parser.data

    output_path = f"static/episodes/{episode}.podlove.json"
    # Create the directory that is actually written to. Previously
    # "static/podlove" was created instead, so the write below failed with
    # FileNotFoundError whenever "static/episodes" did not exist yet.
    # NOTE(review): confirm nothing else relies on "static/podlove" existing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(metadata, f)
Initial commit 2020-08-11 16:14:38 +02:00			`#!/usr/bin/env python3`
			`from glob import glob`
			`from html.parser import HTMLParser`
			`import json`
			`import os.path`

			`import common`


			`class EpisodeHtmlParser(HTMLParser):`
			`current_tag_is_episode_json = False`
			`data = {}`

			`def __init__(self, episode):`
			`super().__init__()`
			`self.episode = episode`

			`def handle_starttag(self, tag, attrs):`
			`attrs = dict(attrs)`
			`if (`
			`tag == "script"`
			`and attrs.get("type") == "application/json"`
			`and attrs.get("id") == f"config-episode-{episode}"`
			`):`
			`self.current_tag_is_episode_json = True`

			`def handle_endtag(self, tag):`
			`if self.current_tag_is_episode_json:`
			`self.current_tag_is_episode_json = False`

			`def handle_data(self, data):`
			`if self.current_tag_is_episode_json:`
			`self.data = json.loads(data)`


			`def chapter_fix_timestamp(chapter):`
			`chapter["start"] = common.sexagesimal(chapter["start"])`
			`return chapter`


			`metadata = {}`

			`for file in sorted(glob(f"content/{common.ACRONYM}*.md")):`
			`episode = os.path.splitext(os.path.basename(file))[0]`
			`metadata[episode] = {}`

			`metadata[episode]["duration"] = common.sexagesimal(`
			`float(common.get_episode_info(episode, "original")["duration"])`
			`)`

			`metadata[episode]["formats"] = {}`
			`for format in common.FORMATS.keys():`
			`try:`
			`size = os.path.getsize(common.path_to_episode(episode, format))`
			`except FileNotFoundError:`
			`# when bootstrapping for the first time the encoded files do not exist`
			`size = 0`
			`metadata[episode]["formats"][format] = {"size": size}`

			`metadata[episode]["chapters"] = list(`
			`map(chapter_fix_timestamp, common.get_chapters(episode))`
			`)`

			`with open("static/episodes.json", "w") as f:`
			`f.write(json.dumps(metadata))`

			`# extract podlove episode json`
			`for file in sorted(glob(f"public/{common.ACRONYM}*/index.html")):`
			`episode = os.path.basename(os.path.dirname(file))`
			`parser = EpisodeHtmlParser(episode)`
			`with open(file) as f:`
			`parser.feed(f.read())`
			`metadata = parser.data`

			`os.makedirs("static/podlove", exist_ok=True)`
			`with open(f"static/episodes/{episode}.podlove.json", "w") as f:`
			`f.write(json.dumps(metadata))`