site/encode.py
Simon Bruder 9c14f1bb19
Make encode bit-exact
This ensures only files that were actually changed have to be uploaded
on deploy.
2022-01-28 14:50:56 +01:00

125 lines
3.9 KiB
Python
Executable file

#!/usr/bin/env python3
import base64
import concurrent.futures
import multiprocessing
import os
import xml.etree.ElementTree as ET
from datetime import datetime
from subprocess import run
from urllib.parse import urlparse
from mutagen.flac import Picture
from mutagen.oggopus import OggOpus
from mutagen.oggvorbis import OggVorbis
import common
def _is_outdated(outfile, sources):
    """Return True when *outfile* is missing or older than any of *sources*.

    A missing source file also counts as outdated, so that the subsequent
    encode step is the one to report the problem.
    """
    try:
        target_mtime = os.path.getmtime(outfile)
        return any(os.path.getmtime(source) > target_mtime for source in sources)
    except FileNotFoundError:
        return True


def _embed_ogg_poster(outfile, extension, poster):
    """Embed the image at *poster* as a picture block in the Ogg file *outfile*.

    *extension* selects the mutagen container class: ``"oga"`` is opened as
    Ogg Vorbis, anything else as Ogg Opus.
    """
    audio = OggVorbis(outfile) if extension == "oga" else OggOpus(outfile)

    picture = Picture()
    with open(poster, "rb") as f:
        picture.data = f.read()
    # NOTE(review): 17 is "A bright coloured fish" in the FLAC/ID3 picture
    # type table (3 would be "front cover") -- confirm this is intended.
    picture.type = 17
    picture.desc = ""
    # The values below are hard-coded, not read from the image; this assumes
    # the poster is a 500x500, 24-bit JPEG -- TODO confirm.
    picture.mime = "image/jpeg"
    picture.width = 500
    picture.height = 500
    picture.depth = 24

    audio["metadata_block_picture"] = [
        base64.b64encode(picture.write()).decode("ascii")
    ]
    audio.save()


def encode_episode(podcast, episode, format):
    """Encode one episode's FLAC master into one target format using ffmpeg.

    The output is only rebuilt when it is missing or older than the FLAC
    input, the episode's markdown file or the podcast poster. For ``oga`` and
    ``opus`` the poster is embedded afterwards with mutagen; for every other
    format the poster is passed to ffmpeg as a second input.

    Args:
        podcast: Mapping with the keys ``"title"``, ``"link"`` and
            ``"poster"`` (path to the poster image).
        episode: Mapping with the keys ``"title"``, ``"contributors"``,
            ``"number"``, ``"date"`` (a datetime), ``"summary"`` and
            ``"file_base"``.
        format: ``(extension, ffmpeg_options)`` pair, e.g. one item of
            ``common.FORMATS.items()``.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
            Any error raised while encoding or tagging is propagated after
            the incomplete output file has been removed.
    """
    # The parameter name is part of the public signature; unpack it into
    # locals that do not shadow the ``format`` builtin.
    extension, options = format
    file_base = episode["file_base"]
    infile = common.path_to_episode(file_base, "flac")
    outfile = common.path_to_episode(file_base, extension)
    content_file = common.path_to_episode(file_base, "md")
    poster = podcast["poster"]
    is_ogg = extension in ("oga", "opus")

    if not _is_outdated(outfile, [infile, content_file, poster]):
        print(f"[⏭️] {file_base}.{extension}")
        return

    tags = {
        "TITLE": episode["title"],
        "ARTIST": ", ".join(episode["contributors"]),
        "ALBUM": podcast["title"],
        "TRACK": episode["number"],
        "GENRE": "podcast",
        "DATE": datetime.strftime(episode["date"], "%Y-%m-%d"),
        "URL": podcast["link"],
        "COMMENT": episode["summary"],
    }

    command = ["ffmpeg", "-y", "-loglevel", "error"]
    command.extend(["-i", infile])
    if not is_ogg:
        # Ogg outputs get their poster from mutagen after encoding instead.
        command.extend(["-i", poster])
    # For AAC, the default codec choice (ffmpeg native) is not the best choice
    if extension == "m4a":
        command.extend(["-c:a", "libfdk_aac"])
    # NOTE(review): applied to every format here; confirm it was not meant to
    # be limited to the formats that take the poster as a second input.
    command.extend(["-c:v", "copy"])
    command.append("-bitexact")  # deterministic output
    command.extend(options)
    for key, value in tags.items():
        command.extend(["-metadata", f"{key}={value}"])
    command.append(outfile)

    try:
        run(command, check=True)
        if is_ogg:
            _embed_ogg_poster(outfile, extension, poster)
    except BaseException:
        # A partial or untagged output would carry a fresh mtime and be
        # treated as up to date on the next run, so remove it (best effort)
        # before propagating the error.
        try:
            os.remove(outfile)
        except OSError:
            pass
        raise

    print(f"[✔️] {file_base}.{extension}")
# Script body: read the generated opus feed, derive podcast/episode metadata
# from it and encode every episode into every format listed in common.FORMATS.
os.makedirs("static/episodes", exist_ok=True)

tree = ET.parse("public/formats/opus/rss.xml")
root = tree.getroot()
channel = root.find("channel")

podcast = {
    "title": channel.find("title").text,
    "link": channel.find("link").text,
    # Map the poster URL from the feed onto the local "static" directory.
    "poster": "static" + urlparse(channel.find("image").find("url").text).path,
}

# (label, future) for every submitted job, so that failures can be reported
# once all jobs have finished.
jobs = []

# Leaving the ``with`` block waits for all submitted jobs to finish.
with concurrent.futures.ThreadPoolExecutor(
    max_workers=multiprocessing.cpu_count()
) as pool:
    for item in channel.findall("item"):
        episode = {
            "title": item.find("title").text,
            "number": item.find(
                "{http://www.itunes.com/dtds/podcast-1.0.dtd}episode"
            ).text,
            "date": datetime.strptime(
                item.find("pubDate").text, "%a, %d %b %Y %H:%M:%S %z"
            ),
            "contributors": [
                contributor.find("{http://www.w3.org/2005/Atom}name").text
                for contributor in item.findall(
                    "{http://www.w3.org/2005/Atom}contributor"
                )
            ],
            "summary": item.find(
                "{http://www.itunes.com/dtds/podcast-1.0.dtd}summary"
            ).text,
            "file_base": os.path.splitext(
                os.path.basename(item.find("enclosure").attrib["url"])
            )[0],
        }

        for format_spec in common.FORMATS.items():
            future = pool.submit(encode_episode, podcast, episode, format_spec)
            jobs.append((f"{episode['file_base']}.{format_spec[0]}", future))

# An exception raised inside a worker is only stored on its future; without
# this check a failed encode would go unnoticed and the script would exit 0.
failed = False
for label, future in jobs:
    error = future.exception()
    if error is not None:
        failed = True
        print(f"[❌] {label}: {type(error).__name__}: {error}")
if failed:
    raise SystemExit(1)