From 84be057e025ce0349e9f9f33c40ebab718b57efb Mon Sep 17 00:00:00 2001
From: Michael Murtaugh
Date: Tue, 14 Nov 2023 10:16:22 +0100
Subject: [PATCH] pad-to-edit scripts

---
 .gitignore                |  0
 pad-to-edit.sh            | 16 +++++++
 scripts/csv-add-end.py    | 16 +++++++
 scripts/csv-to-melt.py    | 45 ++++++++++++++++++
 scripts/split-urls-csv.py | 13 ++++++
 scripts/split-urls.py     |  5 ++
 scripts/timecode.py       | 96 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 191 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 pad-to-edit.sh
 create mode 100644 scripts/csv-add-end.py
 create mode 100644 scripts/csv-to-melt.py
 create mode 100644 scripts/split-urls-csv.py
 create mode 100644 scripts/split-urls.py
 create mode 100644 scripts/timecode.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/pad-to-edit.sh b/pad-to-edit.sh
new file mode 100644
index 0000000..7034819
--- /dev/null
+++ b/pad-to-edit.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Fetch the pad text, turn its timed URLs into an edit list, and run the generated melt command.
+set -euo pipefail
+
+wget https://pad.xpub.nl/p/radioworm-20231107/export/txt -O radioworm-20231107.pad.txt
+
+python3 scripts/split-urls-csv.py < radioworm-20231107.pad.txt | \
+python3 scripts/csv-add-end.py > radioworm-20231107.csv
+
+csvgrep -c text -m "#intro" radioworm-20231107.csv > radioworm-20231107.intro.csv
+
+python3 scripts/csv-to-melt.py < radioworm-20231107.intro.csv > edit.sh
+
+wget https://hub.xpub.nl/chopchop/worm/xpub/radioworm-20231107.ogg -O radioworm-20231107.ogg
+
+bash edit.sh
diff --git a/scripts/csv-add-end.py b/scripts/csv-add-end.py
new file mode 100644
index 0000000..310a9c1
--- /dev/null
+++ b/scripts/csv-add-end.py
@@ -0,0 +1,16 @@
+"""Add an "end" column: a row ends where the next row with the same url starts."""
+import csv, sys
+
+outf = csv.writer(sys.stdout)
+last_row = None
+outf.writerow(["url", "start", "end", "text"])
+for row in csv.DictReader(sys.stdin):
+    if last_row is not None:
+        # Only the start of a following row on the same url can close the previous row.
+        end = row['start'] if last_row['url'] == row['url'] else None
+        outf.writerow([last_row['url'], last_row['start'], end, last_row['text']])
+    last_row = row
+
+# The final row stays open-ended; with empty input there is no row to flush.
+if last_row is not None:
+    outf.writerow([last_row['url'], last_row['start'], None, last_row['text']])
diff --git a/scripts/csv-to-melt.py b/scripts/csv-to-melt.py
new file mode 100644
index 0000000..6b49612
--- /dev/null
+++ b/scripts/csv-to-melt.py
@@ -0,0 +1,45 @@
+"""Turn a CSV edit list (url, start, end, text) read from stdin into a melt command line."""
+import csv, sys, argparse
+from timecode import parse2secs
+from math import floor
+from shlex import quote
+from urllib.parse import urlparse
+import os
+
+
+def url_filename (url):
+    """Return the last path component of url."""
+    p = urlparse(url)
+    return os.path.split(p.path)[1]
+
+def to_cmd (edl, framerate):
+    """Build the melt invocation for the rows of edl, converting times to frames at framerate."""
+    def s2f (t):
+        # None means "no time given"; anything else must parse as seconds or a timecode.
+        if t is None:
+            return None
+        secs = parse2secs(t)
+        if secs is None:
+            raise ValueError("unparseable timecode: {0!r}".format(t))
+        return floor(framerate*secs)
+    def cmdify (edit):
+        # The command is printed for a shell to run, so file names are shell-quoted.
+        name = quote(url_filename(edit['url']))
+        if edit['end']:
+            return '{0} in={1} out={2}'.format(name, s2f(edit['start']), s2f(edit['end']))
+        else:
+            return '{0} in={1}'.format(name, s2f(edit['start']))
+    return "melt " + " ".join([cmdify(edit) for edit in edl])
+
+ap = argparse.ArgumentParser(description="srt to melt command")
+ap.add_argument("--framerate", type=float, default=25.0)
+ap.add_argument("--output", default="edit.mp4")
+ap.add_argument("--profile", default="quarter_pal_wide")
+args = ap.parse_args()
+
+rows = list(csv.DictReader(sys.stdin))
+
+cmd = to_cmd(rows, args.framerate)
+cmd += " -profile {0} -consumer {1}".format(quote(args.profile), quote("avformat:" + args.output))
+
+print (cmd)
diff --git a/scripts/split-urls-csv.py b/scripts/split-urls-csv.py
new file mode 100644
index 0000000..a4ab219
--- /dev/null
+++ b/scripts/split-urls-csv.py
@@ -0,0 +1,13 @@
+import re, sys, csv
+
+# From [itertools recipes](https://docs.python.org/3/library/itertools.html#itertools-recipes)
+def grouper (iterable, n):
+    args = [iter(iterable)] * n
+    return zip(*args)
+
+w = csv.writer(sys.stdout)
+w.writerow(["url","start", "text"])
+text = sys.stdin.read()
+items = list(re.split(r"((https?://\S+?)#t=(\S+))", text))
+for url, baseurl, timecode, text in grouper(items[1:], 4):
+    w.writerow((baseurl, timecode, text))
diff --git a/scripts/split-urls.py b/scripts/split-urls.py
new file mode 100644
index 0000000..18de454
--- /dev/null
+++ b/scripts/split-urls.py
@@ -0,0 +1,5 @@
+import re, sys, json
+
+text = sys.stdin.read()
+items = list(re.split(r"(https?://\S+?#t=\S+)", text))
+print (json.dumps({'items': items}, indent=2))
diff --git a/scripts/timecode.py b/scripts/timecode.py
new file mode 100644
index 0000000..c6e5696
--- /dev/null
+++ b/scripts/timecode.py
@@ -0,0 +1,96 @@
+# This file is part of Active Archives.
+# Copyright 2006-2016 the Active Archives contributors (see AUTHORS)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# Also add information on how to contact you by electronic and paper mail.
from __future__ import print_function
import math
import re

# Optional "H:" prefix, then "M:S", then an optional fraction after "." or ",".
# Earlier form that required the hours field:
# timecode_pat = re.compile(r"(\d+):(\d+):(\d+)(?:[.,](\d+))?")
timecode_pat = re.compile(r"(?:(\d+):)?(\d+):(\d+)(?:[.,](\d+))?")


def timecode_fromsecs(rawsecs, fract=True, alwaysfract=False, fractdelim=',', alwayshours=False):
    """Format a number of seconds as [HH:]MM:SS[<fractdelim>xxx].

    rawsecs     -- seconds (int or float)
    fract       -- if true, keep millisecond precision; the three-digit
                   fraction is printed only when it is non-zero or when
                   alwaysfract is true.  If false, round to whole seconds.
    alwaysfract -- with fract, print the fraction even when it is 000
    fractdelim  -- text placed between the seconds and the fraction
    alwayshours -- print the hours field even when it is zero
    """
    # Round the whole value first and split it into fields afterwards, so
    # that rounding carries into the next field: 59.9996 becomes 01:00
    # rather than 00:59,000, and 59.67 without fraction becomes 01:00
    # rather than 00:60.
    if fract:
        whole, millis = divmod(int(round(rawsecs * 1000)), 1000)
    else:
        whole, millis = int(round(rawsecs)), 0
    hours, rest = divmod(whole, 3600)
    mins, secs = divmod(rest, 60)

    if hours or alwayshours:
        base = "%02d:%02d:%02d" % (hours, mins, secs)
    else:
        base = "%02d:%02d" % (mins, secs)

    if fract and (millis > 0 or alwaysfract):
        return "%s%s%03d" % (base, fractdelim, millis)
    return base


def timecode_tosecs(tcstr):
    """Return the seconds encoded by the first [H:]M:S[.fff] timecode in tcstr.

    The result is an int when the timecode has no fraction and a float when
    it has one.  Returns None when tcstr contains no timecode.
    """
    match = timecode_pat.search(tcstr)
    if not match:
        return None
    hours, mins, secs, frac = match.groups()
    total = 60 * int(mins) + int(secs)
    if hours:
        total += 3600 * int(hours)
    if frac:
        # Parse "<total>.<digits>" as decimal text so the fraction is read
        # exactly as written, whatever its number of digits.
        return float("%d.%s" % (total, frac))
    return total


def parse2secs(val):
    """Return val as seconds: a plain number if it parses as one, else a timecode.

    Returns None when val is neither a number nor a timecode.
    """
    try:
        return float(val)
    except ValueError:
        return timecode_tosecs(val)
## to accept None
#    except TypeError:
#        return

if __name__ == "__main__":
    def t(x):
        # with fraction
        s = timecode_fromsecs(x, True, False)
        print (x, "=>", s, "=>", timecode_tosecs(s))
        # without fraction
        s = timecode_fromsecs(x, False)
        print (x, "=>", s, "=>", timecode_tosecs(s))

    t(0)
    t(59.666666666666666)
    t(60)
    t(60.0)
    t(1235 / 3.0)
    t(10000.5)