From 84be057e025ce0349e9f9f33c40ebab718b57efb Mon Sep 17 00:00:00 2001
From: Michael Murtaugh <mm@automatist.org>
Date: Tue, 14 Nov 2023 10:16:22 +0100
Subject: [PATCH] pad-to-edit scripts

---
 .gitignore                |  0
 pad-to-edit.sh            | 13 ++++++
 scripts/csv-add-end.py    | 14 ++++++
 scripts/csv-to-melt.py    | 38 ++++++++++++++++
 scripts/split-urls-csv.py | 13 ++++++
 scripts/split-urls.py     |  5 ++
 scripts/timecode.py       | 96 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 179 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 pad-to-edit.sh
 create mode 100644 scripts/csv-add-end.py
 create mode 100644 scripts/csv-to-melt.py
 create mode 100644 scripts/split-urls-csv.py
 create mode 100644 scripts/split-urls.py
 create mode 100644 scripts/timecode.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/pad-to-edit.sh b/pad-to-edit.sh
new file mode 100644
index 0000000..7034819
--- /dev/null
+++ b/pad-to-edit.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -e  # stop at the first failing step instead of building and running a broken edit.sh
+wget https://pad.xpub.nl/p/radioworm-20231107/export/txt -O radioworm-20231107.pad.txt
+# pad text -> CSV rows (url,start,text), then add an "end" column taken from the next row
+python3 scripts/split-urls-csv.py < radioworm-20231107.pad.txt | \
+python3 scripts/csv-add-end.py > radioworm-20231107.csv
+# keep only the rows whose text contains "#intro"
+csvgrep -c text -m "#intro" radioworm-20231107.csv > radioworm-20231107.intro.csv
+# turn the selected rows into a melt command line, saved as edit.sh
+python3 scripts/csv-to-melt.py < radioworm-20231107.intro.csv > edit.sh
+# fetch the recording (presumably the file the pad's URLs point to -- confirm)
+wget https://hub.xpub.nl/chopchop/worm/xpub/radioworm-20231107.ogg
+bash edit.sh
diff --git a/scripts/csv-add-end.py b/scripts/csv-add-end.py
new file mode 100644
index 0000000..310a9c1
--- /dev/null
+++ b/scripts/csv-add-end.py
@@ -0,0 +1,14 @@
+import csv, sys
+
+# Add an "end" column: a row ends where the next row of the same url starts (left empty otherwise).
+outf = csv.writer(sys.stdout)
+outf.writerow(["url", "start", "end", "text"])
+last_row = None
+for row in csv.DictReader(sys.stdin):
+    if last_row is not None:
+        # rows are written one step late, once the start of their successor is known
+        end = row['start'] if last_row['url'] == row['url'] else None
+        outf.writerow([last_row['url'], last_row['start'], end, last_row['text']])
+    last_row = row
+if last_row is not None:  # no data rows at all: header only, instead of a TypeError on None
+    outf.writerow([last_row['url'], last_row['start'], None, last_row['text']])
diff --git a/scripts/csv-to-melt.py b/scripts/csv-to-melt.py
new file mode 100644
index 0000000..6b49612
--- /dev/null
+++ b/scripts/csv-to-melt.py
@@ -0,0 +1,38 @@
+import csv, sys, argparse
+from timecode import timecode_tosecs
+from math import floor
+from urllib.parse import urlparse
+import os
+
+
+def url_filename (url):
+    # last component of the URL path; query string and fragment are not part of urlparse(url).path
+    return os.path.basename(urlparse(url).path)
+
+def to_cmd (edl, framerate):
+    """Return a melt command for the rows of edl (dicts with url, start, end); timecodes become frame numbers at framerate."""
+    from shlex import quote  # file names derive from pad text: never paste them unquoted into a shell line
+    def s2f (t):
+        # frames are computed with to_cmd's own framerate argument, not with the global args
+        return None if t is None else floor(framerate*timecode_tosecs(t))
+    def cmdify (edit):
+        clip = '{0} in={1}'.format(quote(url_filename(edit['url'])), s2f(edit['start']))
+        # an empty end: no out= point is written for that clip
+        return '{0} out={1}'.format(clip, s2f(edit['end'])) if edit['end'] else clip
+    return "melt " + " ".join([cmdify(edit) for edit in edl])
+
+# Command line use: read the edit list as CSV (url, start and end columns) from stdin
+# and print the matching melt command on stdout.
+ap = argparse.ArgumentParser(description="csv to melt command")
+ap.add_argument("--framerate", type=float, default=25.0)
+ap.add_argument("--output", default="edit.mp4")
+ap.add_argument("--profile", default="quarter_pal_wide")
+args = ap.parse_args()
+
+rows = list(csv.DictReader(sys.stdin))
+
+cmd = to_cmd(rows, args.framerate)
+# the profile and the output file are appended once, after all the clips
+cmd += f" -profile {args.profile} -consumer avformat:{args.output}"
+
+print (cmd)
\ No newline at end of file
diff --git a/scripts/split-urls-csv.py b/scripts/split-urls-csv.py
new file mode 100644
index 0000000..a4ab219
--- /dev/null
+++ b/scripts/split-urls-csv.py
@@ -0,0 +1,13 @@
+import re, sys, csv
+
+# From [itertools recipes](https://docs.python.org/3/library/itertools.html#itertools-recipes): n-tuples of consecutive items
+def grouper (iterable, n):
+    shared = iter(iterable)  # one iterator listed n times: zip takes n consecutive items per tuple, a short tail is dropped
+    return zip(*([shared] * n))
+
+w = csv.writer(sys.stdout)
+w.writerow(["url","start", "text"])
+# re.split with 3 groups yields: leading text, then (full link, base url, timecode, following text) repeated
+items = re.split(r"((https?://\S+?)#t=(\S+))", sys.stdin.read())
+for _link, baseurl, timecode, text in grouper(items[1:], 4):
+    w.writerow((baseurl, timecode, text))
diff --git a/scripts/split-urls.py b/scripts/split-urls.py
new file mode 100644
index 0000000..18de454
--- /dev/null
+++ b/scripts/split-urls.py
@@ -0,0 +1,5 @@
+import re, sys, json
+
+# Split stdin on links carrying a "#t=" fragment; the group keeps the links in the list, between the text pieces.
+items = re.split(r"(https?://\S+?#t=\S+)", sys.stdin.read())
+print (json.dumps({'items': items}, indent=2))
diff --git a/scripts/timecode.py b/scripts/timecode.py
new file mode 100644
index 0000000..c6e5696
--- /dev/null
+++ b/scripts/timecode.py
@@ -0,0 +1,96 @@
+# This file is part of Active Archives.
+# Copyright 2006-2016 the Active Archives contributors (see AUTHORS)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# Also add information on how to contact you by electronic and paper mail.
+
+from __future__ import print_function
+import math
+import re
+
+# [HH:]MM:SS[.xxx] -- groups: optional hours, minutes, seconds, optional fraction digits after "." or ","
+timecode_pat = re.compile(r"(?:(\d+):)?(\d+):(\d+)(?:[.,](\d+))?")
+
+
+def timecode_fromsecs(rawsecs, fract=True, alwaysfract=False, fractdelim=',', alwayshours=False):
+    """Format a number of seconds as [HH:]MM:SS[,xxx].
+
+    rawsecs     -- time in seconds (int or float, expected >= 0)
+    fract       -- True: keep milliseconds (shown when non-zero or alwaysfract); False: round to whole seconds
+    alwaysfract -- always show the milliseconds (only used when fract is True)
+    fractdelim  -- separator placed before the milliseconds
+    alwayshours -- show the hours field even when it is zero
+    """
+    millis = None
+    if fract:
+        whole = int(math.floor(rawsecs))
+        remainder = rawsecs - whole
+        if remainder > 0 or alwaysfract:
+            millis = "%.03f" % remainder
+            if millis[0] == "1":
+                # the remainder rounded up to 1.000: carry it into the seconds
+                whole, millis = whole + 1, "0.000"
+    else:
+        whole = int(round(rawsecs))
+    # the fields are split AFTER rounding, so the seconds field can never read 60
+    hours, rest = divmod(whole, 3600)
+    mins, secs = divmod(rest, 60)
+    if hours or alwayshours:
+        ret = "%02d:%02d:%02d" % (hours, mins, secs)
+    else:
+        ret = "%02d:%02d" % (mins, secs)
+    if millis is not None:
+        ret = "%s%s%s" % (ret, fractdelim, millis[2:])
+    return ret
+
+
+def timecode_tosecs(tcstr):
+    """Return the seconds of the first [HH:]MM:SS[.xxx] timecode found in tcstr (float if it has a fraction), or None."""
+    m = timecode_pat.search(tcstr)
+    if m is None:
+        return None
+    hours, mins, secs, frac = m.groups()
+    total = 60 * int(mins) + int(secs)
+    if hours:
+        total += 3600 * int(hours)
+    if frac:
+        # the fraction digits are appended as text and the whole number is parsed in one step
+        total = float("%d.%s" % (total, frac))
+    return total
+
+
+def parse2secs(val):
+    """Return val in seconds: val is read as a plain number first, then as a timecode (see timecode_tosecs)."""
+    # only ValueError is handled: float(None) raises TypeError, which reaches the caller
+    try:
+        secs = float(val)
+    except ValueError:
+        secs = timecode_tosecs(val)
+    return secs
+
+if __name__ == "__main__":
+    # Demo: format every sample with and without the fraction and parse the
+    # result back, printing "value => timecode => seconds" each time.
+    def t(x):
+        # (fract, alwaysfract) first, then (fract,) alone, as positional arguments
+        for flags in ((True, False), (False,)):
+            s = timecode_fromsecs(x, *flags)
+            print (x, "=>", s, "=>", timecode_tosecs(s))
+
+    samples = (
+        0, 59.666666666666666, 60,
+        60.0, 1235 / 3.0, 10000.5,
+    )
+    for sample in samples:
+        t(sample)