tagradio + mixcloud (fun with subtitles)

4 months ago · 7053a5bbd4
commit 7053a5bbd4
10 changed files with 531 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+*.mp3
--- a/mixcloud/fun_with_subtitles_01.sh.txt
+++ b/mixcloud/fun_with_subtitles_01.sh.txt
@ -0,0 +1,127 @@
+# yt-dlp installed with pip
+# nb: you may need to update yt-dlp frequently to keep up with changes made by commercial services
+# to stop downloading tools like yt-dlp from working!
+#
+# pip install --upgrade yt-dlp
+#
+# Download the playlist, writing info in json
+yt-dlp "https://www.mixcloud.com/radiowormrotterdam/playlists/worm-25/" --write-info-json
+
+# let's move the files to a sub-folder
+mkdir worm25
+mv *.info.json *.webm worm25
+
+#
+#  _.........._
+# | |xpub    | |
+# | |        | |
+# | |        | |
+# | |________| |
+# |   ______   |
+# |  |    | |  |
+# |__|____|_|__|
+#
+# IN the early days of computer history
+# on computers using DOS (or Disk Operating System)
+# file names needed to conform to a very strict standard
+# of 8 letters, with only A-Z , dash -, and underscore _ , and numbers 0-9
+# (and no spaces!)
+# plus a 3 letter extension indicating the type of file
+# for example README.TXT
+#
+# Copy one file in this folder and give it a short name
+#
+cp worm25/WORM\ 25：\ A\ history\ of\ WORM\ in\ 25\ Objects\ \#1：\ Mia\ on\ the\ hand-towel\ dispenser\ \(04.06.24\)\ \[radiowormrotterdam_worm-25-a-history-of-worm-in-25-objects-1-mia-o
+n-the-hand-towel-dispenser-040624\].webm w25mia.webm
+
+#                  __
+#       .,-;-;-,. /'_\
+#     _/_/_/_|_\_\) /
+#   '-<_><_><_><_>=/\
+#     `/_/====/_/-'\_\
+#      ""     ""    ""
+# NOTE:
+# it may seem pointless to move files around and rename them with short names
+# BUT...
+# coding/working with the commandline requires A LOT OF FOCUS
+# so steps to reduce "cognitive load" like sifting through long lists of confusing filenames
+# that make working on the commandline slower and *harder to read*
+# are really worthwhile!
+#
+# Also when working with digital materials, it's often tempting to try to
+# address an entire collection (in this case all the recordings, and the whole hour of each recording)
+# BUT ...
+# it's really important when testing things out that you focus on a small sample
+# in this way you make experimentation, including the *necessary errors
+# and missteps*, as fluid as possible to stay in the flow,
+# so that you can get through the bugs
+# to the interesting results that will give you the energy
+# and confidence to keep going!
+#
+# so in this case....
+#
+# get something working for a 60 second sample
+# THEN once you know it works...
+# apply it to the whole hour long recording
+# and eventually all the recordings of the playlist
+
+
+# Working with a long file also can take a lot of time when experimenting
+# use ffmpeg to make a 60 second extract
+# -ss is start time, -t duration (duration of final extract)
+# at the same time, convert the webm to wav (the mp3 for the browser is made in a later step)
+ffmpeg -i w25mia.webm -ss 120 -t 60 w25mia60.wav
+
+# use vosk to transcribe, to srt
+# nb: w25mia60.wav is an INPUT and needs to already exist
+#     w25mia60.srt is an OUTPUT and will be (re)created
+vosk-transcriber -l en-us -i w25mia60.wav -t srt -o w25mia60.srt
+
+# SRT (SubRip Subtitle) comes from
+# practices of PIRATE CURATION
+# where films would be ripped from DVDs
+# and distributed as video files
+# SRT is then a simple (and small) text format for
+# the missing subtitles, that can then
+# be translated into many languages
+# and distributed separately from the (heavier) video files
+# Also due to the editability and understandability
+# the format was so popular, that it became the
+# basis of the W3C's web standard WebVTT (Web Video Text Tracks)
+#
+# see also: https://www.opensubtitles.com/
+#
+# The two are nearly the same except
+# the file needs to have a "header" (the first line should be:)
+# WEBVTT
+# And the timecodes use a dot instead of a comma..
+# SO
+# 00:00:00,075 --> 00:00:02,610
+# in SRT becomes in VTT:
+# 00:00:00.075 --> 00:00:02.610
+# 
+# let's use a python script to convert from srt to vtt
+scripts/srt2vtt.py w25mia60.srt w25mia60.vtt
+
+
+
+# make an mp3 for the browser
+ffmpeg -i w25mia60.wav w25mia60.mp3
+
+# adjust vtt.html to point to the mp3 + vtt
+# LOOK AT vtt.html
+# NB: to see captions, we need to use <video> even though we just have <audio>
+#
+# This seems to work in Firefox but not Chrome/Chromium.
+# NB the video is made 100% width and a fixed (small) height to make the caption size nice
+# BUT...
+# It would be even better to style the captions ourselves and bring them into the page
+# like other HTML content!
+#
+# VTT tracks can also trigger the *cuechange* event, which means we can program CUSTOM title behaviors!
+#
+
+# LOOK AT vtt_basic.html
+#     and vtt_custom.html
+
+
--- a/mixcloud/fun_with_subtitles_02.sh.txt
+++ b/mixcloud/fun_with_subtitles_02.sh.txt
@ -0,0 +1,6 @@
+# vosk can also output JSON that includes the timing of each individual
+# detected WORD!
+# NOTE: I had an error when I did this and needed to PATCH some PYTHON code in VOSK
+# see VOSKPATCH.TXT
+# nb: w25mia60.wav is the 60 second extract made in fun_with_subtitles_01.sh.txt
+vosk-transcriber -l en-us -i w25mia60.wav -t json -o w25mia60.json
+
--- a/mixcloud/scripts/srt2vtt.py
+++ b/mixcloud/scripts/srt2vtt.py
@ -0,0 +1,16 @@
+#!/usr/bin/env python
+import argparse
+import sys
+import re
+
+
+ap = argparse.ArgumentParser("convert srt into vtt")
+ap.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
+ap.add_argument('outfile', nargs='?', type=argparse.FileType('w'), default=sys.stdout)
+args = ap.parse_args()
+
+print ("WEBVTT\n", file=args.outfile)
+for line in args.infile:
+    print (re.sub(r"(\d\d:\d\d:\d\d),(\d\d\d) --> (\d\d:\d\d:\d\d),(\d\d\d)", r"\1.\2 --> \3.\4", line.rstrip()), file=args.outfile)
+
+    
--- a/mixcloud/vtt_basic.html
+++ b/mixcloud/vtt_basic.html
@ -0,0 +1,14 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
+  <title>vosk captions (basic)</title>
+</head>
+<body>
+    <!-- The source is audio only, but a <video> element is used so that the
+         captions track has somewhere to be shown. The width is 100% and the
+         height fixed (and small) to keep the caption size nice. -->
+    <video src="w25mia60.mp3" controls style="width: 100%; height: 320px;">
+        <!-- <track> is a void element: it takes no closing tag. -->
+        <track default kind="captions" label="vosk" srclang="en" src="w25mia60.vtt">
+    </video>
+</body>
+</html>
--- a/mixcloud/vtt_custom.html
+++ b/mixcloud/vtt_custom.html
@ -0,0 +1,28 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
+  <title>vosk captions (custom)</title>
+</head>
+<body>
+    <!-- The cues of this track are not drawn by the player; the script below
+         copies the active cue into #caption instead. -->
+    <audio src="w25mia60.mp3" controls style="width: 100%">
+        <!-- <track> is a void element: it takes no closing tag. -->
+        <track default id="captions" kind="metadata" label="vosk" src="w25mia60.vtt">
+    </audio>
+
+    <!-- The former "color: line" declaration was dropped: "line" is not a CSS
+         colour, so browsers ignored it and the text kept its default colour. -->
+    <div id="caption" style="text-align: center; padding: 25px; font-size: 42px">caption here</div>
+
+    <script>
+        const captrack = document.querySelector("track#captions");
+        const capdiv = document.querySelector("#caption");
+        captrack.addEventListener("cuechange", (e) => {
+            console.log('track: cuechange', e);
+            // No active cue (a gap between two cues) clears the caption.
+            const text = captrack.track.activeCues[0]?.text || "";
+            console.log(text);
+            // NOTE(review): innerHTML interprets the cue text as HTML. That is fine
+            // for the plain-text transcript used here; do not point this page at
+            // a .vtt file from an untrusted source without changing this line.
+            capdiv.innerHTML = text;
+        });
+        // Switch the track on without asking the browser to display it.
+        captrack.track.mode = "hidden";
+    </script>
+</body>
+</html>
--- a/mixcloud/w25mia60.vtt
+++ b/mixcloud/w25mia60.vtt
@ -0,0 +1,110 @@
+WEBVTT
+
+1
+00:00:00.075 --> 00:00:02.610
+have a particular connection to worms inner
+
+2
+00:00:02.610 --> 00:00:05.970
+and outer workings i wanted to make
+
+3
+00:00:05.970 --> 00:00:08.640
+the series because one has become over
+
+4
+00:00:08.640 --> 00:00:10.710
+twenty five years and institution
+
+5
+00:00:11.460 --> 00:00:14.130
+but an institution built with and for
+
+6
+00:00:14.130 --> 00:00:16.110
+and from a d i y spirits
+
+7
+00:00:16.590 --> 00:00:19.080
+and something that it still retains it's
+
+8
+00:00:19.080 --> 00:00:20.490
+the nature of do i was spaces
+
+9
+00:00:20.490 --> 00:00:22.710
+of course and any cultural spaces that
+
+10
+00:00:22.710 --> 00:00:24.390
+people who make it come and go
+
+11
+00:00:24.570 --> 00:00:25.830
+the name on the building stays the
+
+12
+00:00:25.830 --> 00:00:28.260
+same but the vibe changes and the
+
+13
+00:00:28.260 --> 00:00:29.880
+sense of identity shifts
+
+14
+00:00:31.020 --> 00:00:33.000
+worm is a place of invention and
+
+15
+00:00:33.000 --> 00:00:35.880
+reinvention and with the constant motion certain
+
+16
+00:00:35.880 --> 00:00:39.120
+stories and memories do fade away sometimes
+
+17
+00:00:39.120 --> 00:00:39.870
+quite quickly
+
+18
+00:00:40.770 --> 00:00:42.563
+the idea of this history of why
+
+19
+00:00:42.563 --> 00:00:44.790
+i'm not the only possible history of
+
+20
+00:00:44.790 --> 00:00:47.160
+course is to try and capture a
+
+21
+00:00:47.160 --> 00:00:48.750
+few of those stories with the people
+
+22
+00:00:48.750 --> 00:00:50.748
+that i've met and people that maria
+
+23
+00:00:50.748 --> 00:00:52.680
+it has known along the way
+
+24
+00:00:53.790 --> 00:00:54.780
+but i'm not going to begin at
+
+25
+00:00:54.780 --> 00:00:56.880
+the beginning that would be logical but
+
+26
+00:00:56.880 --> 00:00:59.370
+actually have already begun obviously and today's
+
+27
+00:00:59.370 --> 00:00:59.970
+episode
+
--- a/tagradio/fetch-jsonp.js
+++ b/tagradio/fetch-jsonp.js
@ -0,0 +1,132 @@
+(function (global, factory) {
+  if (typeof define === 'function' && define.amd) {
+    define(['exports', 'module'], factory);
+  } else if (typeof exports !== 'undefined' && typeof module !== 'undefined') {
+    factory(exports, module);
+  } else {
+    var mod = {
+      exports: {}
+    };
+    factory(mod.exports, mod);
+    global.fetchJsonp = mod.exports;
+  }
+})(this, function (exports, module) {
+  'use strict';
+
+  // Fallback settings used by fetchJsonp() when the caller's options omit them.
+  var defaultOptions = {
+    // Milliseconds to wait before the request is rejected as timed out.
+    timeout: 5000,
+    // Name of the query-string parameter that carries the callback function name.
+    jsonpCallback: 'callback',
+    // Not read by the code visible in this file: when the caller gives no
+    // jsonpCallbackFunction, fetchJsonp() generates a name instead.
+    jsonpCallbackFunction: null
+  };
+
+  function generateCallbackFunction() {
+    return 'jsonp_' + Date.now() + '_' + Math.ceil(Math.random() * 100000);
+  }
+
+  function clearFunction(functionName) {
+    // IE8 throws an exception when you try to delete a property on window
+    // http://stackoverflow.com/a/1824228/751089
+    try {
+      delete window[functionName];
+    } catch (e) {
+      window[functionName] = undefined;
+    }
+  }
+
+  function removeScript(scriptId) {
+    var script = document.getElementById(scriptId);
+    if (script) {
+      document.getElementsByTagName('head')[0].removeChild(script);
+    }
+  }
+
+  /**
+   * Perform a JSONP request by adding a <script> element to <head>.
+   *
+   * @param {string} _url - URL to request; the callback parameter is appended to it.
+   * @param {Object} [options] - optional settings read here: timeout,
+   *   jsonpCallback, jsonpCallbackFunction, charset, nonce, referrerPolicy,
+   *   crossorigin.
+   * @returns {Promise} resolves with { ok: true, json() }, where json() returns a
+   *   promise for the value the server passed to the callback; rejects with an
+   *   Error when the timeout expires or the script fails to load.
+   */
+  function fetchJsonp(_url) {
+    // A missing or undefined second argument means "no options".
+    var options = arguments.length <= 1 || arguments[1] === undefined ? {} : arguments[1];
+
+    // to avoid param reassign
+    var url = _url;
+    // A falsy timeout (including 0) falls back to the default.
+    var timeout = options.timeout || defaultOptions.timeout;
+    var jsonpCallback = options.jsonpCallback || defaultOptions.jsonpCallback;
+
+    var timeoutId = undefined;
+
+    return new Promise(function (resolve, reject) {
+      var callbackFunction = options.jsonpCallbackFunction || generateCallbackFunction();
+      var scriptId = jsonpCallback + '_' + callbackFunction;
+
+      // The loaded script calls this global function with the response data.
+      window[callbackFunction] = function (response) {
+        resolve({
+          ok: true,
+          // keep consistent with fetch API
+          json: function json() {
+            return Promise.resolve(response);
+          }
+        });
+
+        if (timeoutId) clearTimeout(timeoutId);
+
+        removeScript(scriptId);
+
+        clearFunction(callbackFunction);
+      };
+
+      // Check if the user set their own params, and if not add a ? to start a list of params
+      url += url.indexOf('?') === -1 ? '?' : '&';
+
+      var jsonpScript = document.createElement('script');
+      jsonpScript.setAttribute('src', '' + url + jsonpCallback + '=' + callbackFunction);
+      if (options.charset) {
+        jsonpScript.setAttribute('charset', options.charset);
+      }
+      if (options.nonce) {
+        jsonpScript.setAttribute('nonce', options.nonce);
+      }
+      if (options.referrerPolicy) {
+        jsonpScript.setAttribute('referrerPolicy', options.referrerPolicy);
+      }
+      if (options.crossorigin) {
+        jsonpScript.setAttribute('crossorigin', 'true');
+      }
+      jsonpScript.id = scriptId;
+      document.getElementsByTagName('head')[0].appendChild(jsonpScript);
+
+      timeoutId = setTimeout(function () {
+        reject(new Error('JSONP request to ' + _url + ' timed out'));
+
+        clearFunction(callbackFunction);
+        removeScript(scriptId);
+        // Leave a stub under the same name that only removes itself, in case
+        // the response still arrives after the timeout.
+        window[callbackFunction] = function () {
+          clearFunction(callbackFunction);
+        };
+      }, timeout);
+
+      // Caught if got 404/500
+      jsonpScript.onerror = function () {
+        reject(new Error('JSONP request to ' + _url + ' failed'));
+
+        clearFunction(callbackFunction);
+        removeScript(scriptId);
+        if (timeoutId) clearTimeout(timeoutId);
+      };
+    });
+  }
+
+  // export as global function
+  /*
+  let local;
+  if (typeof global !== 'undefined') {
+    local = global;
+  } else if (typeof self !== 'undefined') {
+    local = self;
+  } else {
+    try {
+      local = Function('return this')();
+    } catch (e) {
+      throw new Error('polyfill failed because global object is unavailable in this environment');
+    }
+  }
+  local.fetchJsonp = fetchJsonp;
+  */
+
+  module.exports = fetchJsonp;
+});
--- a/tagradio/index.html
+++ b/tagradio/index.html
@ -0,0 +1,41 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<title>minimal archive.org search</title>
+</head>
+<body>
+    <p>This is a minimal form to send a search request to archive.org</p>
+    <!-- tagradio.js intercepts the submit event of this form and sends the
+         request itself, using the field values below as the query string. -->
+    <form action="https://archive.org/advancedsearch.php">
+        <input size="50" type="text" name="q" value="subject:&quot;field recording&quot;">
+        <input type="submit" value="Search">
+        <!-- the rest are hidden -->
+        <input type="hidden" name="fl[]" value="date">
+        <input type="hidden" name="fl[]" value="description">
+        <input type="hidden" name="fl[]" value="format">
+        <input type="hidden" name="fl[]" value="identifier">
+        <input type="hidden" name="fl[]" value="licenseurl">
+        <input type="hidden" name="fl[]" value="mediatype">
+        <input type="hidden" name="fl[]" value="name">
+        <input type="hidden" name="fl[]" value="rights">
+        <input type="hidden" name="fl[]" value="subject">
+        <input type="hidden" name="fl[]" value="title">
+        <input type="hidden" name="fl[]" value="type">
+        <input type="hidden" name="sort[]" value="year desc">
+        <input type="hidden" name="sort[]" value="identifier asc">
+        <input id="numresults" type="hidden" name="rows" value="10">
+        <input type="hidden" name="page" value="1">
+        <input type="hidden" name="output" value="json">
+        <!-- <input type="hidden" name="output" value="tables">
+        <input type="hidden" name="callback" value="callback"> -->
+    </form>
+    <!-- tagradio.js adds one <div> per search result here -->
+    <div id="results"></div>
+    <script src="fetch-jsonp.js"></script>
+    <script src="tagradio.js"></script>
+</body>
+</html>
--- a/tagradio/tagradio.js
+++ b/tagradio/tagradio.js
@ -0,0 +1,56 @@
+// https://github.com/camsong/fetch-jsonp
+// https://raw.githubusercontent.com/camsong/fetch-jsonp/refs/heads/master/src/fetch-jsonp.js
+
+let form = document.querySelector("form");
+
+form.addEventListener("submit", event => {
+    // console.log("GO NO FURTHER FORM... I'll take it from here");
+    event.preventDefault();
+    let formdata = new FormData(form);
+    let usp = new URLSearchParams(formdata);
+    console.log("urlsearchparams", usp.toString());
+    let results_div = document.querySelector("#results");
+    fetchJsonp("https://archive.org/advancedsearch.php?"+usp.toString())
+        .then(resp => resp.json())
+        .then(data => {
+            console.log("data", data);
+            data.response.docs.forEach (doc => {
+                console.log(doc);
+                let div = document.createElement("div");
+                let a = document.createElement("a");
+                let item_url = `https://archive.org/details/${doc.identifier}`;
+                let item_metadata_url = `https://archive.org/metadata/${doc.identifier}`;
+                a.href = item_url;
+                a.textContent = doc.title;
+                div.appendChild(a);
+                results_div.appendChild(div);
+                
+                fetchJsonp(item_metadata_url)
+                    .then (resp => resp.json())
+                    .then(item_data => {
+                        console.log("*", item_data);
+                        let file_url = `https://${item_data.server}${item_data.dir}/`;
+                        let mp3_files = item_data.files.filter(f => f.format == "VBR MP3");
+                        let first_file_url = file_url + encodeURI(mp3_files[0].name);
+                        let audio = document.createElement("audio");
+                        div.appendChild(audio);
+                        audio.controls = true;
+                        audio.src = first_file_url;
+                        
+                    })
+            
+            })
+            // here we have access to the response data...
+        })
+
+    // fetch("https://archive.org/advancedsearch.php", {
+    //     method: "cors",
+    //     body: formdata
+    // }).then(resp => resp.json())
+    // .then(data => {
+    //     console.log("data", data);
+    // })
+
+
+    
+})