tagradio + mixcloud (fun with subtitles)

2 months ago · 7053a5bbd4
commit 7053a5bbd4
10 changed files with 531 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
 *.mp3
--- a/mixcloud/fun_with_subtitles_01.sh.txt
+++ b/mixcloud/fun_with_subtitles_01.sh.txt
@ -0,0 +1,127 @@
 # yt-dlp installed with pip
 # nb: you may need to update yt-dlp frequently to keep up with the changes commercial services make
 # to stop downloading tools like yt-dlp from working!
 #
 # pip install --upgrade yt-dlp
 #
 # Download the playlist, writing info in json
 yt-dlp "https://www.mixcloud.com/radiowormrotterdam/playlists/worm-25/" --write-info-json
 # let's move the files to a sub-folder
 mkdir worm25
 mv *.info.json *.webm worm25
 #
 #  _.........._
 # | |xpub    | |
 # | |        | |
 # | |        | |
 # | |________| |
 # |   ______   |
 # |  |    | |  |
 # |__|____|_|__|
 #
 # IN the early days of computer history
 # on computers using DOS (or Disk Operating System)
 # file names needed to conform to a very strict standard
 # of 8 letters, with only A-Z , dash -, and underscore _ , and numbers 0-9
 # (and no spaces!)
 # plus a 3 letter extension indicating the type of file
 # for example README.TXT
 #
 # Copy one file in this folder and give it a short name
 #
 # (keep this command on ONE line: a line break inside the long file name splits it into two commands)
 cp worm25/WORM\ 25：\ A\ history\ of\ WORM\ in\ 25\ Objects\ \#1：\ Mia\ on\ the\ hand-towel\ dispenser\ \(04.06.24\)\ \[radiowormrotterdam_worm-25-a-history-of-worm-in-25-objects-1-mia-on-the-hand-towel-dispenser-040624\].webm w25mia.webm
 #                  __
 #       .,-;-;-,. /'_\
 #     _/_/_/_|_\_\) /
 #   '-<_><_><_><_>=/\
 #     `/_/====/_/-'\_\
 #      ""     ""    ""
 # NOTE:
 # it may seem pointless to move files around and rename them with short names
 # BUT...
 # coding/working with the commandline requires A LOT OF FOCUS
 # so steps to reduce "cognitive load" like sifting through long lists of confusing filenames
 # that make working on the commandline slower and *harder to read*
 # are really worthwhile!
 #
 # Also when working with digital materials, it's often tempting to try to
 # address an entire collection (in this case all the recordings, and the whole hour of each recording)
 # BUT ...
 # it's really important when testing things out that you focus on a small sample
 # in this way you make experimentation, including the *necessary errors
 # and missteps*, as fluid as possible to stay in the flow,
 # so that you can get through the bugs
 # to the interesting results that will give you the energy
 # and confidence to keep going!
 #
 # so in this case....
 #
 # get something working for a 60 second sample
 # THEN once you know it works...
 # apply it to the whole hour long recording
 # and eventually all the recordings of the playlist
 # Working with a long file also can take a lot of time when experimenting
 # use ffmpeg to make a 60 second extract
 # -ss is start time, -t duration (duration of final extract)
 # at the same time, convert the webm to wav (the input for vosk; the mp3 for the browser is made further down)
 ffmpeg -i w25mia.webm -ss 120 -t 60 w25mia60.wav
 # use vosk to transcribe, to srt
 # nb: w25mia60.wav is an INPUT and needs to already exist
 #     w25mia60.srt is an OUTPUT and will be (re)created
 vosk-transcriber -l en-us -i w25mia60.wav -t srt -o w25mia60.srt
 # SRT (SubRip Subtitle) comes from
 # practices of PIRATE CURATION
 # where films would be ripped from DVDs
 # and distributed as video files
 # SRT is then a simple (and small) text format for
 # the missing subtitles, that can then
 # be translated into many languages
 # and distributed separately from the (heavier) video files
 # Also due to the editability and understandability
 # the format was so popular, that it became the
 # basis of the W3C's web standard (web) VTT
 #
 # see also: https://www.opensubtitles.com/
 #
 # The two are nearly the same except
 # the file needs to have a "header" (the first line should be:)
 # WEBVTT
 # And the timecodes use a dot instead of a comma..
 # SO
 # 00:00:00,075 --> 00:00:02,610
 # in SRT becomes in VTT:
 # 00:00:00.075 --> 00:00:02.610
 # 
 # let's use a python script to convert from srt to vtt
 scripts/srt2vtt.py w25mia60.srt w25mia60.vtt
 # make an mp3 for the browser
 ffmpeg -i w25mia60.wav w25mia60.mp3
 # adjust vtt.html to point to the mp3 + vtt
 # LOOK AT vtt.html
 # NB: to see captions, we need to use <video> even though we just have <audio>
 #
 # This seems to work in Firefox but not Chrome/Chromium.
 # NB the video is made 100% width and a fixed (small) height to make the caption size nice
 # BUT...
 # It would be even better to style the captions ourselves and bring them into the page
 # like other HTML content!
 #
 # VTT tracks can also trigger the *cuechange* event, which means we can program CUSTOM title behaviors!
 #
 # LOOK AT vtt_basic.html
 #     and vtt_custom.html
--- a/mixcloud/fun_with_subtitles_02.sh.txt
+++ b/mixcloud/fun_with_subtitles_02.sh.txt
@ -0,0 +1,6 @@
 # vosk can also output JSON that includes the timing of each individual
 # detected WORD!
 # NOTE: I had an error when I did this and needed to PATCH some PYTHON code in VOSK
 # see VOSKPATCH.TXT
 # nb: the INPUT is the 60 second extract made in fun_with_subtitles_01 (w25mia60.wav)
 vosk-transcriber -l en-us -i w25mia60.wav -t json -o w25mia60.json
--- a/mixcloud/scripts/srt2vtt.py
+++ b/mixcloud/scripts/srt2vtt.py
@ -0,0 +1,16 @@
 #!/usr/bin/env python3
 # Convert SubRip (.srt) subtitles to WebVTT (.vtt).
 #
 # usage: srt2vtt.py [infile] [outfile]
 #   infile   SRT file to read  (default: standard input)
 #   outfile  VTT file to write (default: standard output)
 #
 # Writes the "WEBVTT" header followed by a blank line, then copies the input
 # line by line (trailing whitespace stripped). Wherever a line contains an SRT
 # timing such as "00:00:00,075 --> 00:00:02,610", the comma before the
 # milliseconds becomes a dot; all other text is passed through unchanged.
 #
 # The shebang asks for python3 because print(..., file=...) is Python 3 syntax.
 import argparse
 import sys
 import re
 # Compiled once, outside the loop: start and end timestamps of an SRT timing line.
 TIMING = re.compile(r"(\d\d:\d\d:\d\d),(\d\d\d) --> (\d\d:\d\d:\d\d),(\d\d\d)")
 ap = argparse.ArgumentParser("convert srt into vtt")
 ap.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
 ap.add_argument('outfile', nargs='?', type=argparse.FileType('w'), default=sys.stdout)
 args = ap.parse_args()
 # print() appends its own newline, so this emits "WEBVTT" plus one empty line.
 print ("WEBVTT\n", file=args.outfile)
 for line in args.infile:
     print (TIMING.sub(r"\1.\2 --> \3.\4", line.rstrip()), file=args.outfile)
--- a/mixcloud/vtt_basic.html
+++ b/mixcloud/vtt_basic.html
@ -0,0 +1,14 @@
 <!DOCTYPE html>
 <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
 <head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
    <style>
    </style>
 </head>
 <body>
    <!-- The source is an audio file (mp3), but it is played in a <video> element
         so that the captions track below has an area to be displayed in.
         The element is given the full page width and a fixed height of 320px. -->
    <video src="w25mia60.mp3" controls style="width: 100%; height: 320px;">
        <!-- Captions track, enabled by default; w25mia60.vtt is loaded relative to this page. -->
        <track default kind="captions" label="vosk" src="w25mia60.vtt"></track>
    </video>
 </body>
 </html>
--- a/mixcloud/vtt_custom.html
+++ b/mixcloud/vtt_custom.html
@ -0,0 +1,28 @@
 <!DOCTYPE html>
 <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
 <head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
    <style>
    </style>
 </head>
 <body>
    <!-- The VTT file is attached as a "metadata" track: the page itself shows the
         cue text in the #caption element below, using the script at the bottom. -->
    <audio src="w25mia60.mp3" controls style="width: 100%">
        <!-- <track> is a void element, so it has no end tag -->
        <track default id="captions" kind="metadata" label="vosk" src="w25mia60.vtt">
    </audio>
    <!-- NOTE(review): "color: line" is not a valid CSS color, so the declaration has no effect;
         probably a typo for a color name. Left as-is until the intended color is confirmed. -->
    <div id="caption" style="text-align: center; padding: 25px; font-size: 42px; color: line">caption here</div>
    <script>
        let captrack = document.querySelector("track#captions");
        let capdiv = document.querySelector("#caption");
        // Each time the set of active cues changes, show the first active cue
        // (or nothing, when no cue is active) in the caption element.
        captrack.addEventListener("cuechange", (e) => {
            console.log('track: cuechange', e);
            let text = captrack.track.activeCues[0]?.text || "";
            console.log(text);
            // textContent, not innerHTML: the cue text comes from a file and must be
            // shown literally, never parsed as markup.
            capdiv.textContent = text;
        });
        // "hidden" keeps the track active (cues are loaded and cuechange fires)
        // without the browser displaying the cues itself.
        captrack.track.mode = "hidden";
    </script>
 </body>
 </html>
--- a/mixcloud/w25mia60.vtt
+++ b/mixcloud/w25mia60.vtt
@ -0,0 +1,110 @@
 WEBVTT
 1
 00:00:00.075 --> 00:00:02.610
 have a particular connection to worms inner
 2
 00:00:02.610 --> 00:00:05.970
 and outer workings i wanted to make
 3
 00:00:05.970 --> 00:00:08.640
 the series because one has become over
 4
 00:00:08.640 --> 00:00:10.710
 twenty five years and institution
 5
 00:00:11.460 --> 00:00:14.130
 but an institution built with and for
 6
 00:00:14.130 --> 00:00:16.110
 and from a d i y spirits
 7
 00:00:16.590 --> 00:00:19.080
 and something that it still retains it's
 8
 00:00:19.080 --> 00:00:20.490
 the nature of do i was spaces
 9
 00:00:20.490 --> 00:00:22.710
 of course and any cultural spaces that
 10
 00:00:22.710 --> 00:00:24.390
 people who make it come and go
 11
 00:00:24.570 --> 00:00:25.830
 the name on the building stays the
 12
 00:00:25.830 --> 00:00:28.260
 same but the vibe changes and the
 13
 00:00:28.260 --> 00:00:29.880
 sense of identity shifts
 14
 00:00:31.020 --> 00:00:33.000
 worm is a place of invention and
 15
 00:00:33.000 --> 00:00:35.880
 reinvention and with the constant motion certain
 16
 00:00:35.880 --> 00:00:39.120
 stories and memories do fade away sometimes
 17
 00:00:39.120 --> 00:00:39.870
 quite quickly
 18
 00:00:40.770 --> 00:00:42.563
 the idea of this history of why
 19
 00:00:42.563 --> 00:00:44.790
 i'm not the only possible history of
 20
 00:00:44.790 --> 00:00:47.160
 course is to try and capture a
 21
 00:00:47.160 --> 00:00:48.750
 few of those stories with the people
 22
 00:00:48.750 --> 00:00:50.748
 that i've met and people that maria
 23
 00:00:50.748 --> 00:00:52.680
 it has known along the way
 24
 00:00:53.790 --> 00:00:54.780
 but i'm not going to begin at
 25
 00:00:54.780 --> 00:00:56.880
 the beginning that would be logical but
 26
 00:00:56.880 --> 00:00:59.370
 actually have already begun obviously and today's
 27
 00:00:59.370 --> 00:00:59.970
 episode
--- a/tagradio/fetch-jsonp.js
+++ b/tagradio/fetch-jsonp.js
@ -0,0 +1,132 @@
 (function (global, factory) {
  if (typeof define === 'function' && define.amd) {
    define(['exports', 'module'], factory);
  } else if (typeof exports !== 'undefined' && typeof module !== 'undefined') {
    factory(exports, module);
  } else {
    var mod = {
      exports: {}
    };
    factory(mod.exports, mod);
    global.fetchJsonp = mod.exports;
  }
 })(this, function (exports, module) {
  'use strict';
  var defaultOptions = {
    timeout: 5000,
    jsonpCallback: 'callback',
    jsonpCallbackFunction: null
  };
  function generateCallbackFunction() {
    return 'jsonp_' + Date.now() + '_' + Math.ceil(Math.random() * 100000);
  }
  function clearFunction(functionName) {
    // IE8 throws an exception when you try to delete a property on window
    // http://stackoverflow.com/a/1824228/751089
    try {
      delete window[functionName];
    } catch (e) {
      window[functionName] = undefined;
    }
  }
  function removeScript(scriptId) {
    var script = document.getElementById(scriptId);
    if (script) {
      document.getElementsByTagName('head')[0].removeChild(script);
    }
  }
  function fetchJsonp(_url) {
    var options = arguments.length <= 1 || arguments[1] === undefined ? {} : arguments[1];
    // to avoid param reassign
    var url = _url;
    var timeout = options.timeout || defaultOptions.timeout;
    var jsonpCallback = options.jsonpCallback || defaultOptions.jsonpCallback;
    var timeoutId = undefined;
    return new Promise(function (resolve, reject) {
      var callbackFunction = options.jsonpCallbackFunction || generateCallbackFunction();
      var scriptId = jsonpCallback + '_' + callbackFunction;
      window[callbackFunction] = function (response) {
        resolve({
          ok: true,
          // keep consistent with fetch API
          json: function json() {
            return Promise.resolve(response);
          }
        });
        if (timeoutId) clearTimeout(timeoutId);
        removeScript(scriptId);
        clearFunction(callbackFunction);
      };
      // Check if the user set their own params, and if not add a ? to start a list of params
      url += url.indexOf('?') === -1 ? '?' : '&';
      var jsonpScript = document.createElement('script');
      jsonpScript.setAttribute('src', '' + url + jsonpCallback + '=' + callbackFunction);
      if (options.charset) {
        jsonpScript.setAttribute('charset', options.charset);
      }
      if (options.nonce) {
        jsonpScript.setAttribute('nonce', options.nonce);
      }
      if (options.referrerPolicy) {
        jsonpScript.setAttribute('referrerPolicy', options.referrerPolicy);
      }
      if (options.crossorigin) {
        jsonpScript.setAttribute('crossorigin', 'true');
      }
      jsonpScript.id = scriptId;
      document.getElementsByTagName('head')[0].appendChild(jsonpScript);
      timeoutId = setTimeout(function () {
        reject(new Error('JSONP request to ' + _url + ' timed out'));
        clearFunction(callbackFunction);
        removeScript(scriptId);
        window[callbackFunction] = function () {
          clearFunction(callbackFunction);
        };
      }, timeout);
      // Caught if got 404/500
      jsonpScript.onerror = function () {
        reject(new Error('JSONP request to ' + _url + ' failed'));
        clearFunction(callbackFunction);
        removeScript(scriptId);
        if (timeoutId) clearTimeout(timeoutId);
      };
    });
  }
  // export as global function
  /*
  let local;
  if (typeof global !== 'undefined') {
    local = global;
  } else if (typeof self !== 'undefined') {
    local = self;
  } else {
    try {
      local = Function('return this')();
    } catch (e) {
      throw new Error('polyfill failed because global object is unavailable in this environment');
    }
  }
  local.fetchJsonp = fetchJsonp;
  */
  module.exports = fetchJsonp;
 });
--- a/tagradio/index.html
+++ b/tagradio/index.html
@ -0,0 +1,41 @@
 <!DOCTYPE html>
 <html>
 <head>
 <title>minimal archive.org search</title>
 </head>
 <body>
    <p>This is a minimal form to send a search request to archive.org</p>
    <!-- tagradio.js intercepts the submit event of this form and sends the fields itself -->
    <form action="https://archive.org/advancedsearch.php">
        <input size="50" type="text" name="q" value="subject:&quot;field recording&quot;">
        <input type="submit" value="Search">
        <!-- the rest are hidden -->
        <input type="hidden" name="fl[]" value="date">
        <input type="hidden" name="fl[]" value="description">
        <input type="hidden" name="fl[]" value="format">
        <input type="hidden" name="fl[]" value="identifier">
        <input type="hidden" name="fl[]" value="licenseurl">
        <input type="hidden" name="fl[]" value="mediatype">
        <input type="hidden" name="fl[]" value="name">
        <input type="hidden" name="fl[]" value="rights">
        <input type="hidden" name="fl[]" value="subject">
        <input type="hidden" name="fl[]" value="title">
        <input type="hidden" name="fl[]" value="type">
        <input type="hidden" name="sort[]" value="year desc">
        <input type="hidden" name="sort[]" value="identifier asc">
        <input id="numresults" type="hidden" name="rows" value="10">
        <input type="hidden" name="page" value="1">
        <input type="hidden" name="output" value="json">
        <!-- <input type="hidden" name="output" value="tables">
        <input type="hidden" name="callback" value="callback"> -->
    </form>
    <!-- tagradio.js appends one <div> per search result here -->
    <div id="results"></div>
    <!-- fetch-jsonp.js must load first: it defines the global fetchJsonp that tagradio.js calls -->
    <script src="fetch-jsonp.js"></script>
    <script src="tagradio.js"></script>
 </body>
 </html>
--- a/tagradio/tagradio.js
+++ b/tagradio/tagradio.js
@ -0,0 +1,56 @@
 // Search archive.org from the page's form and list the results, each with an
 // audio player for the item's first "VBR MP3" file (when it has one).
 //
 // Requests are made with the global fetchJsonp from fetch-jsonp.js:
 // https://github.com/camsong/fetch-jsonp
 // https://raw.githubusercontent.com/camsong/fetch-jsonp/refs/heads/master/src/fetch-jsonp.js
 let form = document.querySelector("form");
 form.addEventListener("submit", event => {
    // Stop the browser from submitting the form itself; the request is sent below.
    event.preventDefault();
    let formdata = new FormData(form);
    let usp = new URLSearchParams(formdata);
    console.log("urlsearchparams", usp.toString());
    let results_div = document.querySelector("#results");
    // Percent-encode every path segment of a file name but keep the "/" separators,
    // so that names containing characters such as "#" or "?" still give a usable URL
    // (encodeURI leaves those characters untouched).
    const encodeFilePath = name => name.split("/").map(encodeURIComponent).join("/");
    fetchJsonp("https://archive.org/advancedsearch.php?"+usp.toString())
        .then(resp => resp.json())
        .then(data => {
            console.log("data", data);
            data.response.docs.forEach (doc => {
                console.log(doc);
                let div = document.createElement("div");
                let a = document.createElement("a");
                let item_url = `https://archive.org/details/${doc.identifier}`;
                let item_metadata_url = `https://archive.org/metadata/${doc.identifier}`;
                a.href = item_url;
                a.textContent = doc.title;
                div.appendChild(a);
                results_div.appendChild(div);
                // Second request per result: look up the item's file list to find something playable.
                fetchJsonp(item_metadata_url)
                    .then (resp => resp.json())
                    .then(item_data => {
                        console.log("*", item_data);
                        let mp3_files = (item_data.files || []).filter(f => f.format === "VBR MP3");
                        // No "VBR MP3" file in this item: mp3_files[0] would be undefined,
                        // so leave the result as a plain link without a player.
                        if (mp3_files.length === 0) {
                            return;
                        }
                        let file_url = `https://${item_data.server}${item_data.dir}/`;
                        let first_file_url = file_url + encodeFilePath(mp3_files[0].name);
                        let audio = document.createElement("audio");
                        div.appendChild(audio);
                        audio.controls = true;
                        audio.src = first_file_url;
                    })
                    // A failed or timed-out metadata request only affects this one result.
                    .catch(err => {
                        console.error("metadata request failed for", doc.identifier, err);
                    });
            })
            // here we have access to the response data...
        })
        // Report search failures (timeout, network error, unexpected response shape)
        // instead of leaving an unhandled promise rejection.
        .catch(err => {
            console.error("search request failed", err);
        });
    // fetch("https://archive.org/advancedsearch.php", {
    //     method: "cors",
    //     body: formdata
    // }).then(resp => resp.json())
    // .then(data => {
    //     console.log("data", data);
    // })
 })