tagradio + mixcloud (fun with subtitles)

main
Michael Murtaugh 2 months ago
commit 7053a5bbd4

1
.gitignore vendored

@ -0,0 +1 @@
*.mp3

@ -0,0 +1,127 @@
# yt-dlp installed with pip
# nb: you may need to update yt-dlp frequently, to keep up with the changes that commercial services
# make in order to stop downloading tools like yt-dlp from working!
#
# pip install --upgrade yt-dlp
#
# Download the playlist, writing info in json
yt-dlp "https://www.mixcloud.com/radiowormrotterdam/playlists/worm-25/" --write-info-json
# let's move the files to a sub-folder
# (-p: don't complain if the folder already exists, e.g. when repeating these steps)
mkdir -p worm25
mv *.info.json *.webm worm25
#
# _.........._
# | |xpub | |
# | | | |
# | | | |
# | |________| |
# | ______ |
# | | | | |
# |__|____|_|__|
#
# IN the early days of computer history
# on computers using DOS (or Disk Operating System)
# file names needed to conform to a very strict standard
# of 8 letters, with only A-Z , dash -, and underscore _ , and numbers 0-9
# (and no spaces!)
# plus a 3 letter extension indicating the type of file
# for example README.TXT
#
# Copy one file in this folder and give it a short name
# nb: the long name is wrapped in 'single quotes' so that its spaces, #, ( ) and [ ]
# are passed to cp as ONE file name -- and the whole command has to stay on ONE line
cp 'worm25/WORM 25 A history of WORM in 25 Objects #1 Mia on the hand-towel dispenser (04.06.24) [radiowormrotterdam_worm-25-a-history-of-worm-in-25-objects-1-mia-on-the-hand-towel-dispenser-040624].webm' w25mia.webm
# __
# .,-;-;-,. /'_\
# _/_/_/_|_\_\) /
# '-<_><_><_><_>=/\
# `/_/====/_/-'\_\
# "" "" ""
# NOTE:
# it may seem pointless to move files around and rename them with short names
# BUT...
# coding/working with the commandline requires A LOT OF FOCUS
# so steps to reduce "cognitive load" like sifting through long lists of confusing filenames
# that make working on the commandline slower and *harder to read*
# are really worthwhile!
#
# Also when working with digital materials, it's often tempting to try to
# address an entire collection (in this case all the recordings, and the whole hour of each recording)
# BUT ...
# it's really important when testing things out that you focus on a small sample
# in this way you make experimentation, including the *necessary errors
# and missteps*, as fluid as possible to stay in the flow,
# so that you can get through the bugs
# to the interesting results that will give you the energy
# and confidence to keep going!
#
# so in this case....
#
# get something working for a 60 second sample
# THEN once you know it works...
# apply it to the whole hour long recording
# and eventually all the recordings of the playlist
# Working with a long file also can take a lot of time when experimenting
# use ffmpeg to make a 60 second extract
# -ss is start time (here: 120 seconds in), -t duration (duration of final extract)
# at the same time, convert the webm to wav (the wav is what we give to vosk below)
ffmpeg -i w25mia.webm -ss 120 -t 60 w25mia60.wav
# use vosk to transcribe, to srt
# nb: w25mia60.wav is an INPUT and needs to already exist
# w25mia60.srt is an OUTPUT and will be (re)created
vosk-transcriber -l en-us -i w25mia60.wav -t srt -o w25mia60.srt
# SRT (SubRip Subtitle) comes from
# practices of PIRATE CURATION
# where films would be ripped from DVDs
# and distributed as video files
# SRT is then a simple (and small) text format for
# the missing subtitles, that can then
# be translated into many languages
# and distributed separately from the (heavier) video files
# Also due to the editability and understandability
# the format was so popular, that it became the
# basis of the W3C's web standard WebVTT (Web Video Text Tracks)
#
# see also: https://www.opensubtitles.com/
#
# The two are nearly the same except
# the file needs to have a "header" (the first line should be:)
# WEBVTT
# And the timecodes use a dot instead of a comma..
# SO
# 00:00:00,075 --> 00:00:02,610
# in SRT becomes in VTT:
# 00:00:00.075 --> 00:00:02.610
#
# let's use a python script to convert from srt to vtt
# (first name is the INPUT srt that already exists, second name is the OUTPUT vtt)
scripts/srt2vtt.py w25mia60.srt w25mia60.vtt
# make an mp3 for the browser
# (ffmpeg picks the output format from the extension of the last file name)
ffmpeg -i w25mia60.wav w25mia60.mp3
# adjust vtt.html to point to the mp3 + vtt
# LOOK AT vtt.html
# NB: to see captions, we need to use <video> even though we just have <audio>
#
# This seems to work in Firefox but not Chrome/Chromium.
# NB the video is made 100% width and a fixed (small) height to make the caption size nice
# BUT...
# It would be even better to style the captions ourselves and bring them into the page
# like other HTML content!
#
# VTT tracks can also trigger the *cuechange* event, which means we can program CUSTOM title behaviors!
#
# LOOK AT vtt_basic.html
# and vtt_custom.html

@ -0,0 +1,6 @@
# vosk can also output JSON that includes the timing of each individual
# detected WORD!
# NOTE: I had an error when I did this and needed to PATCH some PYTHON code in VOSK
# see VOSKPATCH.TXT
# nb: w25mia60.wav is the same 60 second extract that was used to make the srt
vosk-transcriber -l en-us -i w25mia60.wav -t json -o w25mia60.json

@ -0,0 +1,16 @@
#!/usr/bin/env python
"""Convert SubRip (.srt) subtitles into WebVTT (.vtt).

Usage: srt2vtt.py [infile] [outfile]

Reads from stdin / writes to stdout when a file name is left out.
The conversion writes the "WEBVTT" header followed by an empty line, and
replaces the comma in every timing line with a dot; all other lines are
copied through unchanged (minus trailing whitespace).
"""
import argparse
import sys
import re

# An SRT timing line looks like: 00:00:00,075 --> 00:00:02,610
TIMING = re.compile(r"(\d\d:\d\d:\d\d),(\d\d\d) --> (\d\d:\d\d:\d\d),(\d\d\d)")


def srt_line_to_vtt(line):
    """Return `line` with SRT timecodes (comma) rewritten as VTT timecodes (dot).

    Lines that contain no timing are returned unchanged.
    """
    return TIMING.sub(r"\1.\2 --> \3.\4", line)


def main():
    """Parse the command line and stream the converted subtitles to the output."""
    # description= (and not the first positional argument, which is the program
    # name) so that --help shows the real script name in its usage line.
    ap = argparse.ArgumentParser(description="convert srt into vtt")
    # WebVTT files are UTF-8, so don't depend on the locale's default encoding;
    # utf-8-sig also drops a byte-order-mark if the srt starts with one.
    ap.add_argument('infile', nargs='?', type=argparse.FileType('r', encoding='utf-8-sig'), default=sys.stdin)
    ap.add_argument('outfile', nargs='?', type=argparse.FileType('w', encoding='utf-8'), default=sys.stdout)
    args = ap.parse_args()
    # print adds a newline of its own, so this writes the header + one empty line
    print("WEBVTT\n", file=args.outfile)
    for line in args.infile:
        print(srt_line_to_vtt(line.rstrip()), file=args.outfile)


if __name__ == "__main__":
    main()

@ -0,0 +1,14 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
  <title>vosk captions (drawn by the browser)</title>
</head>
<body>
  <!-- The source is an audio-only mp3, but it is played in a <video> element
       so that the browser has an area in which to draw the captions.
       The fixed (small) height keeps the caption text at a readable size. -->
  <video src="w25mia60.mp3" controls style="width: 100%; height: 320px;">
    <!-- <track> is a void element: it has no content and no end tag -->
    <track default kind="captions" label="vosk" srclang="en" src="w25mia60.vtt" />
  </video>
</body>
</html>

@ -0,0 +1,28 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
  <title>vosk captions (drawn by our own script)</title>
</head>
<body>
  <!-- kind="metadata": the cues are loaded for scripts to use; the script below
       copies the text of the active cue into the #caption element. -->
  <audio src="w25mia60.mp3" controls style="width: 100%">
    <!-- <track> is a void element: it has no content and no end tag -->
    <track default id="captions" kind="metadata" label="vosk" src="w25mia60.vtt" />
  </audio>
  <div id="caption" style="text-align: center; padding: 25px; font-size: 42px;">caption here</div>
  <script>
    const captrack = document.querySelector("track#captions");
    const capdiv = document.querySelector("#caption");

    // cuechange fires each time the set of cues that are active at the current
    // playback time changes (a cue starts or ends).
    captrack.addEventListener("cuechange", (e) => {
      console.log('track: cuechange', e);
      // Between cues nothing is active: show an empty caption.
      const text = captrack.track.activeCues[0]?.text ?? "";
      console.log(text);
      // textContent (not innerHTML): the transcript is plain text, so characters
      // such as < or & must be shown as they are and never parsed as markup.
      capdiv.textContent = text;
    });

    // "hidden": the track stays active (cues load, events fire) without the
    // browser displaying anything itself.
    captrack.track.mode = "hidden";
  </script>
</body>
</html>

@ -0,0 +1,110 @@
WEBVTT
1
00:00:00.075 --> 00:00:02.610
have a particular connection to worms inner
2
00:00:02.610 --> 00:00:05.970
and outer workings i wanted to make
3
00:00:05.970 --> 00:00:08.640
the series because one has become over
4
00:00:08.640 --> 00:00:10.710
twenty five years and institution
5
00:00:11.460 --> 00:00:14.130
but an institution built with and for
6
00:00:14.130 --> 00:00:16.110
and from a d i y spirits
7
00:00:16.590 --> 00:00:19.080
and something that it still retains it's
8
00:00:19.080 --> 00:00:20.490
the nature of do i was spaces
9
00:00:20.490 --> 00:00:22.710
of course and any cultural spaces that
10
00:00:22.710 --> 00:00:24.390
people who make it come and go
11
00:00:24.570 --> 00:00:25.830
the name on the building stays the
12
00:00:25.830 --> 00:00:28.260
same but the vibe changes and the
13
00:00:28.260 --> 00:00:29.880
sense of identity shifts
14
00:00:31.020 --> 00:00:33.000
worm is a place of invention and
15
00:00:33.000 --> 00:00:35.880
reinvention and with the constant motion certain
16
00:00:35.880 --> 00:00:39.120
stories and memories do fade away sometimes
17
00:00:39.120 --> 00:00:39.870
quite quickly
18
00:00:40.770 --> 00:00:42.563
the idea of this history of why
19
00:00:42.563 --> 00:00:44.790
i'm not the only possible history of
20
00:00:44.790 --> 00:00:47.160
course is to try and capture a
21
00:00:47.160 --> 00:00:48.750
few of those stories with the people
22
00:00:48.750 --> 00:00:50.748
that i've met and people that maria
23
00:00:50.748 --> 00:00:52.680
it has known along the way
24
00:00:53.790 --> 00:00:54.780
but i'm not going to begin at
25
00:00:54.780 --> 00:00:56.880
the beginning that would be logical but
26
00:00:56.880 --> 00:00:59.370
actually have already begun obviously and today's
27
00:00:59.370 --> 00:00:59.970
episode

@ -0,0 +1,132 @@
(function (global, factory) {
if (typeof define === 'function' && define.amd) {
define(['exports', 'module'], factory);
} else if (typeof exports !== 'undefined' && typeof module !== 'undefined') {
factory(exports, module);
} else {
var mod = {
exports: {}
};
factory(mod.exports, mod);
global.fetchJsonp = mod.exports;
}
})(this, function (exports, module) {
'use strict';
// Fallback values used by fetchJsonp when the caller's options leave them out.
var defaultOptions = {
  // milliseconds to wait for the JSONP response before rejecting
  timeout: 5000,
  // name of the query-string parameter that carries the callback name
  jsonpCallback: 'callback',
  // fixed callback name; null means a fresh name is generated per request
  jsonpCallbackFunction: null
};
/**
 * Build a name for a one-off global JSONP callback.
 *
 * @returns {string} 'jsonp_<current time in ms>_<random integer up to 100000>'
 */
function generateCallbackFunction() {
  var timestamp = Date.now();
  var randomPart = Math.ceil(Math.random() * 100000);
  return ['jsonp', timestamp, randomPart].join('_');
}
/**
 * Remove a global callback from `window`.
 *
 * @param {string} functionName - property name of the callback on `window`
 */
function clearFunction(functionName) {
  var host = window;
  try {
    delete host[functionName];
  } catch (deleteError) {
    // Deleting can throw (IE8 does for any property on window, see
    // http://stackoverflow.com/a/1824228/751089); overwrite the value instead.
    host[functionName] = undefined;
  }
}
/**
 * Remove the <script> element with the given id from the document, if present.
 *
 * The element is detached from whatever node currently contains it, so the
 * call cannot throw when the element found for this id is not a direct child
 * of <head>. Unknown ids and already-detached elements are ignored.
 *
 * @param {string} scriptId - id attribute of the script element to remove
 */
function removeScript(scriptId) {
  var script = document.getElementById(scriptId);
  if (script && script.parentNode) {
    script.parentNode.removeChild(script);
  }
}
/**
 * Perform a JSONP request: add a <script> pointing at the URL to <head> and
 * wait for it to call a temporary global callback.
 *
 * @param {string} _url - request URL; the callback parameter is appended to it
 * @param {Object} [options] - optional second argument, read for: timeout,
 *   jsonpCallback, jsonpCallbackFunction, charset, nonce, referrerPolicy,
 *   crossorigin
 * @returns {Promise} resolves with { ok: true, json() } once the callback is
 *   called (json() resolves to the value passed to the callback); rejects with
 *   an Error when the timeout elapses or the script element reports an error
 */
function fetchJsonp(_url) {
// Second argument is optional; default to an empty options object.
var options = arguments.length <= 1 || arguments[1] === undefined ? {} : arguments[1];
// to avoid param reassign
var url = _url;
// NOTE: `||` means a falsy option (e.g. timeout: 0) also falls back to the default.
var timeout = options.timeout || defaultOptions.timeout;
var jsonpCallback = options.jsonpCallback || defaultOptions.jsonpCallback;
var timeoutId = undefined;
return new Promise(function (resolve, reject) {
// Name of the global function that the response script is expected to call.
var callbackFunction = options.jsonpCallbackFunction || generateCallbackFunction();
// The script element's id is derived from the parameter name + callback name.
var scriptId = jsonpCallback + '_' + callbackFunction;
// Success path: resolve, then cancel the timeout and clean up script + callback.
window[callbackFunction] = function (response) {
resolve({
ok: true,
// keep consistent with fetch API
json: function json() {
return Promise.resolve(response);
}
});
if (timeoutId) clearTimeout(timeoutId);
removeScript(scriptId);
clearFunction(callbackFunction);
};
// Check if the user set their own params, and if not add a ? to start a list of params
url += url.indexOf('?') === -1 ? '?' : '&';
var jsonpScript = document.createElement('script');
// Final src is: <url><? or &><jsonpCallback>=<callbackFunction>
jsonpScript.setAttribute('src', '' + url + jsonpCallback + '=' + callbackFunction);
// Optional attributes are only set when the matching option is truthy.
if (options.charset) {
jsonpScript.setAttribute('charset', options.charset);
}
if (options.nonce) {
jsonpScript.setAttribute('nonce', options.nonce);
}
if (options.referrerPolicy) {
jsonpScript.setAttribute('referrerPolicy', options.referrerPolicy);
}
// NOTE: the attribute value is always the literal 'true', whatever the option's value.
if (options.crossorigin) {
jsonpScript.setAttribute('crossorigin', 'true');
}
jsonpScript.id = scriptId;
// Adding the element to <head> is what starts the request.
document.getElementsByTagName('head')[0].appendChild(jsonpScript);
// Timeout path: reject and clean up.
timeoutId = setTimeout(function () {
reject(new Error('JSONP request to ' + _url + ' timed out'));
clearFunction(callbackFunction);
removeScript(scriptId);
// Re-register a stub under the same name that only removes itself --
// presumably so that a response arriving after the timeout still finds
// a function to call.
window[callbackFunction] = function () {
clearFunction(callbackFunction);
};
}, timeout);
// Caught if got 404/500
// Error path: reject, clean up and cancel the pending timeout.
jsonpScript.onerror = function () {
reject(new Error('JSONP request to ' + _url + ' failed'));
clearFunction(callbackFunction);
removeScript(scriptId);
if (timeoutId) clearTimeout(timeoutId);
};
});
}
// export as global function
/*
let local;
if (typeof global !== 'undefined') {
local = global;
} else if (typeof self !== 'undefined') {
local = self;
} else {
try {
local = Function('return this')();
} catch (e) {
throw new Error('polyfill failed because global object is unavailable in this environment');
}
}
local.fetchJsonp = fetchJsonp;
*/
module.exports = fetchJsonp;
});

@ -0,0 +1,41 @@
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>minimal archive.org search</title>
</head>
<body>
  <p>This is a minimal form to send a search request to archive.org</p>
  <!-- tagradio.js takes over the submit event of this form; without the script
       the browser sends the request itself and shows the raw response. -->
  <form action="https://archive.org/advancedsearch.php">
    <input size="50" type="text" name="q" value="subject:&quot;field recording&quot;">
    <input type="submit" value="Search">
    <!-- the rest are hidden -->
    <input type="hidden" name="fl[]" value="date">
    <input type="hidden" name="fl[]" value="description">
    <input type="hidden" name="fl[]" value="format">
    <input type="hidden" name="fl[]" value="identifier">
    <input type="hidden" name="fl[]" value="licenseurl">
    <input type="hidden" name="fl[]" value="mediatype">
    <input type="hidden" name="fl[]" value="name">
    <input type="hidden" name="fl[]" value="rights">
    <input type="hidden" name="fl[]" value="subject">
    <input type="hidden" name="fl[]" value="title">
    <input type="hidden" name="fl[]" value="type">
    <input type="hidden" name="sort[]" value="year desc">
    <input type="hidden" name="sort[]" value="identifier asc">
    <input id="numresults" type="hidden" name="rows" value="10">
    <input type="hidden" name="page" value="1">
    <input type="hidden" name="output" value="json">
    <!-- <input type="hidden" name="output" value="tables">
    <input type="hidden" name="callback" value="callback"> -->
  </form>
  <div id="results"></div>
  <!-- fetch-jsonp.js defines the global fetchJsonp that tagradio.js calls,
       so it has to be loaded first -->
  <script src="fetch-jsonp.js"></script>
  <script src="tagradio.js"></script>
</body>
</html>

@ -0,0 +1,56 @@
// https://github.com/camsong/fetch-jsonp
// https://raw.githubusercontent.com/camsong/fetch-jsonp/refs/heads/master/src/fetch-jsonp.js
const form = document.querySelector("form");
const results_div = document.querySelector("#results");

/**
 * Percent-encode a file name so it can be appended to a URL as a path.
 * Every part between slashes is encoded on its own: the slashes stay path
 * separators, while characters such as # and ? (which would otherwise end
 * the path) are escaped.
 *
 * @param {string} name - file name, possibly containing sub-folders
 * @returns {string} the encoded path
 */
const encode_path = (name) => name.split("/").map(encodeURIComponent).join("/");

/**
 * Ask archive.org for the files of one item and, if it has a "VBR MP3" file,
 * add an audio player for the first one to the item's div.
 * Items without such a file just keep their link; errors are logged.
 *
 * @param {Object} doc - one search result (only doc.identifier is used)
 * @param {HTMLElement} div - the element already showing this result
 * @returns {Promise} settles once the player is added (or the lookup failed)
 */
const add_player = (doc, div) => {
    const item_metadata_url = `https://archive.org/metadata/${encodeURIComponent(doc.identifier)}`;
    return fetchJsonp(item_metadata_url)
        .then(resp => resp.json())
        .then(item_data => {
            console.log("*", item_data);
            const mp3_files = (item_data.files || []).filter(f => f.format === "VBR MP3");
            if (mp3_files.length === 0) {
                // nothing playable in this item
                return;
            }
            const file_url = `https://${item_data.server}${item_data.dir}/`;
            const audio = document.createElement("audio");
            audio.controls = true;
            audio.src = file_url + encode_path(mp3_files[0].name);
            div.appendChild(audio);
        })
        .catch(err => {
            console.error("could not load metadata for", doc.identifier, err);
        });
};

form.addEventListener("submit", event => {
    // console.log("GO NO FURTHER FORM... I'll take it from here");
    event.preventDefault();
    const formdata = new FormData(form);
    const usp = new URLSearchParams(formdata);
    console.log("urlsearchparams", usp.toString());
    // start each search with an empty list, so that results don't pile up
    results_div.textContent = "";
    fetchJsonp("https://archive.org/advancedsearch.php?" + usp.toString())
        .then(resp => resp.json())
        .then(data => {
            // here we have access to the response data...
            console.log("data", data);
            data.response.docs.forEach(doc => {
                console.log(doc);
                const div = document.createElement("div");
                const a = document.createElement("a");
                a.href = `https://archive.org/details/${encodeURIComponent(doc.identifier)}`;
                // fall back to the identifier for items without a title
                a.textContent = doc.title || doc.identifier;
                div.appendChild(a);
                results_div.appendChild(div);
                add_player(doc, div);
            });
        })
        .catch(err => {
            // JSONP timeout / failure, or a response without the expected shape
            console.error("search failed", err);
            results_div.textContent = "Sorry, the search failed: " + err.message;
        });
    // An earlier attempt with fetch(), kept for reference (not used).
    // NOTE: "cors" is a value for the `mode` option of fetch(), not for `method`.
    // fetch("https://archive.org/advancedsearch.php", {
    //     method: "cors",
    //     body: formdata
    // }).then(resp => resp.json())
    // .then(data => {
    //     console.log("data", data);
    // })
});
Loading…
Cancel
Save