# yt-dlp installed with pip
# nb: you may need to update yt-dlp frequently to keep up with changes
# made by commercial services to stop downloading tools like yt-dlp
# from working!
#
# pip install --upgrade yt-dlp
#
# Download the playlist, writing info in json
yt-dlp --write-info-json "https://www.mixcloud.com/radiowormrotterdam/playlists/worm-25/"

# let's move the files to a sub-folder
# (-p: don't complain if the folder already exists from an earlier run;
#  --: stop option parsing, in case a downloaded name starts with a dash)
mkdir -p worm25
mv -- *.info.json *.webm worm25/

#
#    _.........._
#   | |xpub    | |
#   | |        | |
#   | |        | |
#   | |________| |
#   |   ______   |
#   |  |    | |  |
#   |__|____|_|__|
#
# In the early days of computer history
# on computers using DOS (or Disk Operating System)
# file names needed to conform to a very strict standard
# of 8 letters, with only A-Z, dash -, and underscore _, and numbers 0-9
# (and no spaces!)
# plus a 3 letter extension indicating the type of file
# for example README.TXT
#
# Copy one file in this folder and give it a short name
# (the long name is wrapped in single quotes so that the spaces, #, brackets
#  and parentheses are taken literally and it stays ONE argument)
#
cp 'worm25/WORM 25: A history of WORM in 25 Objects #1: Mia on the hand-towel dispenser (04.06.24) [radiowormrotterdam_worm-25-a-history-of-worm-in-25-objects-1-mia-on-the-hand-towel-dispenser-040624].webm' w25mia.webm

#                     __
#          .,-;-;-,. /'_\
#        _/_/_/_|_\_\) /
#      '-<_><_><_><_>=/\
#        `/_/====/_/-'\_\
#         ""     ""    ""
# NOTE:
# it may seem pointless to move files around and rename them with short names
# BUT...
# coding/working with the commandline requires A LOT OF FOCUS
# so steps to reduce "cognitive load" like sifting through long lists of confusing filenames
# that make working on the commandline slower and *harder to read*
# are really worthwhile!
#
# Also when working with digital materials, it's often tempting to try to
# address an entire collection (in this case all the recordings, and the whole hour of each recording)
# BUT ...
# it's really important when testing things out that you focus on a small sample
# in this way you make experimentation, including the *necessary errors
# and missteps*, as fluid as possible to stay in the flow,
# so that you can get through the bugs
# to the interesting results that will give you the energy
# and confidence to keep going!
#
# so in this case....
#
# get something working for a 60 second sample
# THEN once you know it works...
# apply it to the whole hour long recording
# and eventually all the recordings of the playlist

# Working with a long file also can take a lot of time when experimenting
# use ffmpeg to make a 60 second extract
# -ss is start time (here: 120 seconds in), -t duration (duration of final extract)
# at the same time, convert the webm to wav
# (the wav is the input for the transcription step just below)
ffmpeg -i w25mia.webm -ss 120 -t 60 w25mia60.wav

# use vosk to transcribe, to srt
# nb: w25mia60.wav is an INPUT and needs to already exist
#     w25mia60.srt is an OUTPUT and will be (re)created
vosk-transcriber -l en-us -i w25mia60.wav -t srt -o w25mia60.srt

# SRT (SubRip Subtitle) comes from
# practices of PIRATE CURATION
# where films would be ripped from DVDs
# and distributed as video files
# SRT is then a simple (and small) text format for
# the missing subtitles, that can then
# be translated into many languages
# and distributed separately from the (heavier) video files
# Also due to the editability and understandability
# the format was so popular, that it became the
# basis of the W3C's web standard (web) VTT
#
# see also: https://www.opensubtitles.com/
#
# The two are nearly the same except
# the file needs to have a "header" (the first line should be:)
# WEBVTT
# And the timecodes use a dot instead of a comma..
# SO
# 00:00:00,075 --> 00:00:02,610
# in SRT becomes in VTT:
# 00:00:00.075 --> 00:00:02.610
#
# let's use a python script to convert from srt to vtt
# (first argument: the srt to read, second argument: the vtt to write)
scripts/srt2vtt.py w25mia60.srt w25mia60.vtt

# make an mp3 for the browser
ffmpeg -i w25mia60.wav w25mia60.mp3

# adjust vtt.html to point to the mp3 + vtt
# LOOK AT vtt.html
# NB: to see captions, we need to use