#!/bin/bash
#
# VOSK can also output JSON that includes the timing of each individual
# detected word.
# NOTE: this step raised an error for me until I patched some of the Python
# code in VOSK; see VOSKPATCH.TXT
# Abort on the first failing command (and on unset variables) so the
# converter below never runs against a missing or stale JSON file.
set -euo pipefail

# Basename shared by the input WAV and both output files. An optional first
# argument overrides the default recording; with no arguments the file names
# are w25mia60.wav, w25mia60.json and w25mia60_words.vtt.
stem="${1:-w25mia60}"
wav="${stem}.wav"
json="${stem}.json"
vtt="${stem}_words.vtt"

# Fail early with a clear message instead of relying on the transcriber's
# own error reporting.
if [[ ! -f "$wav" ]]; then
  printf 'error: input file not found: %s\n' "$wav" >&2
  exit 1
fi

# Transcribe the WAV (language en-us) and write the result as JSON.
vosk-transcriber -l en-us -i "$wav" -t json -o "$json"

# use a python script to translate the VOSK json output
# into a VTT with JSON objects for each caption
python3 scripts/voskjson2vtt.py "$json" "$vtt"

# Now check out vtt_words.html