# SI25/mixcloud/scripts/voskjson2vtt.py

#!/usr/bin/env python
import json
import argparse
import sys

# requires: timecode.py
# some functions to help working with (srt/vtt) timecodes
from timecode import timecode_fromsecs

# Command-line interface. Both positionals are optional, so the script can be
# used as a filter (stdin -> stdout) or with explicit input/output paths.
#
# The help sentence is passed as ``description``: the first positional
# parameter of ArgumentParser is ``prog``, so passing the sentence positionally
# made it show up as the program name in the usage line and left ``--help``
# without any description.
ap = argparse.ArgumentParser(
    description="convert VOSK json output into a special vtt with timed json per word"
)
ap.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
ap.add_argument('outfile', nargs='?', type=argparse.FileType('w'), default=sys.stdout)
args = ap.parse_args()


# Parse the complete input document before any output is produced.
data = json.load(args.infile)

def tc(s):
    """Format *s* as a timecode using ``timecode_fromsecs``.

    The helper is always asked to include the fractional part and to use
    "." as the fraction delimiter (the form WebVTT timestamps use).

    *s* is presumably a time offset in seconds, going by the helper's
    name -- confirm against timecode.py.
    """
    vtt_options = {"alwaysfract": True, "fractdelim": "."}
    return timecode_fromsecs(s, **vtt_options)

# Emit the WebVTT header, then one cue per term of type "WORD".
# Each cue's payload is a JSON object carrying the word's text and its
# confidence instead of plain caption text.
out = args.outfile
out.write("WEBVTT\n\n")
for monologue in data['monologues']:
    for entry in monologue['terms']:
        if entry['type'] != "WORD":
            # Only terms of type "WORD" become cues.
            continue
        begin = tc(entry['start'])
        finish = tc(entry['end'])
        # Cue timing line.
        out.write(f"{begin} --> {finish}\n")
        payload = json.dumps({'text': entry['text'], 'confidence': entry['confidence']})
        # Payload line followed by the blank line that ends the cue.
        out.write(payload + "\n\n")