SI25/mixcloud/scripts/voskjson2vtt.py

#!/usr/bin/env python
import json
import argparse
import sys 

# requires: timecode.py
# some functions to help working with (srt/vtt) timecodes
from timecode import timecode_fromsecs

# Command-line interface. Both positionals are optional, so the script works
# as a filter (stdin -> stdout) or with explicit input/output paths.
# The help sentence must be passed as ``description``: the first positional
# parameter of ArgumentParser is ``prog``, so passing the sentence positionally
# would replace the program name in the usage line instead of describing it.
ap = argparse.ArgumentParser(
    description="convert VOSK json output into a special vtt with timed json per word"
)
# Open named files explicitly as UTF-8 (the encoding of JSON text and the one
# WebVTT requires) rather than relying on the platform's locale encoding.
# The stdin/stdout defaults keep whatever encoding the interpreter gave them.
ap.add_argument('infile', nargs='?', type=argparse.FileType('r', encoding='utf-8'), default=sys.stdin)
ap.add_argument('outfile', nargs='?', type=argparse.FileType('w', encoding='utf-8'), default=sys.stdout)
args = ap.parse_args()


# Parse the entire input document into memory before any output is written.
data = json.load(args.infile)

def tc(s):
    """Format the time value ``s`` as a timecode string.

    Thin wrapper around ``timecode_fromsecs`` that pins the formatting
    options used throughout this script: ``alwaysfract=True`` and
    ``fractdelim="."``.
    NOTE(review): presumably this yields a WebVTT-style ``HH:MM:SS.mmm``
    stamp from a number of seconds -- confirm against timecode.py.
    """
    formatted = timecode_fromsecs(s, fractdelim=".", alwaysfract=True)
    return formatted

# Write the WebVTT document: the "WEBVTT" header line, a blank line, then one
# cue per WORD term. Each cue is a timing line, a one-line JSON payload holding
# the word's text and confidence, and a blank separator line.
out = args.outfile
print("WEBVTT", end="\n\n", file=out)

# Flatten monologues -> terms, keeping only the entries typed as "WORD".
word_terms = (
    entry
    for monologue in data['monologues']
    for entry in monologue['terms']
    if entry['type'] == "WORD"
)

for word in word_terms:
    cue_start = tc(word['start'])
    cue_end = tc(word['end'])
    print(f"{cue_start} --> {cue_end}", file=out)
    payload = json.dumps({'text': word['text'], 'confidence': word['confidence']})
    # The trailing empty line terminates the cue.
    print(payload, end="\n\n", file=out)
words 1 month ago			`#!/usr/bin/env python`
			`import json`
			`import argparse`
			`import sys`

			`# requires: timecode.py`
			`# some functions to help working with (srt/vtt) timecodes`
			`from timecode import timecode_fromsecs`

			`ap = argparse.ArgumentParser("convert VOSK json output into a special vtt with timed json per word")`
			`ap.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)`
			`ap.add_argument('outfile', nargs='?', type=argparse.FileType('w'), default=sys.stdout)`
			`args = ap.parse_args()`


			`data = json.load(args.infile)`

			`def tc(s):`
			`return timecode_fromsecs(s, alwaysfract=True, fractdelim=".")`

			`print ("WEBVTT", file=args.outfile)`
			`print (file=args.outfile)`
			`for m in data['monologues']:`
			`for term in m['terms']:`
			`if term['type'] == "WORD":`
			`print (f"{tc(term['start'])} --> {tc(term['end'])}", file=args.outfile)`
			`# print (f"{term['text']}")`
			`tterm = {'text': term['text'], 'confidence': term['confidence']}`
			`print (json.dumps(tterm), file=args.outfile)`
			`print (file=args.outfile)`