#!/usr/bin/env python
"""Convert a speech-recognition JSON transcript into a per-word WebVTT file.

The input JSON is read from ``infile`` (default: stdin) and must provide
``data['monologues']``, an iterable of objects that each carry a ``terms``
list.  Every term whose ``type`` equals ``"WORD"`` becomes one cue in the
output: the cue timing line is built from the term's ``start`` and ``end``
values, and the cue payload is a JSON object holding the term's ``text`` and
``confidence``.  Terms of any other type are skipped.

The result is written to ``outfile`` (default: stdout).
"""

import argparse
import json
import sys

# requires: timecode.py
# some functions to help working with (srt/vtt) timecodes
from timecode import timecode_fromsecs


def tc(s):
    """Format the time value ``s`` as a timecode string for a cue timing line.

    Delegates to ``timecode_fromsecs`` with ``alwaysfract=True`` and
    ``fractdelim="."``.
    NOTE(review): presumably ``s`` is in seconds and the result always has a
    fractional part separated by "." (as WebVTT requires) -- confirm against
    timecode.py.
    """
    return timecode_fromsecs(s, alwaysfract=True, fractdelim=".")


# The text must be passed as ``description``: the first positional parameter
# of ArgumentParser is ``prog``, which would otherwise put this whole sentence
# into the usage line in place of the program name.
ap = argparse.ArgumentParser(
    description="convert VOSK json output into a special vtt with timed json per word"
)
ap.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
ap.add_argument('outfile', nargs='?', type=argparse.FileType('w'), default=sys.stdout)
args = ap.parse_args()

data = json.load(args.infile)

# WebVTT header: the signature line followed by a blank line.
print("WEBVTT", file=args.outfile)
print(file=args.outfile)

for m in data['monologues']:
    for term in m['terms']:
        # Only word terms are turned into cues.
        if term['type'] != "WORD":
            continue
        print(f"{tc(term['start'])} --> {tc(term['end'])}", file=args.outfile)
        tterm = {'text': term['text'], 'confidence': term['confidence']}
        print(json.dumps(tterm), file=args.outfile)
        # A blank line terminates the cue.
        print(file=args.outfile)