#!/usr/bin/env python3
"""
Convert the text output of ``ogginfo`` into subtitle cues or JSON.

Usage:
    ogginfo some.ogg | python3 ogginfo-to-srt.py > some.srt
    ogginfo some.ogg | python3 ogginfo-to-srt.py --vtt > some.vtt
    ogginfo some.ogg | python3 ogginfo-to-srt.py --json > some.json

EXAMPLE OF OGGINFO OUTPUT

User comments section follows...
    title=one
    ENCODER=Liquidsoap/1.3.3 (Unix; OCaml 4.05.0)
Vorbis stream 1:
    Total data length: 2882 bytes
    Playback length: 0m:00.704s
    Average bitrate: 32.731445 kb/s
"""

import argparse
import json
import math
import re
import sys

# Matches the "<minutes>m:<seconds>s" form shown in the example above.
_OGGINFO_TIME_RE = re.compile(r"(\d+)m\:(\d+(?:\.\d+)?)s")

# Sentinel so that nextiter() can tell "no previous item yet" apart from a
# falsy item such as 0, "" or {}.
_NO_ITEM = object()


################################
# PASS 1: Read the structure

def parse_ogginfo_output(f):
    """Parse ogginfo text into ``{'streams': [...]}``.

    Args:
        f: an iterable of text lines (an open file, or a list of strings).

    Returns:
        A dict with one key, 'streams': a list of dicts.  Each stream dict
        has a 'comments' dict (name -> value) when a "User comments section"
        was seen for it, and a 'stream' dict (name -> value) when a
        "Vorbis stream" section was seen for it.

    Progress messages are written to stderr.  Lines inside a section that do
    not contain the section's delimiter ("=" for comments, ":" for stream
    info) are skipped instead of aborting the parse.
    """
    streams = []
    output = {'streams': streams}
    state = ""
    stream = None        # stream dict currently being filled in
    comments = None      # 'comments' dict of the current stream
    streaminfo = None    # 'stream' dict of the current stream

    for line in f:
        if line.startswith("User comments section follows..."):
            print("COMMENTS", file=sys.stderr)
            state = "comments"
            comments = {}
            stream = {'comments': comments}
            streams.append(stream)
        elif line.startswith("Vorbis stream"):
            print("STREAM", file=sys.stderr)
            state = "stream"
            streaminfo = {}
            # Start a new entry when there is no open entry at all, or when
            # the open entry already has its stream info (i.e. this stream
            # had no comments section of its own).
            if stream is None or 'stream' in stream:
                stream = {}
                streams.append(stream)
            stream['stream'] = streaminfo
        elif line.startswith("Logical stream"):
            print("EXIT STATE", file=sys.stderr)
            state = ""
        elif state == "comments":
            name, sep, value = line.strip().partition("=")
            if not sep:
                continue  # blank or malformed line: nothing to record
            name = name.strip()
            value = value.strip()
            print("comment", (name, value), file=sys.stderr)
            comments[name] = value
        elif state == "stream":
            name, sep, value = line.strip().partition(":")
            if not sep:
                continue  # blank or malformed line: nothing to record
            name = name.strip()
            value = value.strip()
            print("stream", (name, value), file=sys.stderr)
            streaminfo[name] = value
    return output


################################
# PASS 2: Add start time, duration, and timecodes

def parse_ogginfo_time(t):
    """Convert an ogginfo duration such as "0m:00.704s" to seconds.

    Returns:
        The duration as a float, or None when *t* contains no
        "<minutes>m:<seconds>s" pattern.
    """
    match = _OGGINFO_TIME_RE.search(t)
    if match is None:
        return None
    minutes, seconds = match.groups()
    return (int(minutes) * 60) + float(seconds)


def timecode_fromsecs(rawsecs, fract=True, alwaysfract=True, fractdelim='.',
                      alwayshours=False):
    """Format a number of seconds as [HH:]MM:SS[.xxx].

    Args:
        rawsecs: time in seconds (int or float).
        fract: when True, a millisecond fraction may be appended.
        alwaysfract: with *fract*, append the fraction even when it is zero;
            otherwise it is appended only when non-zero.
        fractdelim: separator placed between seconds and milliseconds.
        alwayshours: include the hours field even when it is zero.

    Returns:
        The formatted timecode string.
    """
    # Round once to whole milliseconds and split with integer arithmetic, so
    # that a fraction that rounds up to 1000 ms carries into the seconds
    # field instead of being printed as ".000" on the previous second.
    total_ms = int(round(rawsecs * 1000))
    hours, remainder = divmod(total_ms, 3600 * 1000)
    mins, remainder = divmod(remainder, 60 * 1000)
    secs, millis = divmod(remainder, 1000)

    if hours or alwayshours:
        timecode = "%02d:%02d:%02d" % (hours, mins, secs)
    else:
        timecode = "%02d:%02d" % (mins, secs)

    if fract and (millis or alwaysfract):
        timecode += "%s%03d" % (fractdelim, millis)
    return timecode


def add_timing(output):
    """Annotate every stream in *output* with cumulative timing, in place.

    Adds 'start_time', 'start_time_timecode', 'duration' and
    'duration_timecode' to each stream dict.  Each stream starts where the
    previous one ended.  A stream whose 'Playback length' is missing or
    cannot be parsed is given a duration of 0.0 and a warning is written to
    stderr.
    """
    t = 0.0
    for d in output['streams']:
        raw_length = d.get('stream', {}).get('Playback length')
        duration_seconds = None
        if raw_length is not None:
            duration_seconds = parse_ogginfo_time(raw_length)
        if duration_seconds is None:
            print("WARNING: no usable playback length ({!r}), assuming 0"
                  .format(raw_length), file=sys.stderr)
            duration_seconds = 0.0
        d['start_time'] = t
        d['start_time_timecode'] = timecode_fromsecs(t)
        d['duration'] = duration_seconds
        d['duration_timecode'] = timecode_fromsecs(duration_seconds)
        t += duration_seconds


def nextiter(items):
    """Yield ``(item, next_item)`` pairs; the last pair is ``(last, None)``.

    Yields nothing for an empty iterable.
    """
    prev_item = _NO_ITEM
    for x in items:
        if prev_item is not _NO_ITEM:
            yield prev_item, x
        prev_item = x
    if prev_item is not _NO_ITEM:
        yield prev_item, None


def print_srt(data, last_subtitle_duration=5.0, file=None):
    """Write one cue per stream: a timing line, the comments as JSON, a blank line.

    Each cue ends where the next stream starts.  The final cue ends
    *last_subtitle_duration* seconds after its own start.  Expects
    add_timing() to have been applied to *data* already.

    NOTE(review): cues carry no sequence numbers and use timecode_fromsecs'
    default "MM:SS.mmm" form; confirm this is acceptable to the SRT players
    being targeted.
    """
    for cur, nxt in nextiter(data['streams']):
        if nxt is not None:
            end_timecode = nxt['start_time_timecode']
        else:
            end_timecode = timecode_fromsecs(
                cur['start_time'] + last_subtitle_duration)
        print("{} --> {}".format(cur['start_time_timecode'], end_timecode),
              file=file)
        # A stream parsed without a comments section has no 'comments' key.
        print(json.dumps(cur.get('comments', {})), file=file)
        print(file=file)


def main(argv=None):
    """Command-line entry point: read ogginfo text, write cues or JSON."""
    ap = argparse.ArgumentParser(
        description="Convert ogginfo output to SRT/WebVTT-style cues or JSON.")
    ap.add_argument("--input", type=argparse.FileType("r"), default=sys.stdin)
    ap.add_argument("--output", type=argparse.FileType("w"), default=sys.stdout)
    ap.add_argument("--last-subtitle-duration", type=float, default=5.0)
    ap.add_argument("--json", default=False, action="store_true")
    ap.add_argument("--vtt", default=False, action="store_true")
    args = ap.parse_args(argv)

    output = parse_ogginfo_output(args.input)
    add_timing(output)
    if args.json:
        print(json.dumps(output, indent=2), file=args.output)
    else:
        if args.vtt:
            # print() adds the final newline, leaving the blank line that
            # separates the header from the first cue.
            print("WEBVTT\nKind: captions\nLanguage: en\n", file=args.output)
        print_srt(output, last_subtitle_duration=args.last_subtitle_duration,
                  file=args.output)


if __name__ == "__main__":
    main()