You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

158 lines
5.0 KiB
Python

5 years ago
#!/usr/bin/env python3
import re, sys, re, json, math
"""
Usage:
ogginfo some.ogg | python3 ogginfo-to-srt.py > some.srt
ogginfo some.ogg | python3 ogginfo-to-srt.py --json > some.json
EXAMPLE OF OGGINFO OUTPUT
User comments section follows...
title=one
ENCODER=Liquidsoap/1.3.3 (Unix; OCaml 4.05.0)
Vorbis stream 1:
Total data length: 2882 bytes
Playback length: 0m:00.704s
Average bitrate: 32.731445 kb/s
"""
################################
# PASS 1: Read the structure
def parse_ogginfo_output(f):
    """Parse the text output of ``ogginfo`` into a dictionary.

    The input is scanned line by line with a small state machine:

    * ``User comments section follows...`` opens a new entry and switches
      to reading ``name=value`` comment lines.
    * ``Vorbis stream ...`` switches to reading ``name: value`` stream
      properties. They are attached to the current entry, unless there is
      no current entry or it already has stream properties, in which case
      a new entry is started.
    * ``Logical stream ...`` leaves the current section.

    Lines inside a section that do not contain the expected delimiter
    (blank lines, for instance) are skipped instead of aborting the parse.
    Progress messages are written to standard error.

    Args:
        f: An iterable of text lines (an open file, or a list of strings).

    Returns:
        ``{'streams': [entry, ...]}`` where each entry is a dict that may
        hold a ``'comments'`` dict and/or a ``'stream'`` dict, both mapping
        stripped names to stripped string values.
    """
    output = {}
    output['streams'] = streams = []
    state = ""
    # ``stream`` stays None until the first section header is seen, so a
    # "Vorbis stream" header that is not preceded by a comments section
    # starts its own entry rather than failing on an unbound name.
    stream = None
    comments = None
    streaminfo = None
    for line in f:
        if line.startswith("User comments section follows..."):
            print ("COMMENTS", file=sys.stderr)
            state = "comments"
            comments = {}
            stream = {'comments': comments}
            streams.append(stream)
        elif line.startswith("Vorbis stream"):
            print ("STREAM", file=sys.stderr)
            state = "stream"
            streaminfo = {}
            if stream is None or 'stream' in stream:
                stream = {}
                streams.append(stream)
            stream['stream'] = streaminfo
        elif line.startswith("Logical stream"):
            print ("EXIT STATE", file=sys.stderr)
            state = ""
        elif state == "comments":
            name, delim, value = line.strip().partition("=")
            if not delim:
                # Not a name=value line (e.g. blank): nothing to record.
                continue
            name = name.strip()
            value = value.strip()
            print ("comment", (name, value), file=sys.stderr)
            comments[name] = value
        elif state == "stream":
            name, delim, value = line.strip().partition(":")
            if not delim:
                # Not a "name: value" line (e.g. blank): nothing to record.
                continue
            name = name.strip()
            value = value.strip()
            print ("stream", (name, value), file=sys.stderr)
            streaminfo[name] = value
    return output
################################
# PASS 2: Add start time, duration, and timecodes
def parse_ogginfo_time (t):
    """Convert an ogginfo playback length such as ``0m:00.704s`` to seconds.

    The first ``<minutes>m:<seconds>.<fraction>s`` occurrence found in *t*
    is used. Returns the length as a float number of seconds, or ``None``
    when *t* contains no such pattern.
    """
    found = re.search(r"(\d+)m\:(\d+\.\d+)s", t)
    if found is None:
        return None
    minutes_text, seconds_text = found.groups()
    return (int(minutes_text) * 60) + float(seconds_text)
def timecode_fromsecs(rawsecs, fract=True, alwaysfract=True, fractdelim='.', alwayshours=False):
    """Format a number of seconds as ``[HH:]MM:SS[.xxx]``.

    Args:
        rawsecs: Non-negative duration or offset in seconds.
        fract: When true, the value is rounded to milliseconds and a
            three-digit fraction may be appended; when false, the value is
            rounded to whole seconds and no fraction is ever written.
        alwaysfract: With ``fract`` true, write the fraction even when it
            is zero. Otherwise the fraction is written only if non-zero.
        fractdelim: Separator placed between the seconds and the fraction.
        alwayshours: Write the hours field even when it is zero.

    Returns:
        The formatted timecode string.
    """
    # Round the whole value first and split afterwards, so that a fraction
    # which rounds up to a full second carries into the seconds/minutes/hours
    # fields instead of being printed as ".000" next to a stale seconds value.
    if fract:
        total_millis = int(round(round(rawsecs, 3) * 1000))
        total_secs, millis = divmod(total_millis, 1000)
    else:
        total_secs, millis = int(round(rawsecs)), 0
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)

    timecode = "%02d:%02d" % (mins, secs)
    if hours or alwayshours:
        timecode = "%02d:%s" % (hours, timecode)
    if fract and (millis > 0 or alwaysfract):
        timecode = "%s%s%03d" % (timecode, fractdelim, millis)
    return timecode
def add_timing (output):
    """Annotate every entry of ``output['streams']`` with timing fields.

    Entries are laid end to end starting at 0.0 seconds. Each entry's
    ``['stream']['Playback length']`` is converted with
    ``parse_ogginfo_time`` and the entry gains four keys, in this order:
    ``start_time``, ``start_time_timecode``, ``duration`` and
    ``duration_timecode``. The dictionaries are modified in place and
    nothing is returned.
    """
    elapsed = 0.0
    for entry in output['streams']:
        length = parse_ogginfo_time(entry['stream']['Playback length'])
        # This entry begins where the previous ones ended.
        entry['start_time'] = elapsed
        entry['start_time_timecode'] = timecode_fromsecs(elapsed)
        entry['duration'] = length
        entry['duration_timecode'] = timecode_fromsecs(length)
        elapsed = elapsed + length
def nextiter (items):
    """Yield ``(item, following_item)`` pairs over *items*.

    Every element is yielded exactly once as the first member of a pair;
    the last element is paired with ``None``. An empty iterable yields
    nothing. Elements are never tested for truthiness, so falsy values
    such as ``0`` or ``{}`` are paired like any other.

    Args:
        items: Any iterable.

    Yields:
        Tuples ``(current, following)``, with ``following`` set to ``None``
        for the final element.
    """
    iterator = iter(items)
    try:
        current = next(iterator)
    except StopIteration:
        # Nothing to pair up.
        return
    for following in iterator:
        yield current, following
        current = following
    yield current, None
def print_srt(data, last_subtitle_duration=5.0, file=None):
    """Write one subtitle cue per entry of ``data['streams']`` to *file*.

    Each cue is three lines: ``<start> --> <end>``, the entry's comments
    serialised as JSON, and an empty line. A cue ends at the start timecode
    of the entry that follows it; the final cue ends
    ``last_subtitle_duration`` seconds after its own start time.
    *file* is passed straight to ``print``.
    """
    for entry, following in nextiter(data['streams']):
        if following:
            cue_end = following['start_time_timecode']
        else:
            # No later entry to borrow an end time from.
            cue_end = timecode_fromsecs(entry['start_time'] + last_subtitle_duration)
        print ("{} --> {}".format(entry['start_time_timecode'], cue_end), file=file)
        print (json.dumps(entry['comments']), file=file)
        print (file=file)
if __name__ == "__main__":
    import argparse

    # Command-line interface: where to read from / write to, and which
    # output flavour to produce.
    parser = argparse.ArgumentParser("")
    parser.add_argument("--input", type=argparse.FileType("r"), default=sys.stdin)
    parser.add_argument("--output", type=argparse.FileType("w"), default=sys.stdout)
    parser.add_argument("--last-subtitle-duration", type=float, default=5.0)
    parser.add_argument("--json", default=False, action="store_true")
    parser.add_argument("--vtt", default=False, action="store_true")
    options = parser.parse_args()

    # Pass 1 reads the ogginfo structure, pass 2 adds the timing fields.
    parsed = parse_ogginfo_output(options.input)
    add_timing(parsed)

    if options.json:
        # --json dumps the annotated structure instead of subtitle cues.
        print (json.dumps(parsed, indent=2), file=options.output)
    else:
        if options.vtt:
            # --vtt only adds this header in front of the cues.
            print ("""WEBVTT
Kind: captions
Language: en
""", file=options.output)
        print_srt(parsed, last_subtitle_duration=options.last_subtitle_duration, file=options.output)