You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
158 lines
5.0 KiB
Python
158 lines
5.0 KiB
Python
#!/usr/bin/env python3
|
|
import re, sys, re, json, math
|
|
|
|
"""
|
|
Usage:
|
|
|
|
ogginfo some.ogg | python3 ogginfo-to-srt.py > some.srt
|
|
|
|
|
|
EXAMPLE OF OGGINFO OUTPUT
|
|
|
|
User comments section follows...
|
|
title=one
|
|
ENCODER=Liquidsoap/1.3.3 (Unix; OCaml 4.05.0)
|
|
Vorbis stream 1:
|
|
Total data length: 2882 bytes
|
|
Playback length: 0m:00.704s
|
|
Average bitrate: 32.731445 kb/s
|
|
"""
|
|
|
|
|
|
################################
|
|
# PASS 1: Read the structure
|
|
|
|
def parse_ogginfo_output(f):
    """Parse the text output of ``ogginfo`` into a dict of stream entries.

    ``f`` is any iterable of text lines (an open file, ``sys.stdin``, a list).

    Returns ``{'streams': [...]}`` where each entry is a dict that may hold
    ``'comments'`` (the ``name=value`` lines that follow a
    "User comments section follows..." line) and ``'stream'`` (the
    ``name: value`` lines that follow a "Vorbis stream" line).  A line
    starting with "Logical stream" ends the current section.  All values
    are kept as stripped strings.

    Progress messages are written to stderr, one per recognised line.
    """
    state = ""
    streams = []
    output = {'streams': streams}
    # Entry currently being filled; None until the first section is seen.
    stream = None
    comments = None
    streaminfo = None

    for line in f:
        if line.startswith("User comments section follows..."):
            print ("COMMENTS", file=sys.stderr)
            state = "comments"
            comments = {}
            stream = {'comments': comments}
            streams.append(stream)
        elif line.startswith("Vorbis stream"):
            print ("STREAM", file=sys.stderr)
            state = "stream"
            streaminfo = {}
            if stream is None or 'stream' in stream:
                # No comments section opened an entry for this stream
                # (either nothing has been seen yet, or the current entry
                # already has its stream info), so start a new entry.
                stream = {'stream': streaminfo}
                streams.append(stream)
            else:
                stream['stream'] = streaminfo
        elif line.startswith("Logical stream"):
            print ("EXIT STATE", file=sys.stderr)
            state = ""
        elif state == "comments":
            name, delim, value = line.strip().partition("=")
            if not delim:
                # Blank line or text without "=": not a comment, skip it
                # instead of failing on the unpack.
                continue
            name = name.strip()
            value = value.strip()
            print ("comment", (name, value), file=sys.stderr)
            comments[name] = value
        elif state == "stream":
            name, delim, value = line.strip().partition(":")
            if not delim:
                # Blank line or text without ":": not a stream property.
                continue
            name = name.strip()
            value = value.strip()
            print ("stream", (name, value), file=sys.stderr)
            streaminfo[name] = value

    return output
|
|
|
|
################################
|
|
# PASS 2: Add start time, duration, and timecodes
|
|
|
|
def parse_ogginfo_time (t):
    """Convert a ``<minutes>m:<seconds>.<fraction>s`` duration to seconds.

    The pattern is searched for anywhere in ``t``; the result is a float.
    When ``t`` contains no such pattern the function returns None.
    """
    found = re.search(r"(\d+)m\:(\d+\.\d+)s", t)
    if found is None:
        return None
    minutes_text, seconds_text = found.groups()
    return (int(minutes_text) * 60) + float(seconds_text)
|
|
|
|
def timecode_fromsecs(rawsecs, fract=True, alwaysfract=True, fractdelim='.', alwayshours=False):
    """Format a number of seconds as ``[HH:]MM:SS[.xxx]``.

    rawsecs     -- duration in seconds (int or float).
    fract       -- when true, keep millisecond precision; the ``.xxx`` part
                   is written if it is non-zero OR ``alwaysfract`` is true.
                   When false, the value is rounded to whole seconds and no
                   fraction is written.
    alwaysfract -- write the fraction even when it is ``000``.
    fractdelim  -- separator placed between the seconds and the fraction.
    alwayshours -- write the ``HH:`` field even when it is zero.

    Returns the formatted string.
    """
    # Work in integer milliseconds so that rounding carries into the
    # seconds/minutes/hours fields (0.9996 s -> "00:01.000", not "00:00.000").
    if fract:
        total_ms = int(round(rawsecs * 1000))
    else:
        total_ms = int(round(rawsecs)) * 1000

    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)

    if hours or alwayshours:
        timecode = "%02d:%02d:%02d" % (hours, mins, secs)
    else:
        timecode = "%02d:%02d" % (mins, secs)

    if fract and (millis or alwaysfract):
        timecode += "%s%03d" % (fractdelim, millis)
    return timecode
|
|
|
|
def add_timing (output):
    """Annotate every entry of ``output['streams']`` with timing fields.

    For each entry, in order, the ``'Playback length'`` value of its
    ``'stream'`` dict is converted to seconds and four keys are set in
    place: ``start_time`` (running total of the preceding durations, in
    seconds), ``start_time_timecode``, ``duration`` and
    ``duration_timecode``.  Nothing is returned.
    """
    elapsed = 0.0
    for entry in output['streams']:
        playback_length = entry['stream']['Playback length']
        length = parse_ogginfo_time(playback_length)
        entry['start_time'] = elapsed
        entry['start_time_timecode'] = timecode_fromsecs(elapsed)
        entry['duration'] = length
        entry['duration_timecode'] = timecode_fromsecs(length)
        # The next entry starts where this one ends.
        elapsed = elapsed + length
|
|
|
|
def nextiter (items):
    """Yield ``(item, following_item)`` for every element of ``items``.

    Each element is paired with the one that comes after it; the last
    element is paired with None.  An empty iterable yields nothing.
    Elements are paired regardless of their truthiness.
    """
    iterator = iter(items)
    try:
        current = next(iterator)
    except StopIteration:
        # Nothing to pair: finish without yielding.
        return
    for following in iterator:
        yield current, following
        current = following
    yield current, None
|
|
|
|
def print_srt(data, last_subtitle_duration=5.0, file=None):
    """Print one subtitle cue per entry of ``data['streams']`` to ``file``.

    Each cue is a ``start --> end`` line, the entry's comments serialised
    as JSON, and an empty line.  A cue ends at the start timecode of the
    following entry; the final cue ends ``last_subtitle_duration`` seconds
    after its own start time.
    """
    for cur, nxt in nextiter(data['streams']):
        if nxt:
            span = (cur['start_time_timecode'], nxt['start_time_timecode'])
        else:
            # No following entry to borrow an end time from.
            closing = timecode_fromsecs(cur['start_time'] + last_subtitle_duration)
            span = (cur['start_time_timecode'], closing)
        print ("{} --> {}".format(*span), file=file)
        print (json.dumps(cur['comments']), file=file)
        print (file=file)
|
|
|
|
if __name__ == "__main__":
    import argparse

    # Command line: read ogginfo text from --input (stdin by default) and
    # write JSON (--json) or subtitle cues (optionally with a WebVTT header,
    # --vtt) to --output (stdout by default).
    parser = argparse.ArgumentParser("")
    parser.add_argument("--input", default=sys.stdin, type=argparse.FileType("r"))
    parser.add_argument("--output", default=sys.stdout, type=argparse.FileType("w"))
    parser.add_argument("--last-subtitle-duration", default=5.0, type=float)
    parser.add_argument("--json", action="store_true", default=False)
    parser.add_argument("--vtt", action="store_true", default=False)
    args = parser.parse_args()

    output = parse_ogginfo_output(args.input)
    add_timing(output)
    if args.json:
        print (json.dumps(output, indent=2), file=args.output)
    else:
        if args.vtt:
            # print() appends the newline that leaves a blank line after
            # the header.
            print ("WEBVTT\nKind: captions\nLanguage: en\n", file=args.output)
        print_srt(output, file=args.output, last_subtitle_duration=args.last_subtitle_duration)
|
|
|