You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

15 lines
514 B
Python

import csv, sys
def add_end_times(rows):
    """Yield ``[url, start, end, text]`` for every row in *rows*.

    Each input row is a mapping with ``'url'``, ``'start'`` and ``'text'``
    keys (as produced by ``csv.DictReader``).  A row's ``end`` is the
    ``start`` of the row that immediately follows it when both rows share
    the same ``url``; otherwise (the next row has a different url, or
    there is no next row) ``end`` is ``None``.

    Rows are processed one at a time, so *rows* may be any iterable and is
    never materialized.  Empty input yields nothing.

    Raises:
        KeyError: if a row lacks one of the required keys.
    """
    previous = None
    for row in rows:
        if previous is not None:
            # The previous row ends where this one starts, but only when
            # both belong to the same url.
            end = row['start'] if previous['url'] == row['url'] else None
            yield [previous['url'], previous['start'], end, previous['text']]
        previous = row
    # The last row never has a successor.  The guard keeps empty input
    # (no data rows at all) from indexing into None.
    if previous is not None:
        yield [previous['url'], previous['start'], None, previous['text']]


def main():
    """Read CSV rows from stdin and write them to stdout with an "end" column.

    The header row is always written, even when the input has no data rows.
    ``None`` end values are emitted by the csv writer as empty fields.
    """
    outf = csv.writer(sys.stdout)
    outf.writerow(["url", "start", "end", "text"])
    outf.writerows(add_end_times(csv.DictReader(sys.stdin)))


if __name__ == "__main__":
    main()