use markdown + html5lib to structure the text

master
Michael Murtaugh 5 years ago
parent 5806ed6e90
commit 0ce9ba7932

@ -2,12 +2,24 @@ from urllib.request import urlopen
import markdown, html5lib
sample_text = """
# https://hub.xpub.nl/bootleglibrary/book/374
This is an annotation of Mrs. Gersande's Binding Index
"""
pad_url = "https://pad.xpub.nl/p/boring_old_tomato_sandwiches"
pad_text_url = pad_url + "/export/txt"
f = urlopen(pad_text_url)
pad_text = f.read().decode('utf-8')
pad_text = sample_text
# print (pad_text)
# Turn pad text into html text
html = markdown.markdown(pad_text)
print (html)
# Turn html text in an elementtree
t = html5lib.parseFragment(html, namespaceHTMLElements=False)
print (t)
for elt in t:

Loading…
Cancel
Save