From 0ce9ba7932a2242d5850a01483a008a8eb9ee51a Mon Sep 17 00:00:00 2001 From: Michael Murtaugh Date: Tue, 14 Apr 2020 12:56:40 +0200 Subject: [PATCH] use markdown + html5lib to structure the text --- readfrompad.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/readfrompad.py b/readfrompad.py index f4e803e..3a61b66 100644 --- a/readfrompad.py +++ b/readfrompad.py @@ -2,12 +2,24 @@ from urllib.request import urlopen import markdown, html5lib +sample_text = """ +# https://hub.xpub.nl/bootleglibrary/book/374 +This is an annotation of Mrs. Gersande's Binding Index +""" + pad_url = "https://pad.xpub.nl/p/boring_old_tomato_sandwiches" pad_text_url = pad_url + "/export/txt" f = urlopen(pad_text_url) + pad_text = f.read().decode('utf-8') +pad_text = sample_text + # print (pad_text) +# Turn pad text into html text html = markdown.markdown(pad_text) +print (html) + +# Turn html text in an elementtree t = html5lib.parseFragment(html, namespaceHTMLElements=False) print (t) for elt in t: