diff --git a/readfrompad.py b/readfrompad.py
index 8158413..f4e803e 100644
--- a/readfrompad.py
+++ b/readfrompad.py
@@ -1,11 +1,14 @@
 from urllib.request import urlopen
+import markdown, html5lib
 
 pad_url = "https://pad.xpub.nl/p/boring_old_tomato_sandwiches"
 pad_text_url = pad_url + "/export/txt"
-
 f = urlopen(pad_text_url)
 pad_text = f.read().decode('utf-8')
-
-print (pad_text)
-
+# print (pad_text)
+html = markdown.markdown(pad_text)
+t = html5lib.parseFragment(html, namespaceHTMLElements=False)
+print (t)
+for elt in t:
+    print (elt)
 