use markdown + html5lib to structure the text
parent
52dd2f380c
commit
5806ed6e90
@ -1,11 +1,14 @@
|
|||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
|
import markdown, html5lib
|
||||||
|
|
||||||
|
|
||||||
pad_url = "https://pad.xpub.nl/p/boring_old_tomato_sandwiches"
|
pad_url = "https://pad.xpub.nl/p/boring_old_tomato_sandwiches"
|
||||||
pad_text_url = pad_url + "/export/txt"
|
pad_text_url = pad_url + "/export/txt"
|
||||||
|
|
||||||
f = urlopen(pad_text_url)
|
f = urlopen(pad_text_url)
|
||||||
pad_text = f.read().decode('utf-8')
|
pad_text = f.read().decode('utf-8')
|
||||||
|
# print (pad_text)
|
||||||
print (pad_text)
|
html = markdown.markdown(pad_text)
|
||||||
|
t = html5lib.parseFragment(html, namespaceHTMLElements=False)
|
||||||
|
print (t)
|
||||||
|
for elt in t:
|
||||||
|
print (elt)
|
||||||
|
Loading…
Reference in New Issue