reorganized readfrompad with functions

master
Michael Murtaugh 5 years ago
parent 2a23d871de
commit c3ea0e31c6

@ -3,6 +3,13 @@ from reportlab.pdfgen import canvas
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from calibrestekje import Book, Publisher, init_session
from readfrompad import curl, parse_pad
from xml.etree import ElementTree as ET
paragraphs_by_header = parse_pad(curl("https://pad.xpub.nl/p/bootleg_annotations/export/txt"))
from pprint import pprint
pprint(paragraphs_by_header)
pagewidth, pageheight = landscape(A6)
@ -16,9 +23,10 @@ styles = getSampleStyleSheet()
session = init_session("sqlite:///metadata.db")
for book in session.query(Book).all():
book_url = "https://hub.xpub.nl/bootleglibrary/book/{}".format(book.id)
print (book.title)
print (book.authors)
print (book_url)
# print (book.authors)
# c.drawString(10,pageheight-10, book.title)
# c.showPage()
@ -41,7 +49,7 @@ for book in session.query(Book).all():
author_text = ""
for author in book.authors:
if not first:
text += ", "
author_text += ", "
author_text += "<font size=12>{}</font>".format(author.name)
first = False
@ -53,4 +61,23 @@ for book in session.query(Book).all():
content.append(PageBreak())
content.append(Spacer(1, 12))
# BACK SIDE
# Back of the card: when the pad has annotations filed under this book's URL,
# render each one as a Paragraph; otherwise append an empty Paragraph so the
# back stays blank.
# NOTE(review): indentation is not visible in this view, so whether the
# PageBreak/Spacer pair in the first branch runs once per annotation or once
# per book cannot be determined here -- confirm against the real file.
if book_url in paragraphs_by_header:
print ("FOUND ANNOTATIONS FOR BOOK", book_url)
# ANNOTATIONS FROM PAD
annotations = paragraphs_by_header[book_url]
for p in annotations:
# Serialise the parsed element back to HTML markup; because an explicit
# encoding is passed, ET.tostring returns bytes rather than str.
p_text = ET.tostring(p, method="html", encoding="utf-8")
# p is rebound here from the element to the Paragraph built from it.
p = Paragraph(p_text, styles["Normal"])
content.append(p)
content.append(PageBreak())
content.append(Spacer(1, 12))
else:
# BLANK BACK SIDE
# No annotations for this URL: an empty Paragraph followed by the same
# PageBreak/Spacer pair as the annotated branch.
p = Paragraph("", styles["Normal"])
content.append(p)
content.append(PageBreak())
content.append(Spacer(1, 12))
doc.build(content)

@ -3,6 +3,7 @@ import markdown, html5lib
def parse_pad (pad_text):
print ("parse", pad_text)
html = markdown.markdown(pad_text)
t = html5lib.parseFragment(html, namespaceHTMLElements=False)
# create a "database" of paragraphs associated with each URL given in an H1
@ -20,8 +21,7 @@ def parse_pad (pad_text):
return paragraphs_by_header
def curl(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address to open; anything ``urlopen`` accepts.

    Returns:
        The whole response body as a ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Whatever ``urlopen`` raises when the address cannot be opened
        (``urlopen`` is imported outside this block; presumably
        ``urllib.request.urlopen`` -- confirm against the file's imports).
    """
    # Use the parameter, not the stale ``pad_text_url`` name, and close the
    # response as soon as the body has been read.
    with urlopen(url) as response:
        return response.read().decode('utf-8')
if __name__== "__main__":

Loading…
Cancel
Save