reorganized readfrompad into reusable functions (curl, parse_pad)

master
Michael Murtaugh 5 years ago
parent 2a23d871de
commit c3ea0e31c6

@ -3,6 +3,13 @@ from reportlab.pdfgen import canvas
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from calibrestekje import Book, Publisher, init_session from calibrestekje import Book, Publisher, init_session
from readfrompad import curl, parse_pad
from xml.etree import ElementTree as ET
paragraphs_by_header = parse_pad(curl("https://pad.xpub.nl/p/bootleg_annotations/export/txt"))
from pprint import pprint
pprint(paragraphs_by_header)
pagewidth, pageheight = landscape(A6) pagewidth, pageheight = landscape(A6)
@ -16,9 +23,10 @@ styles = getSampleStyleSheet()
session = init_session("sqlite:///metadata.db") session = init_session("sqlite:///metadata.db")
for book in session.query(Book).all(): for book in session.query(Book).all():
book_url = "https://hub.xpub.nl/bootleglibrary/book/{}".format(book.id)
print (book.title) print (book.title)
print (book.authors) print (book_url)
# print (book.authors)
# c.drawString(10,pageheight-10, book.title) # c.drawString(10,pageheight-10, book.title)
# c.showPage() # c.showPage()
@ -41,7 +49,7 @@ for book in session.query(Book).all():
author_text = "" author_text = ""
for author in book.authors: for author in book.authors:
if not first: if not first:
text += ", " author_text += ", "
author_text += "<font size=12>{}</font>".format(author.name) author_text += "<font size=12>{}</font>".format(author.name)
first = False first = False
@ -53,4 +61,23 @@ for book in session.query(Book).all():
content.append(PageBreak()) content.append(PageBreak())
content.append(Spacer(1, 12)) content.append(Spacer(1, 12))
# BACK SIDE
if book_url in paragraphs_by_header:
print ("FOUND ANNOTATIONS FOR BOOK", book_url)
# ANNOTATIONS FROM PAD
annotations = paragraphs_by_header[book_url]
for p in annotations:
p_text = ET.tostring(p, method="html", encoding="utf-8")
p = Paragraph(p_text, styles["Normal"])
content.append(p)
content.append(PageBreak())
content.append(Spacer(1, 12))
else:
# BLANK BACK SIDE
p = Paragraph("", styles["Normal"])
content.append(p)
content.append(PageBreak())
content.append(Spacer(1, 12))
doc.build(content) doc.build(content)

@ -3,6 +3,7 @@ import markdown, html5lib
def parse_pad (pad_text): def parse_pad (pad_text):
print ("parse", pad_text)
html = markdown.markdown(pad_text) html = markdown.markdown(pad_text)
t = html5lib.parseFragment(html, namespaceHTMLElements=False) t = html5lib.parseFragment(html, namespaceHTMLElements=False)
# create a "database" of paragraphs associated with each URL given in an H1 # create a "database" of paragraphs associated with each URL given in an H1
@ -20,8 +21,7 @@ def parse_pad (pad_text):
return paragraphs_by_header return paragraphs_by_header
def curl (url): def curl (url):
f = urlopen(pad_text_url) return urlopen(url).read().decode('utf-8')
return f.read().decode('utf-8')
if __name__== "__main__": if __name__== "__main__":

Loading…
Cancel
Save