From c3ea0e31c639002729d06898d5cd75aa83d6d738 Mon Sep 17 00:00:00 2001 From: Michael Murtaugh Date: Tue, 14 Apr 2020 13:29:02 +0200 Subject: [PATCH] reorganized readfrompad with functions --- lesssimplelayout.py | 33 ++++++++++++++++++++++++++++++--- readfrompad.py | 4 ++-- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/lesssimplelayout.py b/lesssimplelayout.py index ec885b2..d8898bc 100644 --- a/lesssimplelayout.py +++ b/lesssimplelayout.py @@ -3,6 +3,13 @@ from reportlab.pdfgen import canvas from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle from calibrestekje import Book, Publisher, init_session +from readfrompad import curl, parse_pad +from xml.etree import ElementTree as ET + + +paragraphs_by_header = parse_pad(curl("https://pad.xpub.nl/p/bootleg_annotations/export/txt")) +from pprint import pprint +pprint(paragraphs_by_header) pagewidth, pageheight = landscape(A6) @@ -16,9 +23,10 @@ styles = getSampleStyleSheet() session = init_session("sqlite:///metadata.db") for book in session.query(Book).all(): - + book_url = "https://hub.xpub.nl/bootleglibrary/book/{}".format(book.id) print (book.title) - print (book.authors) + print (book_url) + # print (book.authors) # c.drawString(10,pageheight-10, book.title) # c.showPage() @@ -41,7 +49,7 @@ for book in session.query(Book).all(): author_text = "" for author in book.authors: if not first: - text += ", " + author_text += ", " author_text += "{}".format(author.name) first = False @@ -53,4 +61,23 @@ for book in session.query(Book).all(): content.append(PageBreak()) content.append(Spacer(1, 12)) + # BACK SIDE + if book_url in paragraphs_by_header: + print ("FOUND ANNOTATIONS FOR BOOK", book_url) + # ANNOTATIONS FROM PAD + annotations = paragraphs_by_header[book_url] + for p in annotations: + p_text = ET.tostring(p, method="html", encoding="utf-8") + p = Paragraph(p_text, styles["Normal"]) + content.append(p) + content.append(PageBreak()) + content.append(Spacer(1, 12)) + else: + # BLANK BACK SIDE + p = Paragraph("", styles["Normal"]) + content.append(p) + content.append(PageBreak()) + content.append(Spacer(1, 12)) + + doc.build(content) \ No newline at end of file diff --git a/readfrompad.py b/readfrompad.py index 5e59807..d02dbce 100644 --- a/readfrompad.py +++ b/readfrompad.py @@ -3,6 +3,7 @@ import markdown, html5lib def parse_pad (pad_text): + print ("parse", pad_text) html = markdown.markdown(pad_text) t = html5lib.parseFragment(html, namespaceHTMLElements=False) # create a "database" of paragraphs associated with each URL given in an H1 @@ -20,8 +21,7 @@ def parse_pad (pad_text): return paragraphs_by_header def curl (url): - f = urlopen(pad_text_url) - return f.read().decode('utf-8') + return urlopen(url).read().decode('utf-8') if __name__== "__main__":