reorganized readfrompad with functions

5 years ago · c3ea0e31c6
parent 2a23d871de
commit c3ea0e31c6
2 changed files with 32 additions and 5 deletions
--- a/lesssimplelayout.py
+++ b/lesssimplelayout.py
@@ -3,6 +3,13 @@ from reportlab.pdfgen import canvas
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 from calibrestekje import Book, Publisher, init_session
+from readfrompad import curl, parse_pad
+from xml.etree import ElementTree as ET 
+
+
+paragraphs_by_header = parse_pad(curl("https://pad.xpub.nl/p/bootleg_annotations/export/txt"))
+from pprint import pprint
+pprint(paragraphs_by_header)

 pagewidth, pageheight = landscape(A6)

@@ -16,9 +23,10 @@ styles = getSampleStyleSheet()
 session = init_session("sqlite:///metadata.db")

 for book in session.query(Book).all():
-
+    book_url = "https://hub.xpub.nl/bootleglibrary/book/{}".format(book.id)
    print (book.title)
-    print (book.authors)
+    print (book_url)
+    # print (book.authors)
    
    # c.drawString(10,pageheight-10, book.title)
    # c.showPage()
@@ -41,7 +49,7 @@ for book in session.query(Book).all():
    author_text = ""
    for author in book.authors:
        if not first:
-            text += ", "
+            author_text += ", "
        author_text += "<font size=12>{}</font>".format(author.name)
        first = False

@@ -53,4 +61,23 @@ for book in session.query(Book).all():
    content.append(PageBreak())
    content.append(Spacer(1, 12))

+    # BACK SIDE
+    if book_url in paragraphs_by_header:
+        print ("FOUND ANNOTATIONS FOR BOOK", book_url)
+        # ANNOTATIONS FROM PAD
+        annotations = paragraphs_by_header[book_url]
+        for p in annotations:
+            p_text = ET.tostring(p, method="html", encoding="utf-8")
+            p = Paragraph(p_text, styles["Normal"])
+            content.append(p)
+            content.append(PageBreak())
+            content.append(Spacer(1, 12))
+    else:
+        # BLANK BACK SIDE
+        p = Paragraph("", styles["Normal"])
+        content.append(p)
+        content.append(PageBreak())
+        content.append(Spacer(1, 12))
+
+
 doc.build(content)
--- a/readfrompad.py
+++ b/readfrompad.py
@@ -3,6 +3,7 @@ import markdown, html5lib


 def parse_pad (pad_text):
+    print ("parse", pad_text)
    html = markdown.markdown(pad_text)
    t = html5lib.parseFragment(html, namespaceHTMLElements=False)
    # create a "database" of paragraphs associated with each URL given in an H1
@@ -20,8 +21,7 @@ def parse_pad (pad_text):
    return paragraphs_by_header

 def curl (url):
-    f = urlopen(pad_text_url)
-    return f.read().decode('utf-8')
+    return urlopen(url).read().decode('utf-8')


 if __name__== "__main__":