reorganized readfrompad with functions

5 years ago · c3ea0e31c6
parent 2a23d871de
commit c3ea0e31c6
2 changed files with 32 additions and 5 deletions
--- a/lesssimplelayout.py
+++ b/lesssimplelayout.py
@@ -3,6 +3,13 @@ from reportlab.pdfgen import canvas
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 from calibrestekje import Book, Publisher, init_session
 from readfrompad import curl, parse_pad
 from xml.etree import ElementTree as ET 
 paragraphs_by_header = parse_pad(curl("https://pad.xpub.nl/p/bootleg_annotations/export/txt"))
 from pprint import pprint
 pprint(paragraphs_by_header)
 pagewidth, pageheight = landscape(A6)
@@ -16,9 +23,10 @@ styles = getSampleStyleSheet()
 session = init_session("sqlite:///metadata.db")
 for book in session.query(Book).all():
-
+    book_url = "https://hub.xpub.nl/bootleglibrary/book/{}".format(book.id)
    print (book.title)
-    print (book.authors)
+    print (book_url)
    # print (book.authors)
    # c.drawString(10,pageheight-10, book.title)
    # c.showPage()
@@ -41,7 +49,7 @@ for book in session.query(Book).all():
    author_text = ""
    for author in book.authors:
        if not first:
-            text += ", "
+            author_text += ", "
        author_text += "<font size=12>{}</font>".format(author.name)
        first = False
@@ -53,4 +61,23 @@ for book in session.query(Book).all():
    content.append(PageBreak())
    content.append(Spacer(1, 12))
    # BACK SIDE
    if book_url in paragraphs_by_header:
        print ("FOUND ANNOTATIONS FOR BOOK", book_url)
        # ANNOTATIONS FROM PAD
        annotations = paragraphs_by_header[book_url]
        for p in annotations:
            p_text = ET.tostring(p, method="html", encoding="utf-8")
            p = Paragraph(p_text, styles["Normal"])
            content.append(p)
            content.append(PageBreak())
            content.append(Spacer(1, 12))
    else:
        # BLANK BACK SIDE
        p = Paragraph("", styles["Normal"])
        content.append(p)
        content.append(PageBreak())
        content.append(Spacer(1, 12))
 doc.build(content)
--- a/readfrompad.py
+++ b/readfrompad.py
@@ -3,6 +3,7 @@ import markdown, html5lib
 def parse_pad (pad_text):
    print ("parse", pad_text)
    html = markdown.markdown(pad_text)
    t = html5lib.parseFragment(html, namespaceHTMLElements=False)
    # create a "database" of paragraphs associated with each URL given in an H1
@@ -20,8 +21,7 @@ def parse_pad (pad_text):
    return paragraphs_by_header
 def curl (url):
-    f = urlopen(pad_text_url)
+    return urlopen(url).read().decode('utf-8')
    return f.read().decode('utf-8')
 if __name__== "__main__":