diff --git a/screenless/bureau/publications/article.html b/screenless/bureau/publications/article.html new file mode 100644 index 0000000..57af87d --- /dev/null +++ b/screenless/bureau/publications/article.html @@ -0,0 +1,55 @@ + + + ${title} + + + +

${title}

+

${date}

+ ${article} +

Article Source: ${url}

@add_command("r", "Print a web page for reading")
def print_url(self, data):
    """
    Print out a web page for reading.

    The command requires a short-code, typically referenced via barcode.
    Short-codes refer to full resource URIs recorded in the Publications
    office 'urldb' database.

    The page is downloaded, reduced to its readable content, and every
    link that carries an href gets a fresh short-code stored in 'urldb'
    plus a Code 128 barcode ("PBr.<short-code>") inserted right after it.
    The result is rendered through the "article.html" template.

    data -- command payload; everything before the first "." is taken as
            the short-code (NOTE(review): the meaning of the remainder is
            not visible here -- confirm against the caller).

    Returns None. An unknown short-code is reported on stdout and nothing
    is printed. Download and parsing errors are not caught here.
    """
    # partition() tolerates payloads with no dot or with several dots,
    # where unpacking the result of split(".") would raise ValueError.
    shortcode, _, _ = data.partition(".")

    print("looking up short-code:", shortcode)
    with self.dbenv.begin(db=self.urldb) as txn:
        stored = txn.get(shortcode.encode('utf-8'))

    # Test the raw value before decoding: a missing key comes back as
    # None, and calling .decode() on it would raise before we could
    # report the problem.
    if not stored:
        print("ERROR: no valid URL in db for short code: ", shortcode)
        return
    url = stored.decode()

    # download; the context manager closes the connection once the body
    # has been read
    headers = {'User-Agent': 'Mozilla/5.0'}
    req = urllib.request.Request(url, None, headers)
    with urllib.request.urlopen(req) as urldata:
        page = urldata.read()

    # re-render with readability
    doc = readability.Document(page, url=url)
    timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")

    html = lxml.html.document_fromstring(doc.summary())

    # store links then make corresponding svg barcodes
    alphabet = string.ascii_letters + string.digits
    # One write transaction covers all links instead of one per link.
    with self.dbenv.begin(write=True, db=self.urldb) as txn:
        for link in html.findall(".//a"):
            href = link.attrib.get("href")
            if not href:
                # Nothing to resolve later, so do not emit a barcode that
                # would point at a short-code missing from the db.
                continue

            tmpcode = ''.join(random.choice(alphabet) for _ in range(5))
            txn.put(tmpcode.encode(), href.encode())

            svg = code128.svg("PBr." + tmpcode)

            sidenote = html.makeelement("span")
            sidenote.attrib["class"] = "sidenote"
            sidenote.append(lxml.html.fromstring(svg.encode()))
            link.addnext(sidenote)

    self.print_full("article.html", title=doc.title(),
                    article=lxml.html.tostring(html).decode("utf-8"),
                    url=url, date=timestamp)