# publications office for reading and writing
from datetime import datetime
import json
import os
import random
import string
import urllib.error
import urllib.parse
import urllib.request

import code128
import lxml.html
# Import the submodule explicitly: a bare `import PIL` does not load
# PIL.Image, which new_site() relies on. This still binds the name `PIL`.
import PIL.Image
from readability import readability

from bureau import Bureau, add_command, add_api


class Publications(Bureau):
    """
    The Publications Office serves as a kind of screenless content management
    system. Create, update and organize your sites while doing most of the work
    on paper or anything you can photograph.
    """

    name = "Publications Office"
    prefix = "PB"
    version = 0

    def __init__(self):
        """Set up the site storage directory and the short-code URL database."""
        Bureau.__init__(self)

        # set up db for published sites
        # TODO: rename this to something less ambiguous
        self.db = os.path.expanduser("~/.screenless/PB.data")
        if not os.path.exists(self.db):
            # makedirs also creates a missing parent directory
            os.makedirs(self.db)

        # set up urldb for short-codes
        # NOTE(review): self.dbenv is presumably provided by Bureau.__init__
        self.urldb = self.dbenv.open_db(b"urldb")

    @add_command("new", "Create a new Publication/Site")
    def new_site(self):
        """
        Create a new Publication/Site, set up config and take a picture from
        the document camera as the index page. Finally, it will print out
        the main page with commands for working with the site.
        """
        # find the first unused numeric site id (directories are named 1, 2, ...)
        site_id = 1
        site_dir = os.path.join(self.db, str(site_id))
        while os.path.exists(site_dir):
            site_id += 1
            site_dir = os.path.join(self.db, str(site_id))
        os.mkdir(site_dir)

        root_d = {"template": "default", "id": site_id}
        # the mode belongs to open(), not to os.path.join()
        with open(os.path.join(site_dir, "root.json"), "w") as root_json:
            json.dump(root_d, root_json)

        photo = self.send("PX", "photo")["photo"]

        # TODO: come up with a generic set of img form operations for Bureau
        #       should map regions defined with percentages to names
        form_img = PIL.Image.open(photo)
        fx, fy = form_img.size
        # top-left region: half the width, top eighth of the height
        title_region = (0, 0, 0.5 * fx, 0.125 * fy)
        title_img = form_img.crop(title_region)
        # everything below the title strip, full width
        content_region = (0, 0.125 * fy, fx, fy)
        content_img = form_img.crop(content_region)

    def _update_page(self, site, page):
        """Placeholder for updating a page of a site; not implemented yet."""
        pass

    @add_command("r", "Print a web page for reading")
    def print_url(self, data):
        """
        Print out a web page for reading. The command requires a short-code,
        typically referenced via barcode. Short-codes refer to full resource
        URIs recorded in the Publications office 'urldb' database.

        Every link with an href in the readable article gets a fresh
        short-code stored in 'urldb' and a numbered footnote with a barcode
        appended to the document before printing.
        """
        shortcode, _ = data.split(".")
        with self.dbenv.begin(db=self.urldb) as txn:
            print("looking up short-code:", shortcode)
            raw_url = txn.get(shortcode.encode('utf-8'))

        # a missing key yields None, so decode only when something was found
        url = raw_url.decode() if raw_url else None
        if not url:
            print("ERROR: no valid URL in db for short code: ", shortcode)
            return

        # download
        headers = {'User-Agent': 'Mozilla/5.0'}
        req = urllib.request.Request(url, None, headers)
        with urllib.request.urlopen(req) as urldata:
            page_bytes = urldata.read()

        # re-render with readability
        doc = readability.Document(page_bytes, url=url)

        timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")

        html = lxml.html.document_fromstring(doc.summary())

        # store links then make corresponding svg barcodes
        # anchors without an href have nothing to reference, so they get
        # neither a short-code nor a footnote
        links = [a for a in html.findall(".//a") if "href" in a.attrib]
        alphabet = string.ascii_letters + string.digits
        for notecount, link in enumerate(links, start=1):
            href = link.attrib["href"]
            tmpcode = ''.join(random.choices(alphabet, k=5))
            with self.dbenv.begin(write=True, db=self.urldb) as txn:
                txn.put(tmpcode.encode(), href.encode())

            # barcode payload: bureau prefix + command + short-code
            svg = code128.svg("PBr." + tmpcode)

            footnote = html.makeelement("div")
            footnote.attrib["class"] = "footnote"
            notetext = html.makeelement("div")
            notetext.text = str(notecount) + ". " + href
            footnote.append(notetext)
            footnote.append(lxml.html.fromstring(svg.encode()))
            html.append(footnote)

        self.print_full("article.html", title=doc.title(),
                        article=lxml.html.tostring(html).decode("utf-8"),
                        url=url, date=timestamp)


def main():
    """Instantiate the Publications Office and run it."""
    pb = Publications()
    pb.run()


if __name__ == "__main__":
    main()