# the-screenless-office/screenless/bureau/publications/publications.py

# publications office for reading and writing
from datetime import datetime
import json
import os
import random
import string
import urllib.request, urllib.parse, urllib.error

import code128
import feedparser
import lxml.html
import PIL
from readability import readability

from bureau import Bureau, add_command, add_api


class Publications(Bureau):
    """
    The Publications Office serves as a kind of screenless content management
    system.  Create, update and organize your sites while doing most of the work
    on paper or anything you can photograph.

    Besides the (still skeletal) site management commands, this bureau prints
    a daily newspaper assembled from RSS feeds and prints readable versions
    of web pages.  Links are stored in the 'urldb' database under random
    short-codes so that they can be referenced from printed barcodes.
    """

    name = "Publications Office"
    prefix = "PB"
    version = 0

    def __init__(self):
        Bureau.__init__(self)

        # set up db for published sites
        # TODO: rename this to something less ambiguous
        self.db = os.path.expanduser("~/.screenless/PB.data")
        # makedirs also creates a missing parent directory and does not fail
        # if the directory already exists
        os.makedirs(self.db, exist_ok=True)

        # set up urldb for short-codes
        self.urldb = self.dbenv.open_db(b"urldb")

    def _make_shorturl(self, url):
        """store a URL in urldb under a freshly generated short-code

        The short-code is 5 random characters drawn from ASCII letters and
        digits.  Every call creates a new entry; an existing entry for the
        same URL is not looked up or reused.

        returns the short-code as a str
        """
        alphabet = string.ascii_letters + string.digits

        # we only have about a billion so make sure we don't collide keys
        with self.dbenv.begin(write=True, db=self.urldb) as txn:
            while True:
                tmpcode = ''.join(random.choice(alphabet) for _ in range(5))
                key = tmpcode.encode()
                if txn.get(key) is None:
                    break
            txn.put(key, url.encode())

        return tmpcode

    def _get_url(self, shortcode):
        """look up a URL from a shortcode
        returns full unicode url, or None if the shortcode is not in urldb
        """
        with self.dbenv.begin(db=self.urldb) as txn:
            raw_url = txn.get(shortcode.encode())
            # a missing key yields None, which has no decode()
            if raw_url is None:
                return None
            return raw_url.decode()

    @add_command("new", "Create a new Publication/Site")
    def new_site(self):
        """
        Create a new Publication/Site, set up config and take a picture from
        the document camera as the index page.  Finally, it will print out
        the main page with commands for working with the site.

        NOTE: only the directory/config creation and the cropping of the
        photo into title and content regions are implemented so far; nothing
        is saved or printed from the cropped images yet.
        """
        # `import PIL` alone does not load the Image submodule
        from PIL import Image

        # sites live in numbered sub-directories; take the first free number
        site_id = 1
        site_dir = os.path.join(self.db, str(site_id))
        while os.path.exists(site_dir):
            site_id += 1
            site_dir = os.path.join(self.db, str(site_id))
        os.mkdir(site_dir)

        root_d = {"template": "default", "id": site_id}
        # the mode belongs to open(), not to os.path.join()
        with open(os.path.join(site_dir, "root.json"), "w") as root_json:
            root_json.write(json.dumps(root_d))

        photo = self.send("PX", "photo")["photo"]

        # TODO: come up with a generic set of img form operations for Bureau
        #       should map regions defined with percentages to names
        form_img = Image.open(photo)
        fx, fy = form_img.size
        # title: top-left box, half the width and one eighth of the height
        title_region = (0, 0, 0.5 * fx, 0.125 * fy)
        title_img = form_img.crop(title_region)
        # content: everything below the title strip, full width
        content_region = (0, 0.125 * fy, fx, fy)
        content_img = form_img.crop(content_region)

    def _update_page(self, site, page):
        """placeholder for updating a page of a site (not implemented yet)"""
        pass

    @add_command("news", "Print a personalized daily newspaper")
    def daily_news(self):
        """
        Collect the news entries and the unread messages reported by the
        "PO" bureau, then print them with the news.html template.
        """
        news = self._get_news()
        # TODO: get weather
        # TODO: get finance
        inbox = self.send("PO", "unread")
        # debug output; guard against an empty list when no feed delivered
        if news:
            print("news", news[0])
        self.print_full("news.html", news=news, inbox=inbox)

    @add_command("r", "Print a web page for reading")
    def print_url(self, data):
        """
        Print out a web page for reading.  The command requires a short-code,
        typically referenced via barcode.  Short-codes refer to full resource
        URIs recorded in the Publications office 'urldb' database.

        data must contain exactly one "." -- the part before it is used as
        the short-code and the part after it is ignored.  If the short-code
        is unknown an error is printed and nothing else happens.
        """
        # NOTE(review): the meaning of the ignored second part is not visible
        # here -- confirm against the command dispatch in Bureau
        shortcode, _ = data.split(".")
        with self.dbenv.begin(db=self.urldb) as txn:
            print("looking up short-code:", shortcode)
            raw_url = txn.get(shortcode.encode('utf-8'))
            # only decode when the key exists, so the check below is reached
            url = raw_url.decode() if raw_url is not None else None

        if not url:
            print("ERROR: no valid URL in db for short code: ", shortcode)
            return

        # download
        headers = {'User-Agent': 'Mozilla/5.0'}
        req = urllib.request.Request(url, None, headers)
        # close the connection as soon as the body has been read
        with urllib.request.urlopen(req) as urldata:
            page = urldata.read()

        # re-render with readability
        doc = readability.Document(page, url=url)
        timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")
        html = lxml.html.document_fromstring(doc.summary())

        notecount = 0
        # store links then make corresponding svg barcodes
        for link in html.findall(".//a"):
            href = link.get("href")
            # anchors without a target cannot be stored or followed
            if not href:
                continue

            notecount += 1
            tmpcode = self._make_shorturl(href)

            # barcode text is the "PBr." command followed by the short-code
            svg = code128.svg("PBr." + tmpcode)

            footnote = html.makeelement("div")
            footnote.attrib["class"] = "footnote"
            notetext = html.makeelement("div")
            notetext.text = str(notecount) + ". " + href
            footnote.append(notetext)
            footnote.append(lxml.html.fromstring(svg.encode()))
            html.append(footnote)

        self.print_full("article.html", title=doc.title(),
                        article=lxml.html.tostring(html).decode("utf-8"),
                        url=url, date=timestamp)

    def _get_news(self):
        """fetch a set of latest news entries from sources specified in config

        Each returned entry is a feedparser entry with these attributes set
        in addition: source (feed title, or the feed URL if the feed has no
        title), dbhash (short-code for the entry link), img (thumbnail URL
        or " ") and summary (cut to below 500 characters).
        """
        entries = []

        # TODO: come up with a good way to make this configurable
        feeds = [("http://feeds.bbci.co.uk/news/world/rss.xml", 10),
                 ("http://www.rbb-online.de/content/rbb/rbb/politik/uebersicht-kurznachrichten.xml/allitems=true/feed=rss/path=middleColumnList!teaserbox.xml", 8),
                 ("http://feeds.arstechnica.com/arstechnica/index/", 10),
                 ("http://feeds.feedburner.com/zerohedge/feed", 5),
                 ("http://planet.python.org/rss20.xml", 5)]

        for url, num_entries in feeds:
            feed = feedparser.parse(url)

            # work around if we don't have enough news
            num_entries = min(num_entries, len(feed.entries))
            # fall back to the feed URL when the feed carries no title
            source_title = getattr(feed.feed, "title", url)

            for _ in range(num_entries):
                # NOTE(review): pop() takes entries from the END of the list;
                # confirm that this really selects the latest items
                entry = feed.entries.pop()
                entry.source = source_title
                entry.dbhash = self._make_shorturl(entry.link)
                if hasattr(entry, "media_thumbnail"):
                    entry.img = entry.media_thumbnail[-1]["url"]
                else:
                    entry.img = " "

                # limit summary to the last space below 500 characters
                summary = getattr(entry, "summary", "")
                if len(summary) > 500:
                    end = summary.rfind(" ", 0, 499)
                    # no usable space found: hard cut instead of slicing
                    # with rfind's -1 (which would only drop one character)
                    if end <= 0:
                        end = 499
                    summary = summary[0:end] + "…"
                entry.summary = summary

                entries.append(entry)

        return entries


def main():
    """Create the Publications bureau and call its run() method."""
    Publications().run()


# call main() only when this file is executed directly, not when imported
if __name__ == "__main__":
    main()
article scraping, printing and template. barcodes and db for links. 8 years ago			`# publications office for reading and writing`
			`from datetime import datetime`
big cleanup and rearrange into modules. added skeleton for publications CMS. added a sales dept to play vids from barcodes. 8 years ago			`import json`
			`import os`
article scraping, printing and template. barcodes and db for links. 8 years ago			`import random`
			`import string`
			`import urllib.request, urllib.parse, urllib.error`
big cleanup and rearrange into modules. added skeleton for publications CMS. added a sales dept to play vids from barcodes. 8 years ago
article scraping, printing and template. barcodes and db for links. 8 years ago			`import code128`
fix missing module 7 years ago			`import feedparser`
article scraping, printing and template. barcodes and db for links. 8 years ago			`import lxml.html`
big cleanup and rearrange into modules. added skeleton for publications CMS. added a sales dept to play vids from barcodes. 8 years ago			`import PIL`
article scraping, printing and template. barcodes and db for links. 8 years ago			`from readability import readability`
big cleanup and rearrange into modules. added skeleton for publications CMS. added a sales dept to play vids from barcodes. 8 years ago
			`from bureau import Bureau, add_command, add_api`


			`class Publications(Bureau):`
			`"""`
			`The Publications Office serves as a kind of screenless content management`
			`system. Create, update and organize your sites while doing most of the work`
			`on paper or anything you can photograph.`
			`"""`

			`name = "Publications Office"`
			`prefix = "PB"`
			`version = 0`

			`def __init__(self):`
			`Bureau.__init__(self)`
article scraping, printing and template. barcodes and db for links. 8 years ago
			`# set up db for published sites`
			`# TODO: rename this to something less ambiguous`
big cleanup and rearrange into modules. added skeleton for publications CMS. added a sales dept to play vids from barcodes. 8 years ago			`self.db = os.path.expanduser("~/.screenless/PB.data")`
			`if not os.path.exists(self.db):`
			`os.mkdir(self.db)`
article scraping, printing and template. barcodes and db for links. 8 years ago
			`# set up urldb for short-codes`
			`self.urldb = self.dbenv.open_db(b"urldb")`
port news to the new system. 7 years ago
			`def _make_shorturl(self, url):`
			`def _shortcode():`
			`return ''.join(random.choice(string.ascii_letters + string.digits)`
			`for _ in range(5))`
big cleanup and rearrange into modules. added skeleton for publications CMS. added a sales dept to play vids from barcodes. 8 years ago
port news to the new system. 7 years ago			`# we only have about a billion so make sure we don't collide keys`
			`with self.dbenv.begin(write=True, db=self.urldb) as txn:`
			`res = "not None"`
			`while res is not None:`
			`tmpcode = _shortcode()`
news now working. template needs stylesheet work. 7 years ago			`res = txn.get(tmpcode.encode())`
port news to the new system. 7 years ago			`txn.put(tmpcode.encode(), url.encode())`

			`return tmpcode`


			`def _get_url(self, shortcode):`
			`"""look up a URL from a shortcode`
			`returns full unicode url`
			`"""`
			`with self.dbenv.begin(db=self.urldb) as txn:`
			`return txn.get(shortcode.encode()).decode()`
big cleanup and rearrange into modules. added skeleton for publications CMS. added a sales dept to play vids from barcodes. 8 years ago
			`@add_command("new", "Create a new Publication/Site")`
			`def new_site(self):`
			`"""`
			`Create a new Publication/Site, set up config and tace a picture from`
			`the document camera as the index page. Finally, it will print out`
			`the main page with commands for working with the site.`
			`"""`
			`site_dir = os.path.join(self.db, "1")`
			`site_id = 1`
			`while os.path.exists(site_dir):`
			`site_id += 1`
			`site_dir = os.path.join(self.db, str(site_id))`
			`os.mkdir(site_dir)`

			`root_d = {"template": "default", "id": site_id}`
			`with open(os.path.join(site_dir, "root.json", "w")) as root_json:`
			`root_json.write(json.dumps(root_d))`

			`photo = self.send("PX", "photo")["photo"]`

			`# TODO: come up with a generic set of img form operations for Bureau`
			`# should map regions defined with percentages to names`
			`form_img = PIL.Image.open(photo)`
			`fx, fy = form_img.size`
			`title_region = (0, 0, 0.5 * fx, 0.125 * fy)`
			`title_img = form_img.crop(title_region)`
			`content_region = (0, 0.125 * fy, fx, fy)`
			`content_img = form_img.crop(content_region)`

			`def _update_page(self, site, page):`
			`pass`

port news to the new system. 7 years ago			`@add_command("news", "Print a personalized daily newspaper")`
			`def daily_news(self):`
			`news = self._get_news()`
			`# TODO: get weather`
			`# TODO: get finance`
			`inbox = self.send("PO", "unread")`
news now working. template needs stylesheet work. 7 years ago			`print("news", news[0])`
port news to the new system. 7 years ago			`self.print_full("news.html", news=news, inbox=inbox)`

article scraping, printing and template. barcodes and db for links. 8 years ago			`@add_command("r", "Print a web page for reading")`
			`def print_url(self, data):`
			`"""`
			`Print out a web page for reading. The command requires a short-code,`
			`typically referenced via barcode. Short-codes refer to full resource`
			`URIs recorded in the Publications office 'urldb' database.`
			`"""`
			`shortcode, _ = data.split(".")`
			`with self.dbenv.begin(db=self.urldb) as txn:`
			`print("looking up short-code:", shortcode)`
			`url = txn.get(shortcode.encode('utf-8')).decode()`

			`if not url:`
			`print("ERROR: no valid URL in db for short code: ", shortcode)`
			`return`

port news to the new system. 7 years ago			`# download`
article scraping, printing and template. barcodes and db for links. 8 years ago			`headers = {'User-Agent': 'Mozilla/5.0'}`
			`req = urllib.request.Request(url, None, headers)`
			`urldata = urllib.request.urlopen(req)`

			`# re-render with readability`
			`doc = readability.Document(urldata.read(),`
			`url=url)`
			`timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")`
			`html = lxml.html.document_fromstring(doc.summary())`

article template 2-col layout. typography tweaks. 8 years ago			`notecount = 0`
article scraping, printing and template. barcodes and db for links. 8 years ago			`# store links then make corresponding svg barcodes`
			`for link in html.findall(".//a"):`
article template 2-col layout. typography tweaks. 8 years ago			`notecount += 1`
port news to the new system. 7 years ago			`tmpcode = self._make_shorturl(link.attrib["href"])`
article scraping, printing and template. barcodes and db for links. 8 years ago
			`svg = code128.svg("PBr." + tmpcode)`

article template 2-col layout. typography tweaks. 8 years ago			`footnote = html.makeelement("div")`
			`footnote.attrib["class"] = "footnote"`
			`notetext = html.makeelement("div")`
			`notetext.text = str(notecount) + ". " + link.attrib["href"]`
			`footnote.append(notetext)`
			`footnote.append(lxml.html.fromstring(svg.encode()))`
			`html.append(footnote)`
article scraping, printing and template. barcodes and db for links. 8 years ago
			`self.print_full("article.html", title=doc.title(),`
			`article=lxml.html.tostring(html).decode("utf-8"),`
			`url=url, date=timestamp)`
big cleanup and rearrange into modules. added skeleton for publications CMS. added a sales dept to play vids from barcodes. 8 years ago
port news to the new system. 7 years ago			`def _get_news(self):`
			`"""fetch a set of latest news entries from sources specified in config`
			`"""`
			`entries = []`

			`# TODO: come up with a good way to make this configurable`
			`feeds = [("http://feeds.bbci.co.uk/news/world/rss.xml", 10),`
			`("http://www.rbb-online.de/content/rbb/rbb/politik/uebersicht-kurznachrichten.xml/allitems=true/feed=rss/path=middleColumnList!teaserbox.xml", 8),`
			`("http://feeds.arstechnica.com/arstechnica/index/", 10),`
			`("http://feeds.feedburner.com/zerohedge/feed", 5),`
			`("http://planet.python.org/rss20.xml", 5)]`

			`for source in feeds:`
			`url = source[0]`
			`num_entries = source[1]`
			`feed = feedparser.parse(url)`

			`# work around if we don't have enough news`
			`if num_entries > len(feed.entries):`
			`num_entries = len(feed.entries)`

			`for _ in range(num_entries):`
			`entry = feed.entries.pop()`
			`entry.source = feed.feed.title`
			`entry.dbhash = self._make_shorturl(entry.link)`
news now working. template needs stylesheet work. 7 years ago			`if hasattr(entry, "media_thumbnail"):`
			`entry.img = entry.media_thumbnail[-1]["url"]`
			`else:`
			`entry.img = " "`
port news to the new system. 7 years ago
			`# limit summary to the last space below 500 characters`
			`if len(entry.summary) > 500:`
			`end = entry.summary.rfind(" ", 0, 499)`
			`entry.summary = entry.summary[0:end] + "…"`

			`entries.append(entry)`

			`return entries`

big cleanup and rearrange into modules. added skeleton for publications CMS. added a sales dept to play vids from barcodes. 8 years ago
			`def main():`
port news to the new system. 7 years ago			`pub = Publications()`
			`pub.run()`
big cleanup and rearrange into modules. added skeleton for publications CMS. added a sales dept to play vids from barcodes. 8 years ago

			`if __name__ == "__main__":`
			`main()`