From be23c04af8451affb4425a7fac08d33195cd9220 Mon Sep 17 00:00:00 2001
From: Brendan Howell <brendan@howell-ersatz.com>
Date: Fri, 28 Apr 2017 14:12:36 +0200
Subject: [PATCH] port news to the new system.

---
 screenless/bureau/publications/news.html      | 78 ++++++++++++++++++
 .../bureau/publications/publications.py       | 81 ++++++++++++++++---
 2 files changed, 148 insertions(+), 11 deletions(-)
 create mode 100644 screenless/bureau/publications/news.html
diff --git a/screenless/bureau/publications/news.html b/screenless/bureau/publications/news.html
new file mode 100644
index 0000000..542496a
--- /dev/null
+++ b/screenless/bureau/publications/news.html
@@ -0,0 +1,78 @@
+<!DOCTYPE html>
+<html>
+  <title>${title}</title>
+  <meta charset="utf-8">
+  <style type="text/css">
+    body {
+      font-family: Junicode;
+      font-size: 10pt;
+      line-height: 1;
+      counter-reset: sidenote-counter;
+    }
+    h1 {
+      font-variant: small-caps;
+    }
+    img {
+      max-width: 100%;
+    }
+    a {
+      text-decoration: none;
+      color: inherit;
+    }
+    a::after {
+      counter-increment: sidenote-counter;
+      content: counter(sidenote-counter);
+      font-size: smaller;
+      vertical-align: super;
+    }
+    #twocol {
+      -moz-column-count: 2;
+      -webkit-column-count: 2;
+      column-count: 2;
+      text-align: justify;
+    }
+    #date #article-src {
+      padding-bottom: 10%;
+      font-style: italic;
+    }
+    .footnote {
+      font-size: smaller;
+      padding-bottom: 0.5em;
+    }
+    .footnote svg {
+      width: 50%;
+      height: 50%;
+    }
+  </style>
+<body>
+  <h1>Yesterday's Tomorrows</h1>
+  <hr />
+  <h6>Semper Contemporarius DATE_HERE</h6>
+  <div id="columns">
+    % for entry in news:
+  <div class="entry">
+    % if getattr(entry, "img", " ") != " ":
+    <img src="${entry.img}" alt="img" />
+    % endif
+    <div class="title">${entry.title}</div>
+    <div class="source">${entry.source}</div>
+    <div class="summary">${entry.summary}</div>
+    <div class="barcode">${entry.dbhash}</div>
+  </div>
+    % endfor
+  </div>
+  <div id="email">
+    <h2>Post</h2>
+    <table>
+    % for msg in inbox:
+    <tr>
+      <td>${msg.fromname} &lt;${msg.fromaddr}&gt;</td>
+      <td>${msg.subject}</td><td>${msg.date}</td>
+    </tr>
+    <tr><td> </td><td>READ</td><td>DELETE</td><td>MARK AS SPAM</td></tr>
+    <tr><td> </td><td>BC-READ</td><td>BC-DELETE</td><td>BC-MARK AS SPAM</td></tr>
+    % endfor
+    </table>
+  </div>
+</body>
+</html>
diff --git a/screenless/bureau/publications/publications.py b/screenless/bureau/publications/publications.py
index ed2944c..f676f66 100644
--- a/screenless/bureau/publications/publications.py
+++ b/screenless/bureau/publications/publications.py
@@ -7,6 +7,7 @@ import string
 import urllib.request, urllib.parse, urllib.error
 
 import code128
+import feedreader
 import lxml.html
 import PIL
 from readability import readability
@@ -36,7 +37,29 @@ class Publications(Bureau):
 
         # set up urldb for short-codes
         self.urldb = self.dbenv.open_db(b"urldb")
+
+    def _make_shorturl(self, url):
+        """store url in urldb under a new random 5-character code
+        returns that code as a str"""
+        alphabet = string.ascii_letters + string.digits
         
+        # we only have about a billion so make sure we don't collide keys
+        with self.dbenv.begin(write=True, db=self.urldb) as txn:
+            while True:
+                tmpcode = ''.join(random.choice(alphabet) for _ in range(5))
+                # keys are stored encoded, so the lookup must be encoded too
+                if txn.get(tmpcode.encode()) is None:
+                    break
+            txn.put(tmpcode.encode(), url.encode())
+        return tmpcode
+
+
+    def _get_url(self, shortcode):
+        """return the URL stored in urldb under shortcode, decoded to str
+        the lookup runs inside its own transaction"""
+        with self.dbenv.begin(db=self.urldb) as txn:
+            stored = txn.get(shortcode.encode())
+            return stored.decode()
 
     @add_command("new", "Create a new Publication/Site")
     def new_site(self):
@@ -70,6 +93,14 @@ class Publications(Bureau):
     def _update_page(self, site, page):
         pass
 
+    @add_command("news", "Print a personalized daily newspaper")
+    def daily_news(self):
+        """print news.html filled with _get_news() and PO's "unread" reply"""
+        headlines = self._get_news()
+        # TODO: get weather and finance
+        unread = self.send("PO", "unread")
+        self.print_full("news.html", news=headlines, inbox=unread)
+
     @add_command("r", "Print a web page for reading")
     def print_url(self, data):
         """
@@ -86,7 +117,7 @@ class Publications(Bureau):
             print("ERROR: no valid URL in db for short code: ", shortcode)
             return
 
-        # download 
+        # download
         headers = {'User-Agent': 'Mozilla/5.0'}
         req = urllib.request.Request(url, None, headers)
         urldata = urllib.request.urlopen(req)
@@ -95,20 +126,13 @@ class Publications(Bureau):
         doc = readability.Document(urldata.read(),
                                    url=url)
         timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")
-       
         html = lxml.html.document_fromstring(doc.summary())
 
         notecount = 0
         # store links then make corresponding svg barcodes
         for link in html.findall(".//a"):
             notecount += 1
-            tmpcode = ''.join(random.choice(string.ascii_letters +\
-                                            string.digits)\
-                              for _ in range(5))
-
-            with self.dbenv.begin(write=True, db=self.urldb) as txn:
-                if "href" in link.attrib:
-                    txn.put(tmpcode.encode(), link.attrib["href"].encode())
+            tmpcode = self._make_shorturl(link.attrib["href"])
 
             svg = code128.svg("PBr." + tmpcode)
 
@@ -124,10 +148,45 @@ class Publications(Bureau):
                         article=lxml.html.tostring(html).decode("utf-8"),
                         url=url, date=timestamp)
 
+    def _get_news(self):
+        """fetch the newest entries from each hard-coded feed
+
+        returns a list of feed entries, each given a .source (feed title),
+        a .dbhash short code for its link and a .summary cut to ~500 chars
+        """
+        import feedparser  # imported here: the file header imports "feedreader"
+
+        entries = []
+        # TODO: come up with a good way to make this configurable
+        feeds = [("http://feeds.bbci.co.uk/news/world/rss.xml", 10),
+                 ("http://www.rbb-online.de/content/rbb/rbb/politik/uebersicht-kurznachrichten.xml/allitems=true/feed=rss/path=middleColumnList!teaserbox.xml", 8),
+                 ("http://feeds.arstechnica.com/arstechnica/index/", 10),
+                 ("http://feeds.feedburner.com/zerohedge/feed", 5),
+                 ("http://planet.python.org/rss20.xml", 5)]
+
+        for url, num_entries in feeds:
+            feed = feedparser.parse(url)
+
+            # a slice copes with short feeds; feeds usually list newest first
+            for entry in feed.entries[:num_entries]:
+                entry.source = feed.feed.title
+                entry.dbhash = self._make_shorturl(entry.link)
+
+                # entries lacking a summary get "" instead of raising
+                summary = getattr(entry, "summary", "")
+                if len(summary) > 500:
+                    # cut at the last space below 500, or hard if there is none
+                    end = summary.rfind(" ", 0, 499)
+                    summary = summary[:end if end != -1 else 499] + "…"
+                entry.summary = summary
+                entries.append(entry)
+
+        return entries
+
 
 def main():
-    pb = Publications()
-    pb.run()
+    """create the Publications bureau and run it"""
+    Publications().run()
 
 
 if __name__ == "__main__":

${msg.fromname} <${msg.fromaddr}>	${msg.subject}	${msg.date}
	READ	DELETE	MARK AS SPAM
	BC-READ	BC-DELETE	BC-MARK AS SPAM