Port news to the new system.

workspace
Brendan Howell 8 years ago
parent 3de427b4eb
commit be23c04af8

@ -0,0 +1,78 @@
<!DOCTYPE html>
<html>
<title>${title}</title>
<meta charset="utf-8">
<style type="text/css">
body {
    font-family: Junicode;
    font-size: 10pt;
    line-height: 1;
    counter-reset: sidenote-counter;
}
h1 {
    font-variant: small-caps;
}
img {
    max-width: 100%;
}
a {
    text-decoration: none;
    color: inherit;
}
a::after {
    counter-increment: sidenote-counter;
    content: counter(sidenote-counter);
    font-size: smaller;
    vertical-align: super;
}
#twocol {
    -moz-column-count: 2;
    -webkit-column-count: 2;
    column-count: 2;
    text-align: justify;
}
/* was "#date #article-src" (a descendant selector that matches nothing
   here) — a comma-separated list so both elements get the style */
#date, #article-src {
    padding-bottom: 10%;
    font-style: italic;
}
.footnote {
    font-size: smaller;
    padding-bottom: 0.5em;
}
.footnote svg {
    width: 50%;
    height: 50%;
}
</style>
<body>
<h1>Yesterday's Tomorrows</h1>
<hr />
<h6>Semper Contemporarius DATE_HERE</h6>
<!-- id must match the #twocol CSS rule above (was id="columns",
     which no rule styled) -->
<div id="twocol">
% for entry in news:
<div class="entry">
    % if entry.img != " ":
    <img src="${entry.img}" alt="img" />
    % endif
    <div class="title">${entry.title}</div>
    <div class="source">${entry.source}</div>
    <div class="summary">${entry.summary}</div>
    <div class="barcode">${entry.dbhash}</div>
</div>
% endfor
</div>
<div id="email">
<h2>Post</h2>
<table>
% for msg in inbox:
    <tr>
        <td>${msg.fromname} &lt;${msg.fromaddr}&gt;</td>
        <td>${msg.subject}</td><td>${msg.date}</td>
    </tr>
    <tr><td> </td><td>READ</td><td>DELETE</td><td>MARK AS SPAM</td></tr>
    <tr><td> </td><td>BC-READ</td><td>BC-DELETE</td><td>BC-MARK AS SPAM</td></tr>
% endfor
</table>
</div>
</body>
</html>

@ -7,6 +7,7 @@ import string
import urllib.request, urllib.parse, urllib.error import urllib.request, urllib.parse, urllib.error
import code128 import code128
import feedreader
import lxml.html import lxml.html
import PIL import PIL
from readability import readability from readability import readability
@ -36,7 +37,29 @@ class Publications(Bureau):
# set up urldb for short-codes # set up urldb for short-codes
self.urldb = self.dbenv.open_db(b"urldb") self.urldb = self.dbenv.open_db(b"urldb")
def _make_shorturl(self, url):
    """Store *url* in the url database under a fresh random shortcode.

    Returns the 5-character shortcode (str).  Keys in LMDB must be
    bytes, so the code is encoded both for the collision check and for
    the final put (the original passed a str to txn.get, which LMDB
    rejects, so the collision check could never succeed).
    """
    def _shortcode():
        return ''.join(random.choice(string.ascii_letters + string.digits)
                       for _ in range(5))

    # only ~62**5 (about a billion) codes exist, so re-roll until we
    # find one that is not already taken
    with self.dbenv.begin(write=True, db=self.urldb) as txn:
        while True:
            tmpcode = _shortcode()
            if txn.get(tmpcode.encode()) is None:
                break
        txn.put(tmpcode.encode(), url.encode())
    return tmpcode
def _get_url(self, shortcode):
    """Look up a URL from a shortcode.

    Returns the full unicode URL, or None when the shortcode is not in
    the database.  (The original called .decode() directly on the
    txn.get result, which raises AttributeError for a missing key; the
    caller already handles a missing URL by printing an error.)
    """
    with self.dbenv.begin(db=self.urldb) as txn:
        raw = txn.get(shortcode.encode())
    return raw.decode() if raw is not None else None
@add_command("new", "Create a new Publication/Site") @add_command("new", "Create a new Publication/Site")
def new_site(self): def new_site(self):
@ -70,6 +93,14 @@ class Publications(Bureau):
def _update_page(self, site, page): def _update_page(self, site, page):
pass pass
@add_command("news", "Print a personalized daily newspaper")
def daily_news(self):
    """Assemble and print the personalized daily paper.

    Gathers the news entries and the unread inbox, then renders the
    news.html template to the printer.
    """
    # TODO: get weather
    # TODO: get finance
    unread = self.send("PO", "unread")
    stories = self._get_news()
    self.print_full("news.html", news=stories, inbox=unread)
@add_command("r", "Print a web page for reading") @add_command("r", "Print a web page for reading")
def print_url(self, data): def print_url(self, data):
""" """
@ -86,7 +117,7 @@ class Publications(Bureau):
print("ERROR: no valid URL in db for short code: ", shortcode) print("ERROR: no valid URL in db for short code: ", shortcode)
return return
# download # download
headers = {'User-Agent': 'Mozilla/5.0'} headers = {'User-Agent': 'Mozilla/5.0'}
req = urllib.request.Request(url, None, headers) req = urllib.request.Request(url, None, headers)
urldata = urllib.request.urlopen(req) urldata = urllib.request.urlopen(req)
@ -95,20 +126,13 @@ class Publications(Bureau):
doc = readability.Document(urldata.read(), doc = readability.Document(urldata.read(),
url=url) url=url)
timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M") timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")
html = lxml.html.document_fromstring(doc.summary()) html = lxml.html.document_fromstring(doc.summary())
notecount = 0 notecount = 0
# store links then make corresponding svg barcodes # store links then make corresponding svg barcodes
for link in html.findall(".//a"): for link in html.findall(".//a"):
notecount += 1 notecount += 1
tmpcode = ''.join(random.choice(string.ascii_letters +\ tmpcode = self._make_shorturl(link.attrib["href"])
string.digits)\
for _ in range(5))
with self.dbenv.begin(write=True, db=self.urldb) as txn:
if "href" in link.attrib:
txn.put(tmpcode.encode(), link.attrib["href"].encode())
svg = code128.svg("PBr." + tmpcode) svg = code128.svg("PBr." + tmpcode)
@ -124,10 +148,45 @@ class Publications(Bureau):
article=lxml.html.tostring(html).decode("utf-8"), article=lxml.html.tostring(html).decode("utf-8"),
url=url, date=timestamp) url=url, date=timestamp)
def _get_news(self):
    """Fetch the latest news entries from the hard-coded RSS feeds.

    Returns a list of feed entry objects, each annotated with .source
    (the feed's title), .dbhash (a shortcode stored for the entry's
    link) and a summary truncated to at most 500 characters at a word
    boundary.
    """
    # NOTE(review): the top of the file imports "feedreader" but this
    # code calls feedparser.parse — confirm which module is intended.
    # TODO: come up with a good way to make this configurable
    feeds = [("http://feeds.bbci.co.uk/news/world/rss.xml", 10),
             ("http://www.rbb-online.de/content/rbb/rbb/politik/uebersicht-kurznachrichten.xml/allitems=true/feed=rss/path=middleColumnList!teaserbox.xml", 8),
             ("http://feeds.arstechnica.com/arstechnica/index/", 10),
             ("http://feeds.feedburner.com/zerohedge/feed", 5),
             ("http://planet.python.org/rss20.xml", 5)]

    entries = []
    for url, num_entries in feeds:
        feed = feedparser.parse(url)
        # work around if we don't have enough news
        num_entries = min(num_entries, len(feed.entries))
        for _ in range(num_entries):
            entry = feed.entries.pop()
            entry.source = feed.feed.title
            entry.dbhash = self._make_shorturl(entry.link)
            # limit the summary to the last space below 500 characters;
            # rfind's end bound is exclusive, so 500 (not 499) searches
            # indices 0..499, and -1 (no space at all) means a hard cut
            if len(entry.summary) > 500:
                end = entry.summary.rfind(" ", 0, 500)
                if end == -1:
                    end = 500
                entry.summary = entry.summary[:end]
            entries.append(entry)
    return entries
def main():
    """Entry point: instantiate the Publications bureau and run it."""
    pub = Publications()
    pub.run()
if __name__ == "__main__": if __name__ == "__main__":

Loading…
Cancel
Save