From be23c04af8451affb4425a7fac08d33195cd9220 Mon Sep 17 00:00:00 2001 From: Brendan Howell Date: Fri, 28 Apr 2017 14:12:36 +0200 Subject: [PATCH] port news to the new system. --- screenless/bureau/publications/news.html | 78 ++++++++++++++++++ .../bureau/publications/publications.py | 81 ++++++++++++++++--- 2 files changed, 148 insertions(+), 11 deletions(-) create mode 100644 screenless/bureau/publications/news.html diff --git a/screenless/bureau/publications/news.html b/screenless/bureau/publications/news.html new file mode 100644 index 0000000..542496a --- /dev/null +++ b/screenless/bureau/publications/news.html @@ -0,0 +1,78 @@ + + + ${title} + + + +

Yesterday's Tomorrows

+
+
Semper Contemporarius DATE_HERE
+
+ % for entry in news: +
+ % if entry.img != " ": + img +
${entry.title}
+
${entry.title}
+
${entry.title}
+
${entry.dbhash}
+
+ % endfor +
+
+

Post

+ + + % for msg in inbox: + + + + + + + % endfor +
${msg.fromname} <${msg.fromaddr}>${msg.subject}${msg.date}
READDELETEMARK AS SPAM
BC-READBC-DELETEBC-MARK AS SPAM
+
+ + diff --git a/screenless/bureau/publications/publications.py b/screenless/bureau/publications/publications.py index ed2944c..f676f66 100644 --- a/screenless/bureau/publications/publications.py +++ b/screenless/bureau/publications/publications.py @@ -7,6 +7,7 @@ import string import urllib.request, urllib.parse, urllib.error import code128 +import feedreader import lxml.html import PIL from readability import readability @@ -36,7 +37,29 @@ class Publications(Bureau): # set up urldb for short-codes self.urldb = self.dbenv.open_db(b"urldb") + + def _make_shorturl(self, url): + def _shortcode(): + return ''.join(random.choice(string.ascii_letters + string.digits) + for _ in range(5)) + # we only have about a billion so make sure we don't collide keys + with self.dbenv.begin(write=True, db=self.urldb) as txn: + res = "not None" + while res is not None: + tmpcode = _shortcode() + res = txn.get(tmpcode) + txn.put(tmpcode.encode(), url.encode()) + + return tmpcode + + + def _get_url(self, shortcode): + """look up a URL from a shortcode + returns full unicode url + """ + with self.dbenv.begin(db=self.urldb) as txn: + return txn.get(shortcode.encode()).decode() @add_command("new", "Create a new Publication/Site") def new_site(self): @@ -70,6 +93,14 @@ class Publications(Bureau): def _update_page(self, site, page): pass + @add_command("news", "Print a personalized daily newspaper") + def daily_news(self): + news = self._get_news() + # TODO: get weather + # TODO: get finance + inbox = self.send("PO", "unread") + self.print_full("news.html", news=news, inbox=inbox) + @add_command("r", "Print a web page for reading") def print_url(self, data): """ @@ -86,7 +117,7 @@ class Publications(Bureau): print("ERROR: no valid URL in db for short code: ", shortcode) return - # download + # download headers = {'User-Agent': 'Mozilla/5.0'} req = urllib.request.Request(url, None, headers) urldata = urllib.request.urlopen(req) @@ -95,20 +126,13 @@ class Publications(Bureau): doc = readability.Document(urldata.read(), url=url) timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M") - html = lxml.html.document_fromstring(doc.summary()) notecount = 0 # store links then make corresponding svg barcodes for link in html.findall(".//a"): notecount += 1 - tmpcode = ''.join(random.choice(string.ascii_letters +\ - string.digits)\ - for _ in range(5)) - - with self.dbenv.begin(write=True, db=self.urldb) as txn: - if "href" in link.attrib: - txn.put(tmpcode.encode(), link.attrib["href"].encode()) + tmpcode = self._make_shorturl(link.attrib["href"]) svg = code128.svg("PBr." + tmpcode) @@ -124,10 +148,45 @@ class Publications(Bureau): article=lxml.html.tostring(html).decode("utf-8"), url=url, date=timestamp) + def _get_news(self): + """fetch a set of latest news entries from sources specified in config + """ + entries = [] + + # TODO: come up with a good way to make this configurable + feeds = [("http://feeds.bbci.co.uk/news/world/rss.xml", 10), + ("http://www.rbb-online.de/content/rbb/rbb/politik/uebersicht-kurznachrichten.xml/allitems=true/feed=rss/path=middleColumnList!teaserbox.xml", 8), + ("http://feeds.arstechnica.com/arstechnica/index/", 10), + ("http://feeds.feedburner.com/zerohedge/feed", 5), + ("http://planet.python.org/rss20.xml", 5)] + + for source in feeds: + url = source[0] + num_entries = source[1] + feed = feedparser.parse(url) + + # work around if we don't have enough news + if num_entries > len(feed.entries): + num_entries = len(feed.entries) + + for _ in range(num_entries): + entry = feed.entries.pop() + entry.source = feed.feed.title + entry.dbhash = self._make_shorturl(entry.link) + + # limit summary to the last space below 500 characters + if len(entry.summary) > 500: + end = entry.summary.rfind(" ", 0, 499) + entry.summary = entry.summary[0:end] + "…" + + entries.append(entry) + + return entries + def main(): - pb = Publications() - pb.run() + pub = Publications() + pub.run() if __name__ == "__main__":