port news to the new system.

Brendan Howell 8 years ago
parent 3de427b4eb
commit be23c04af8

@ -0,0 +1,78 @@
<!DOCTYPE html>
<meta charset="utf-8">
<style type="text/css">
/* Stylesheet for the printed daily newspaper page. */
body {
  font-family: Junicode;
  font-size: 10pt;
  line-height: 1;
  /* start the counter used to number links (see a::after) */
  counter-reset: sidenote-counter;
}
h1 {
  font-variant: small-caps;
}
img {
  max-width: 100%;
}
a {
  text-decoration: none;
  color: inherit;
}
/* every link gets a superscript running number appended to it */
a::after {
  counter-increment: sidenote-counter;
  content: counter(sidenote-counter);
  font-size: smaller;
  vertical-align: super;
}
/* NOTE(review): the container in this template has id "columns", not
   "twocol" - confirm whether this rule is meant to apply to it. */
#twocol {
  -moz-column-count: 2;
  -webkit-column-count: 2;
  column-count: 2;
  text-align: justify;
}
#date #article-src {
  padding-bottom: 10%;
  font-style: italic;
}
.footnote {
  font-size: smaller;
  padding-bottom: 0.5em;
}
.footnote svg {
  width: 50%;
  height: 50%;
}
</style>
<h1>Yesterday's Tomorrows</h1>
<hr />
<h6>Semper Contemporarius DATE_HERE</h6>
<div id="columns">
% for entry in news:
  ## One block per news item: optional image, headline, feed name,
  ## summary text and the short code stored for the item's link.
  <div class="entry">
    ## a single space is the "no image" marker
    % if entry.img != " ":
    <img src="${entry.img}" alt="img" />
    % endif
    <div class="title">${entry.title}</div>
    <div class="source">${entry.source}</div>
    <div class="summary">${entry.summary}</div>
    <div class="barcode">${entry.dbhash}</div>
  </div>
% endfor
<div id="email">
  <table>
  % for msg in inbox:
    ## Per unread message: a sender row, a row of action labels and a row
    ## of BC-* cells (presumably placeholders for the action barcodes).
    <tr><td>${msg.fromname} &lt;${msg.fromaddr}&gt;</td></tr>
    <tr><td> </td><td>READ</td><td>DELETE</td><td>MARK AS SPAM</td></tr>
    <tr><td> </td><td>BC-READ</td><td>BC-DELETE</td><td>BC-MARK AS SPAM</td></tr>
  % endfor
  </table>
</div>

@ -7,6 +7,7 @@ import string
import urllib.request, urllib.parse, urllib.error
import code128
import feedreader
import lxml.html
import PIL
from readability import readability
@ -36,7 +37,29 @@ class Publications(Bureau):
# set up urldb for short-codes
self.urldb = self.dbenv.open_db(b"urldb")
def _make_shorturl(self, url):
    """Store *url* in the short-code database and return its new code.

    Random 5-character alphanumeric codes are drawn until one is found
    that is not yet a key in ``self.urldb``; the url is then written under
    that code inside the same write transaction.

    Args:
        url: the full URL to store, as ``str``.

    Returns:
        The newly allocated short code, as ``str``.
    """
    alphabet = string.ascii_letters + string.digits

    def _shortcode():
        return ''.join(random.choice(alphabet) for _ in range(5))

    # we only have about a billion codes (62**5), so make sure we don't
    # collide with a key that is already stored
    with self.dbenv.begin(write=True, db=self.urldb) as txn:
        while True:
            tmpcode = _shortcode()
            # look the code up with the same encoded key that put() uses,
            # otherwise the collision check never sees existing entries
            key = tmpcode.encode()
            if txn.get(key) is None:
                break
        txn.put(key, url.encode())
    return tmpcode
def _get_url(self, shortcode):
    """Look up the URL stored under a short code.

    Args:
        shortcode: the short code to resolve, as ``str``.

    Returns:
        The full URL as a unicode ``str``.
    """
    key = shortcode.encode()
    with self.dbenv.begin(db=self.urldb) as txn:
        # NOTE(review): for an unknown code get() presumably returns None,
        # in which case decode() raises AttributeError - confirm callers
        # rely on that before changing it.
        return txn.get(key).decode()
@add_command("new", "Create a new Publication/Site")
def new_site(self):
@ -70,6 +93,14 @@ class Publications(Bureau):
def _update_page(self, site, page):
@add_command("news", "Print a personalized daily newspaper")
def daily_news(self):
    """Gather today's content and print it with the news.html template.

    The news entries come from ``_get_news`` and the inbox from sending
    the "unread" command to "PO"; both are handed to ``print_full``.
    """
    # TODO: get weather
    # TODO: get finance
    context = {
        "news": self._get_news(),
        "inbox": self.send("PO", "unread"),
    }
    self.print_full("news.html", **context)
@add_command("r", "Print a web page for reading")
def print_url(self, data):
@ -86,7 +117,7 @@ class Publications(Bureau):
print("ERROR: no valid URL in db for short code: ", shortcode)
# download
# download
headers = {'User-Agent': 'Mozilla/5.0'}
req = urllib.request.Request(url, None, headers)
urldata = urllib.request.urlopen(req)
@ -95,20 +126,13 @@ class Publications(Bureau):
doc = readability.Document(urldata.read(),
timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")
html = lxml.html.document_fromstring(doc.summary())
notecount = 0
# store links then make corresponding svg barcodes
for link in html.findall(".//a"):
notecount += 1
tmpcode = ''.join(random.choice(string.ascii_letters +\
for _ in range(5))
with self.dbenv.begin(write=True, db=self.urldb) as txn:
if "href" in link.attrib:
txn.put(tmpcode.encode(), link.attrib["href"].encode())
tmpcode = self._make_shorturl(link.attrib["href"])
svg = code128.svg("PBr." + tmpcode)
@ -124,10 +148,45 @@ class Publications(Bureau):
url=url, date=timestamp)
def _get_news(self):
    """Fetch the latest news entries from a fixed list of feeds.

    Each feed is parsed and its first entries (up to the per-feed limit)
    are collected in feed order. Every collected entry is annotated with
    the attributes the news template reads:

    * ``source``  - title of the feed it came from (the feed URL if the
      feed has no title)
    * ``dbhash``  - short code stored for the entry's link
    * ``img``     - defaults to a single space when the entry has none
    * ``summary`` - cut at the last space below 500 characters

    Returns:
        A list of the annotated feed entries, grouped by feed.
    """
    # the file-level import is spelled "feedreader", so bind the parser
    # module that this method actually uses here
    import feedparser

    max_summary = 500

    # TODO: come up with a good way to make this configurable
    feeds = [("http://feeds.bbci.co.uk/news/world/rss.xml", 10),
             ("http://www.rbb-online.de/content/rbb/rbb/politik/uebersicht-kurznachrichten.xml/allitems=true/feed=rss/path=middleColumnList!teaserbox.xml", 8),
             ("http://feeds.arstechnica.com/arstechnica/index/", 10),
             ("http://feeds.feedburner.com/zerohedge/feed", 5),
             ("http://planet.python.org/rss20.xml", 5)]

    entries = []
    for url, num_entries in feeds:
        feed = feedparser.parse(url)
        source_title = getattr(feed.feed, "title", url)

        # Slicing copes with feeds that have fewer entries than requested.
        # Assumes feeds list their newest items first, so the head of the
        # list is the latest news.
        for entry in feed.entries[:num_entries]:
            entry.source = source_title
            entry.dbhash = self._make_shorturl(entry.link)

            # the template tests entry.img against " " to mean "no image"
            if not hasattr(entry, "img"):
                entry.img = " "

            # limit summary to the last space below 500 characters; fall
            # back to a hard cut when there is no space to break on
            summary = getattr(entry, "summary", "")
            if len(summary) > max_summary:
                cut = summary.rfind(" ", 0, max_summary - 1)
                if cut <= 0:
                    cut = max_summary - 1
                summary = summary[:cut] + "\u2026"  # horizontal ellipsis
            entry.summary = summary

            entries.append(entry)
    return entries
def main():
pb = Publications()
pub = Publications()
if __name__ == "__main__":
