@@ -41,6 +41,7 @@ class Publications(Bureau):
 
         # set up urldb for short-codes
         self.urldb = self.dbenv.open_db(b"urldb")
+        self.rev_urldb = self.dbenv.open_db(b"rev_urldb")
 
     def _make_shorturl(self, url):
        def _shortcode():
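For context on the new handle: lmdb keeps several named key/value databases inside one environment, as long as the environment was opened with a large enough max_dbs. A minimal standalone sketch of that pattern, with a hypothetical path that is not from this repo:

    import lmdb

    # max_dbs must cover every named database opened later
    env = lmdb.open("/tmp/shorturl-env", max_dbs=2)
    urldb = env.open_db(b"urldb")          # shortcode -> URL
    rev_urldb = env.open_db(b"rev_urldb")  # URL -> shortcode (reverse index)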
@@ -55,6 +56,9 @@ class Publications(Bureau):
                 res = txn.get(tmpcode.encode())
 
             txn.put(tmpcode.encode(), url.encode())
 
+        with self.dbenv.begin(write=True, db=self.rev_urldb) as txn:
+            txn.put(url.encode(), tmpcode.encode())
+
         return tmpcode
 
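With _make_shorturl now writing both mappings, lookups work in either direction. A hedged round-trip sketch, assuming pub is a Publications instance and that dbenv/urldb/rev_urldb are exposed as in the first hunk (the example URL is a stand-in):

    code = pub._make_shorturl("https://example.com/a")
    with pub.dbenv.begin(db=pub.urldb) as txn:
        assert txn.get(code.encode()) == b"https://example.com/a"
    with pub.dbenv.begin(db=pub.rev_urldb) as txn:
        assert txn.get(b"https://example.com/a") == code.encode()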
@@ -243,8 +247,21 @@ class Publications(Bureau):
         if num_entries > len(feed.entries):
             num_entries = len(feed.entries)
 
-        for _ in range(num_entries):
-            entry = feed.entries.pop(0)
+        en_count = 0
+        while en_count < num_entries:
+            try:
+                entry = feed.entries.pop(0)
+            except IndexError:
+                # we are out of entries - quit
+                en_count = num_entries
+                continue
+
+            # ignore the old news we've already seen
+            with self.dbenv.begin(db=self.rev_urldb) as txn:
+                res = txn.get(entry.link.encode())
+            if res is not None:
+                continue
+
             entry.source = feed.feed.title
             entry.dbhash = self._make_shorturl(entry.link)
             entry.svg = code128.svg("PBr." + entry.dbhash)
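The switch from for to while matters here: the loop should collect up to num_entries entries it has not seen before, and skipped duplicates must not count toward that quota, so a fixed-range for would undercount. The same control flow in a self-contained sketch, with a plain set standing in for the lmdb reverse index:

    def take_unseen(entries, num_entries, seen_links):
        # pop entries until we have num_entries unseen ones, or run out
        taken = []
        count = 0
        while count < num_entries:
            try:
                entry = entries.pop(0)
            except IndexError:
                break  # out of entries - quit
            if entry["link"] in seen_links:
                continue  # old news; skip without counting
            taken.append(entry)
            count += 1
        return taken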
@@ -261,6 +278,7 @@ class Publications(Bureau):
             entry.summary = bleach.clean(entry.summary, strip=True)
 
             entries.append(entry)
+            en_count += 1
 
         # do this multi-threaded cuz downloads can be slow
         threads = []
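The threads list feeds the fan-out/join pattern that the comment describes. A minimal sketch of that pattern, with fetch_page as a hypothetical stand-in for whatever per-entry download work follows in the file:

    import threading
    import urllib.request

    def fetch_page(entry):
        # hypothetical per-entry work: grab the page body
        with urllib.request.urlopen(entry.link) as resp:
            entry.html = resp.read()

    threads = []
    for entry in entries:
        thread = threading.Thread(target=fetch_page, args=(entry,))
        thread.start()
        threads.append(thread)
    for thread in threads:
        thread.join()  # wait for every download before moving on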