@@ -5,6 +5,7 @@ import json
 import os
 import random
 import string
+import threading
 import urllib.request, urllib.parse, urllib.error
 
 import bleach
@@ -204,20 +205,36 @@ class Publications(Bureau):
         """
         feeds = self.config["newsfeeds"]
         entries = []
+        feed_data = {}
+        threads = []
 
         for source in feeds:
             url = source["url"]
-            num_entries = source["count"]
-
-            # get feed data with requests using a timeout
-            try:
-                resp = requests.get(url, timeout=20.0)
-            except requests.ReadTimeout:
-                self.log.warning("Timeout reading RSS feed %s", url)
-                continue
+
+            def fetch_feed(url, feed_data):
+                """
+                get feed data with requests using a timeout
+                """
+                try:
+                    resp = requests.get(url, timeout=20.0)
+                    feed_data[url] = io.BytesIO(resp.content)
+                except requests.ReadTimeout:
+                    self.log.warning("Timeout reading RSS feed %s", url)
+                    feed_data[url] = None
+                    return
+
+            thread = threading.Thread(target=fetch_feed, args=(url, feed_data))
+            threads.append(thread)
+            thread.start()
+
+        for thread in threads:
+            thread.join()
 
-            feed_data = io.BytesIO(resp.content)
-            feed = feedparser.parse(feed_data)
+        for source in feeds:
+            if feed_data[source["url"]] is None:
+                continue
+            feed = feedparser.parse(feed_data[source["url"]])
+            num_entries = source["count"]
 
             # work around if we don't have enough news
             if num_entries > len(feed.entries):
@@ -228,13 +245,6 @@ class Publications(Bureau):
                 entry.source = feed.feed.title
                 entry.dbhash = self._make_shorturl(entry.link)
                 entry.svg = code128.svg("PBr." + entry.dbhash)
-                og_data = self._get_ogdata(entry.link)
-                if hasattr(entry, "media_thumbnail"):
-                    entry.img = entry.media_thumbnail[-1]["url"]
-                elif "image" in og_data:
-                    entry.img = og_data["image"]
-                else:
-                    entry.img = " "
 
                 # skip bogus entries with no text
                 if not hasattr(entry, "summary"):
@@ -249,6 +259,30 @@ class Publications(Bureau):
 
                 entries.append(entry)
 
+        # do this multi-threaded cuz downloads can be slow
+        threads = []
+        for i in range(len(entries)):
+            entry = entries[i]
+            def fetch_og(entry):
+                """
+                get OpenGraph data for entry
+                """
+                og_data = self._get_ogdata(entry.link)
+                if hasattr(entry, "media_thumbnail"):
+                    entry.img = entry.media_thumbnail[-1]["url"]
+                elif "image" in og_data:
+                    entry.img = og_data["image"]
+                else:
+                    entry.img = " "
+
+            thread = threading.Thread(target=fetch_og, args=(entry,))
+            threads.append(thread)
+            thread.start()
+
+        # wait till we're done
+        for thread in threads:
+            thread.join()
+
         return entries