From 74ca1e5aaf224d407774b8bb696d0d4e8a47dcbc Mon Sep 17 00:00:00 2001
From: Brendan Howell
Date: Sun, 15 Oct 2017 22:31:12 +0200
Subject: [PATCH] try some multi-threading to deal with slow downloads of
 feeds and metadata

---
 .../bureau/publications/publications.py      | 66 ++++++++++++++-----
 1 file changed, 50 insertions(+), 16 deletions(-)

diff --git a/screenless/bureau/publications/publications.py b/screenless/bureau/publications/publications.py
index 5e72ae1..160ac3e 100644
--- a/screenless/bureau/publications/publications.py
+++ b/screenless/bureau/publications/publications.py
@@ -5,6 +5,7 @@ import json
 import os
 import random
 import string
+import threading
 import urllib.request, urllib.parse, urllib.error
 
 import bleach
@@ -204,20 +205,36 @@ class Publications(Bureau):
         """
         feeds = self.config["newsfeeds"]
         entries = []
+        feed_data = {}
+        threads = []
         for source in feeds:
             url = source["url"]
-            num_entries = source["count"]
 
-            # get feed data with requests using a timeout
-            try:
-                resp = requests.get(url, timeout=20.0)
-            except requests.ReadTimeout:
-                self.log.warning("Timeout reading RSS feed %s", url)
-                continue
+            def fetch_feed(url, feed_data):
+                """
+                get feed data with requests using a timeout
+                """
+                try:
+                    resp = requests.get(url, timeout=20.0)
+                    feed_data[url] = io.BytesIO(resp.content)
+                except requests.ReadTimeout:
+                    self.log.warning("Timeout reading RSS feed %s", url)
+                    feed_data[url] = None
+                return
+
+            thread = threading.Thread(target=fetch_feed, args=(url, feed_data))
+            threads.append(thread)
+            thread.start()
+
+        for thread in threads:
+            thread.join()
 
-            feed_data = io.BytesIO(resp.content)
-            feed = feedparser.parse(feed_data)
+        for source in feeds:
+            if feed_data.get(source["url"]) is None:
+                continue
+            feed = feedparser.parse(feed_data[source["url"]])
 
+            num_entries = source["count"]
             # work around if we don't have enough news
             if num_entries > len(feed.entries):
                 num_entries = len(feed.entries)
@@ -228,13 +245,6 @@
             entry.source = feed.feed.title
             entry.dbhash = self._make_shorturl(entry.link)
             entry.svg = code128.svg("PBr." + entry.dbhash)
-            og_data = self._get_ogdata(entry.link)
-            if hasattr(entry, "media_thumbnail"):
-                entry.img = entry.media_thumbnail[-1]["url"]
-            elif "image" in og_data:
-                entry.img = og_data["image"]
-            else:
-                entry.img = " "
 
             # skip bogus entries with no text
             if not hasattr(entry, "summary"):
@@ -249,6 +259,30 @@
 
             entries.append(entry)
 
+        # do this multi-threaded because downloads can be slow
+        threads = []
+        for i in range(len(entries)):
+            entry = entries[i]
+            def fetch_og(entry):
+                """
+                get OpenGraph data for entry
+                """
+                og_data = self._get_ogdata(entry.link)
+                if hasattr(entry, "media_thumbnail"):
+                    entry.img = entry.media_thumbnail[-1]["url"]
+                elif "image" in og_data:
+                    entry.img = og_data["image"]
+                else:
+                    entry.img = " "
+
+            thread = threading.Thread(target=fetch_og, args=(entry,))
+            threads.append(thread)
+            thread.start()
+
+        # wait till we're done
+        for thread in threads:
+            thread.join()
+
         return entries
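
Side note on the pattern, not part of the patch: the hand-rolled Thread/join
bookkeeping works, but since the codebase targets Python 3 the standard
library's concurrent.futures.ThreadPoolExecutor expresses the same
fan-out/join with a bounded worker pool and no shared results dict filled in
by side effect. A minimal standalone sketch of the feed-download step,
assuming the same requests/io usage as the patch; the URLs and the
max_workers value are placeholders, not taken from the codebase:

import io
from concurrent.futures import ThreadPoolExecutor

import requests


def fetch_feed(url):
    """Fetch one feed with a timeout; return a BytesIO or None on failure."""
    try:
        resp = requests.get(url, timeout=20.0)
        return io.BytesIO(resp.content)
    except requests.RequestException:
        return None


if __name__ == "__main__":
    # placeholder feed URLs, for illustration only
    urls = [
        "https://example.org/news.rss",
        "https://example.org/culture.rss",
    ]

    # map() fans the downloads out across the pool and yields results in
    # input order; leaving the with-block joins all the workers
    with ThreadPoolExecutor(max_workers=8) as pool:
        feed_data = dict(zip(urls, pool.map(fetch_feed, urls)))

    for url, data in feed_data.items():
        print(url, "failed" if data is None else "ok")

Because each worker returns its payload instead of writing into a dict shared
across threads, nothing is mutated concurrently and a failed download is just
a None in the results.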
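The second loop, the per-entry OpenGraph pass, can reuse the same executor.
A sketch under the same assumptions, with a stand-in Entry type and a stubbed
get_ogdata helper, both hypothetical, standing in for the feedparser entries
and the bureau's _get_ogdata method:

from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass


@dataclass
class Entry:
    """Stand-in for a feedparser entry; hypothetical, for illustration."""
    link: str
    img: str = " "


def get_ogdata(link):
    """Stub for the bureau's _get_ogdata helper (hypothetical)."""
    return {}  # a real version would return the parsed OpenGraph tags


def resolve_image(entry):
    """Pick an image URL for one entry; runs on a worker thread."""
    og_data = get_ogdata(entry.link)
    entry.img = og_data.get("image", " ")
    return entry


entries = [Entry("https://example.org/a"), Entry("https://example.org/b")]

# each worker touches only its own entry, so the mutation is thread-safe;
# list() drains the lazy map() iterator so every fetch finishes here
with ThreadPoolExecutor(max_workers=8) as pool:
    entries = list(pool.map(resolve_image, entries))

Passing the entry as the map() argument also sidesteps the single-element
tuple gotcha that threading.Thread's args parameter invites.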