From 74ca1e5aaf224d407774b8bb696d0d4e8a47dcbc Mon Sep 17 00:00:00 2001
From: Brendan Howell
Date: Sun, 15 Oct 2017 22:31:12 +0200
Subject: [PATCH] try some multi-threading to deal with slow downloads of
 feeds and metadata

---
 .../bureau/publications/publications.py      | 66 ++++++++++++++-----
 1 file changed, 50 insertions(+), 16 deletions(-)

diff --git a/screenless/bureau/publications/publications.py b/screenless/bureau/publications/publications.py
index 5e72ae1..160ac3e 100644
--- a/screenless/bureau/publications/publications.py
+++ b/screenless/bureau/publications/publications.py
@@ -5,6 +5,7 @@ import json
 import os
 import random
 import string
+import threading
 import urllib.request, urllib.parse, urllib.error
 
 import bleach
@@ -204,20 +205,36 @@ class Publications(Bureau):
         """
         feeds = self.config["newsfeeds"]
         entries = []
+        feed_data = {}
+        threads = []
         for source in feeds:
             url = source["url"]
-            num_entries = source["count"]
 
-            # get feed data with requests using a timeout
-            try:
-                resp = requests.get(url, timeout=20.0)
-            except requests.ReadTimeout:
-                self.log.warning("Timeout reading RSS feed %s", url)
-                continue
+            def fetch_feed(url, feed_data):
+                """
+                get feed data with requests using a timeout
+                """
+                try:
+                    resp = requests.get(url, timeout=20.0)
+                    feed_data[url] = io.BytesIO(resp.content)
+                except requests.ReadTimeout:
+                    self.log.warning("Timeout reading RSS feed %s", url)
+                    feed_data[url] = None
+                return
+
+            thread = threading.Thread(target=fetch_feed, args=(url, feed_data))
+            threads.append(thread)
+            thread.start()
+
+        for thread in threads:
+            thread.join()
 
-            feed_data = io.BytesIO(resp.content)
-            feed = feedparser.parse(feed_data)
+        for source in feeds:
+            if feed_data.get(source["url"]) is None:
+                continue
+            feed = feedparser.parse(feed_data[source["url"]])
 
+            num_entries = source["count"]
             # work around if we don't have enough news
             if num_entries > len(feed.entries):
                 num_entries = len(feed.entries)
@@ -228,13 +245,6 @@
             entry.source = feed.feed.title
             entry.dbhash = self._make_shorturl(entry.link)
             entry.svg = code128.svg("PBr." + entry.dbhash)
-            og_data = self._get_ogdata(entry.link)
-            if hasattr(entry, "media_thumbnail"):
-                entry.img = entry.media_thumbnail[-1]["url"]
-            elif "image" in og_data:
-                entry.img = og_data["image"]
-            else:
-                entry.img = " "
 
             # skip bogus entries with no text
             if not hasattr(entry, "summary"):
@@ -249,6 +259,30 @@
 
             entries.append(entry)
 
+        # do this multi-threaded because downloads can be slow
+        threads = []
+        for i in range(len(entries)):
+            entry = entries[i]
+            def fetch_og(entry):
+                """
+                get OpenGraph data for entry
+                """
+                og_data = self._get_ogdata(entry.link)
+                if hasattr(entry, "media_thumbnail"):
+                    entry.img = entry.media_thumbnail[-1]["url"]
+                elif "image" in og_data:
+                    entry.img = og_data["image"]
+                else:
+                    entry.img = " "
+
+            thread = threading.Thread(target=fetch_og, args=(entry,))
+            threads.append(thread)
+            thread.start()
+
+        # wait till we're done
+        for thread in threads:
+            thread.join()
+
         return entries
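
Side note on the pattern, not part of the patch: the hand-rolled Thread/join
bookkeeping works, but since the codebase targets Python 3 the standard
library's concurrent.futures.ThreadPoolExecutor expresses the same
fan-out/join with a bounded worker pool and no shared results dict filled in
by side effect. A minimal standalone sketch of the feed-download step,
assuming the same requests/io usage as the patch; the URLs and the
max_workers value are placeholders, not taken from the codebase:

import io
from concurrent.futures import ThreadPoolExecutor

import requests


def fetch_feed(url):
    """Fetch one feed with a timeout; return a BytesIO or None on failure."""
    try:
        resp = requests.get(url, timeout=20.0)
        return io.BytesIO(resp.content)
    except requests.RequestException:
        return None


if __name__ == "__main__":
    # placeholder feed URLs, for illustration only
    urls = [
        "https://example.org/news.rss",
        "https://example.org/culture.rss",
    ]

    # map() fans the downloads out across the pool and yields results in
    # input order; leaving the with-block joins all the workers
    with ThreadPoolExecutor(max_workers=8) as pool:
        feed_data = dict(zip(urls, pool.map(fetch_feed, urls)))

    for url, data in feed_data.items():
        print(url, "failed" if data is None else "ok")

Because each worker returns its payload instead of writing into a dict shared
across threads, nothing is mutated concurrently and a failed download is just
a None in the results.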
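The second loop, the per-entry OpenGraph pass, can reuse the same executor.
A sketch under the same assumptions, with a stand-in Entry type and a stubbed
get_ogdata helper, both hypothetical, standing in for the feedparser entries
and the bureau's _get_ogdata method:

from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass


@dataclass
class Entry:
    """Stand-in for a feedparser entry; hypothetical, for illustration."""
    link: str
    img: str = " "


def get_ogdata(link):
    """Stub for the bureau's _get_ogdata helper (hypothetical)."""
    return {}  # a real version would return the parsed OpenGraph tags


def resolve_image(entry):
    """Pick an image URL for one entry; runs on a worker thread."""
    og_data = get_ogdata(entry.link)
    entry.img = og_data.get("image", " ")
    return entry


entries = [Entry("https://example.org/a"), Entry("https://example.org/b")]

# each worker touches only its own entry, so the mutation is thread-safe;
# list() drains the lazy map() iterator so every fetch finishes here
with ThreadPoolExecutor(max_workers=8) as pool:
    entries = list(pool.map(resolve_image, entries))

Passing the entry as the map() argument also sidesteps the single-element
tuple gotcha that threading.Thread's args parameter invites.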