use requests, cope with site timeouts

8 years ago · 1ff5b3e886
parent 7e5ad5cd71
commit 1ff5b3e886
1 changed files with 8 additions and 5 deletions
--- a/screenless/bureau/publications/publications.py
+++ b/screenless/bureau/publications/publications.py
@@ -124,13 +124,16 @@ class Publications(Bureau):
        else:
            url = url.decode()

-        # download
+        # download page with requests
        headers = {'User-Agent': 'Mozilla/5.0'}
-        req = urllib.request.Request(url, None, headers)
-        urldata = urllib.request.urlopen(req)
+        try:
+            resp = requests.get(url, timeout=20.0, headers=headers)
+        except requests.ReadTimeout:
+            self.log.warning("Timeout reading RSS feed %s", url)
+            return  # TODO: do we need to spit out an error?

        # re-render with readability
-        doc = readability.Document(urldata.read(),
+        doc = readability.Document(resp.text,
                                   url=url)
        timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")
        html = lxml.html.document_fromstring(doc.summary())
@@ -175,7 +178,7 @@ class Publications(Bureau):
            try:
                resp = requests.get(url, timeout=20.0)
            except requests.ReadTimeout:
-                self.log("Timeout reading RSS feed %s", url)
+                self.log.warning("Timeout reading RSS feed %s", url)
                continue

            feed_data = io.BytesIO(resp.content)