support OpenGraph data for news

7 years ago · 10066d82d4
parent c8b47ec336
commit 10066d82d4
1 changed files with 33 additions and 1 deletions
--- a/screenless/bureau/publications/publications.py
+++ b/screenless/bureau/publications/publications.py
@ -164,6 +164,35 @@ class Publications(Bureau):
                        article=lxml.html.tostring(html).decode("utf-8"),
                        url=url, date=timestamp)

+    def _get_ogdata(self, url):
+        """Fetch a page and collect its OpenGraph metadata.
+
+        Downloads ``url`` and gathers every element that carries a
+        ``property`` attribute of the form ``og:<name>``.
+
+        Args:
+            url: address of the document to inspect.
+
+        Returns:
+            dict mapping each OpenGraph name (with the ``og:`` prefix
+            removed) to the element's ``content`` value. The dict is empty
+            when the page cannot be fetched or parsed, or when it declares
+            no OpenGraph properties. If a property occurs more than once,
+            the last occurrence wins.
+        """
+        ogdata = {}
+
+        # download page with requests, sending a browser-like User-Agent
+        # (presumably because some sites reject the default one)
+        headers = {'User-Agent': 'Mozilla/5.0'}
+        try:
+            resp = requests.get(url, timeout=20.0, headers=headers)
+        except requests.Timeout:
+            # covers both connect and read timeouts
+            self.log.warning("Timeout fetching OpenGraph data from document %s",
+                             url)
+            return ogdata
+        except requests.RequestException as err:
+            # OpenGraph data is optional, so any other request failure
+            # (DNS, refused connection, bad URL, ...) is logged, not raised
+            self.log.warning("Error fetching OpenGraph data from document %s: %s",
+                             url, err)
+            return ogdata
+
+        try:
+            html = lxml.html.document_fromstring(resp.text)
+        except (lxml.etree.ParserError, ValueError) as err:
+            # lxml raises ParserError for empty documents; ValueError is
+            # caught as well in case lxml rejects the decoded text
+            self.log.warning("Could not parse document %s for OpenGraph data: %s",
+                             url, err)
+            return ogdata
+
+        # find all elements with property="og:<something>"
+        # ElementPath needs a tag (or "*") before a predicate, so the
+        # expression must be ".//*[@property]" rather than ".//[@property]"
+        for element in html.iterfind(".//*[@property]"):
+            prop = element.get("property")
+            val = element.get("content")
+            # skip non-OpenGraph properties and tags without a content value
+            if prop.startswith("og:") and val is not None:
+                prop = prop[3:]
+                ogdata[prop] = val
+                self.log.debug("set og:%s to %s", prop, val)
+
+        return ogdata
+
    def _get_news(self):
        """fetch a set of latest news entries from sources specified in config
        """
@ -189,12 +218,15 @@ class Publications(Bureau):
                num_entries = len(feed.entries)

            for _ in range(num_entries):
-                entry = feed.entries.pop()
+                entry = feed.entries.pop(0)
                entry.source = feed.feed.title
                entry.dbhash = self._make_shorturl(entry.link)
                entry.svg = code128.svg("PBr." + entry.dbhash)
+                og_data = self._get_ogdata(entry.link)
                if hasattr(entry, "media_thumbnail"):
                    entry.img = entry.media_thumbnail[-1]["url"]
+                elif hasattr(og_data, "image"):
+                    entry.img = og_data["image"]
                else:
                    entry.img = " "