support OpenGraph data for news

workspace
Brendan Howell 7 years ago
parent c8b47ec336
commit 10066d82d4

@ -164,6 +164,35 @@ class Publications(Bureau):
article=lxml.html.tostring(html).decode("utf-8"),
url=url, date=timestamp)
def _get_ogdata(self, url):
    """
    returns a dict with OpenGraph metadata if available

    The page at ``url`` is downloaded and every element that carries a
    ``property="og:<name>"`` attribute together with a ``content``
    attribute is collected. Keys are stored without the ``og:`` prefix,
    e.g. ``og:image`` becomes ``ogdata["image"]``.

    This is a best-effort lookup: when the page cannot be fetched or
    parsed a warning is logged and an empty dict is returned, so callers
    never have to handle an exception from here.
    """
    ogdata = {}
    # download page with requests, sending a browser-like User-Agent
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        resp = requests.get(url, timeout=20.0, headers=headers)
    except requests.Timeout:
        # covers both connect and read timeouts
        self.log.warning("Timeout fetching OpenGraph data from document %s",
                         url)
        return ogdata
    except requests.RequestException as err:
        # DNS failure, refused connection, invalid URL, too many redirects...
        self.log.warning("Error fetching OpenGraph data from document %s: %s",
                         url, err)
        return ogdata

    try:
        html = lxml.html.document_fromstring(resp.text)
    except (lxml.etree.ParserError, ValueError) as err:
        # ParserError: empty document; ValueError: unicode input that
        # carries its own XML encoding declaration
        self.log.warning("Could not parse document %s for OpenGraph data: %s",
                         url, err)
        return ogdata

    # find all elements with property="og:<something>"
    # NOTE: the path needs an explicit node test ("*") before the predicate,
    # ".//[@property]" is rejected by ElementPath.
    for element in html.findall(".//*[@property]"):
        prop = element.get("property")
        val = element.get("content")
        # skip non-OpenGraph properties and tags without a usable value
        if not prop.startswith("og:") or val is None:
            continue
        key = prop[3:]
        ogdata[key] = val
        self.log.debug("set og: %s to %s", key, val)

    return ogdata
def _get_news(self):
"""fetch a set of latest news entries from sources specified in config
"""
@ -189,12 +218,15 @@ class Publications(Bureau):
num_entries = len(feed.entries)
for _ in range(num_entries):
entry = feed.entries.pop()
entry = feed.entries.pop(0)
entry.source = feed.feed.title
entry.dbhash = self._make_shorturl(entry.link)
entry.svg = code128.svg("PBr." + entry.dbhash)
og_data = self._get_ogdata(entry.link)
if hasattr(entry, "media_thumbnail"):
entry.img = entry.media_thumbnail[-1]["url"]
elif hasattr(og_data, "image"):
entry.img = og_data["image"]
else:
entry.img = " "

Loading…
Cancel
Save