|
|
@ -195,9 +195,11 @@ class Publications(Bureau):
|
|
|
|
return
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# re-render with readability
|
|
|
|
# re-render with readability
|
|
|
|
#doc = readability.Document(resp.text,
|
|
|
|
|
|
|
|
# url=url)
|
|
|
|
|
|
|
|
# TODO: might be cool to try to use the "byline" and "title" fields of the doc
|
|
|
|
# TODO: might be cool to try to use the "byline" and "title" fields of the doc
|
|
|
|
|
|
|
|
if self.log.getEffectiveLevel() == logging.DEBUG:
|
|
|
|
|
|
|
|
with open("/tmp/raw_article.html", "w") as html_out:
|
|
|
|
|
|
|
|
html_out.write(resp.text)
|
|
|
|
|
|
|
|
self.log.debug("raw article html saved to /tmp/raw_article.html")
|
|
|
|
doc = readabilipy.simple_json_from_html_string(resp.text, use_readability=True)
|
|
|
|
doc = readabilipy.simple_json_from_html_string(resp.text, use_readability=True)
|
|
|
|
|
|
|
|
|
|
|
|
timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")
|
|
|
|
timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")
|
|
|
|