debug to save raw article html

workspace
Brendan Howell 3 years ago
parent a6dd18507d
commit b8616927fe

@@ -195,9 +195,11 @@ class Publications(Bureau):
return
# re-render with readability
#doc = readability.Document(resp.text,
# url=url)
# TODO: might be cool to try to use the "byline" and "title" fields of the doc
if self.log.getEffectiveLevel() == logging.DEBUG:
with open("/tmp/raw_article.html", "w") as html_out:
html_out.write(resp.text)
self.log.debug("raw article html saved to /tmp/raw_article.html")
doc = readabilipy.simple_json_from_html_string(resp.text, use_readability=True)
timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")

Loading…
Cancel
Save