From b8616927fe74c59077254cb86a3649fba0803ffd Mon Sep 17 00:00:00 2001 From: Brendan Howell Date: Thu, 3 Feb 2022 00:09:29 +0100 Subject: [PATCH] debug to save raw article html --- screenless/bureau/publications/publications.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/screenless/bureau/publications/publications.py b/screenless/bureau/publications/publications.py index 2996d40..aa2f8e0 100644 --- a/screenless/bureau/publications/publications.py +++ b/screenless/bureau/publications/publications.py @@ -195,9 +195,11 @@ class Publications(Bureau): return # re-render with readability - #doc = readability.Document(resp.text, - # url=url) # TODO: might be cool to try to use the "byline" and "title" fields of the doc + if self.log.getEffectiveLevel() == logging.DEBUG: + with open("/tmp/raw_article.html", "w") as html_out: + html_out.write(resp.text) + self.log.debug("raw article html saved to /tmp/raw_article.html") doc = readabilipy.simple_json_from_html_string(resp.text, use_readability=True) timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")