From 9efd790f38d27d59d05a554d7804e0dceb12e844 Mon Sep 17 00:00:00 2001 From: Brendan Howell Date: Wed, 8 Dec 2021 21:41:25 +0100 Subject: [PATCH] try to use readability.js --- screenless/bureau/publications/publications.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/screenless/bureau/publications/publications.py b/screenless/bureau/publications/publications.py index 34e0423..d2d6840 100644 --- a/screenless/bureau/publications/publications.py +++ b/screenless/bureau/publications/publications.py @@ -17,6 +17,7 @@ import kode256 import lxml.html import PIL from readability import readability +import readabilipy import requests from bureau import Bureau, add_command, add_api @@ -195,10 +196,12 @@ class Publications(Bureau): return # re-render with readability - doc = readability.Document(resp.text, - url=url) + #doc = readability.Document(resp.text, + # url=url) + doc = readabilipy.simple_json_from_html_string(resp.text, use_readability=True) + timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M") - html = lxml.html.document_fromstring(doc.summary()) + html = lxml.html.document_fromstring(doc["content"]) notecount = 0 # store links then make corresponding svg barcodes