try to use readability.js

workspace
Brendan Howell 3 years ago
parent eb5c6c0046
commit 9efd790f38

@@ -17,6 +17,7 @@ import kode256
 import lxml.html
 import PIL
 from readability import readability
+import readabilipy
 import requests
 from bureau import Bureau, add_command, add_api
@@ -195,10 +196,12 @@ class Publications(Bureau):
 return
 # re-render with readability
-doc = readability.Document(resp.text,
-url=url)
+#doc = readability.Document(resp.text,
+# url=url)
+doc = readabilipy.simple_json_from_html_string(resp.text, use_readability=True)
 timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")
-html = lxml.html.document_fromstring(doc.summary())
+html = lxml.html.document_fromstring(doc["content"])
 notecount = 0
 # store links then make corresponding svg barcodes

Loading…
Cancel
Save