try to use readability.js

workspace
Brendan Howell 3 years ago
parent eb5c6c0046
commit 9efd790f38

@@ -17,6 +17,7 @@ import kode256
import lxml.html import lxml.html
import PIL import PIL
from readability import readability from readability import readability
import readabilipy
import requests import requests
from bureau import Bureau, add_command, add_api from bureau import Bureau, add_command, add_api
@@ -195,10 +196,12 @@ class Publications(Bureau):
return return
# re-render with readability # re-render with readability
doc = readability.Document(resp.text, #doc = readability.Document(resp.text,
url=url) # url=url)
doc = readabilipy.simple_json_from_html_string(resp.text, use_readability=True)
timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M") timestamp = datetime.now().strftime("Sourced %d %B, %Y at %H:%M")
html = lxml.html.document_fromstring(doc.summary()) html = lxml.html.document_fromstring(doc["content"])
notecount = 0 notecount = 0
# store links then make corresponding svg barcodes # store links then make corresponding svg barcodes

Loading…
Cancel
Save