def rewriteimgs(html):
    """Strip the ``srcset`` attribute from every ``<img>`` in an HTML fragment.

    The fragment is parsed with html5lib into an etree tree, every ``<img>``
    element that carries a ``srcset`` attribute has that attribute removed,
    and the tree is serialized back to a string with ``ET.tostring`` using
    the ``html`` output method.

    Args:
        html: HTML fragment source, as a string.

    Returns:
        The re-serialized fragment as a unicode string.
    """
    t = html5lib.parseFragment(html, treebuilder="etree", namespaceHTMLElements=False)
    # srcset prevents the images from displaying, so drop the attribute
    # entirely. Blanking the value instead would leave `srcset=""` in the
    # output, which is not a valid srcset (it must list at least one image
    # candidate string).
    for img in t.findall(".//img[@srcset]"):
        del img.attrib["srcset"]
    return ET.tostring(t, method="html", encoding="unicode")