def rewriteimgs(html):
    """Return *html* with the ``srcset`` attribute dropped from every ``<img>``.

    The fragment is parsed with html5lib into an ElementTree (without HTML
    namespaces, so plain tag names such as ``img`` match), modified in place,
    and serialised back to a unicode HTML string.

    Args:
        html: HTML fragment source as a string.

    Returns:
        The re-serialised HTML fragment as a ``str``.
    """
    t = html5lib.parseFragment(html, treebuilder="etree", namespaceHTMLElements=False)
    # srcset prevents the images from displaying, so drop the attribute
    # entirely. Blanking it would leave srcset="" in the output, which is not
    # valid HTML: the attribute must hold at least one image candidate.
    for img in t.findall(".//img[@srcset]"):
        # The XPath predicate guarantees the key exists, so del cannot raise.
        del img.attrib["srcset"]
    return ET.tostring(t, method="html", encoding="unicode")