|
|
@ -55,15 +55,25 @@ def filenameforlink(href):
|
|
|
|
href = urlquote(href)
|
|
|
|
href = urlquote(href)
|
|
|
|
return href
|
|
|
|
return href
|
|
|
|
|
|
|
|
|
|
|
|
def rewritelinks (html):
|
|
|
|
|
|
|
|
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def rewriteimglinks(tree):
|
|
|
|
|
|
|
|
# t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# invoke after img src have be rewritten
|
|
|
|
# remove links to wiki File: pages
|
|
|
|
# remove links to wiki File: pages
|
|
|
|
for a in t.findall(".//a[@class='image']"): # select img wrapping a
|
|
|
|
for a in tree.findall(".//a[@class='image']"): # select img wrapping a
|
|
|
|
href = a.attrib.get('href')
|
|
|
|
href = a.attrib.get('href')
|
|
|
|
if a.findall(".//img") and 'File:' in href: # ensure a has child: img
|
|
|
|
if a.findall(".//img"): # ensure a has child: img
|
|
|
|
a.attrib['href'] = 'javascript:void(0);' # disable href
|
|
|
|
img = a.find(".//img")
|
|
|
|
|
|
|
|
img_src = img.attrib['src']
|
|
|
|
|
|
|
|
a.attrib['href'] = img_src # 'javascript:void(0);' # disable href
|
|
|
|
|
|
|
|
a.attrib['target'] = "_blank"
|
|
|
|
|
|
|
|
print(a)
|
|
|
|
|
|
|
|
print(ET.tostring(a, method="html", encoding="unicode"))
|
|
|
|
|
|
|
|
return tree
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def rewritelinks (html):
|
|
|
|
|
|
|
|
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
|
|
|
|
for a in t.findall(".//*[@href]"):
|
|
|
|
for a in t.findall(".//*[@href]"):
|
|
|
|
linkclass = a.attrib.get("class", "")
|
|
|
|
linkclass = a.attrib.get("class", "")
|
|
|
|
href = a.attrib.get("href")
|
|
|
|
href = a.attrib.get("href")
|
|
|
@ -108,6 +118,9 @@ def rewriteimgs(html):
|
|
|
|
img.attrib['srcset'] = "" # rm srcset value:it prevent imgs displaying
|
|
|
|
img.attrib['srcset'] = "" # rm srcset value:it prevent imgs displaying
|
|
|
|
img.attrib['width'] = ""
|
|
|
|
img.attrib['width'] = ""
|
|
|
|
img.attrib['height'] = ""
|
|
|
|
img.attrib['height'] = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
t = rewriteimglinks(tree=t)
|
|
|
|
|
|
|
|
|
|
|
|
html = ET.tostring(t, method="html", encoding="unicode")
|
|
|
|
html = ET.tostring(t, method="html", encoding="unicode")
|
|
|
|
return html
|
|
|
|
return html
|
|
|
|
|
|
|
|
|
|
|
|