diff --git a/dumpwiki.py b/dumpwiki.py index 9fed667..12336cb 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -66,15 +66,26 @@ def filenameforlink(href): href = urlquote(href) return href -def rewritelinks (html): - t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) - # remove links to wiki File: pages - for a in t.findall(".//a[@class='image']"): # select img wrapping a - href = a.attrib.get('href') - if a.findall(".//img") and 'File:' in href: # ensure a has child: img - a.attrib['href'] = 'javascript:void(0);' # disable href +def rewriteimglinks(tree, page): + # invoke after img src has been rewritten + # To: remove links to wiki File on all pages + # but Overview_main_page page where link to publication page is added + if page.name == 'Overview main page': + for div_parent in tree.findall(".//div[@class='tooltip']"): + anchor_of_img = div_parent.find(".//div/a") + if anchor_of_img.find(".//img") is not None: # needs child + a_tag = div_parent.find(".//p/span/a") + publication_href = a_tag.attrib.get('href') + anchor_of_img.attrib['href'] = publication_href + else: + for a in tree.findall(".//a[@class='image']"): # select img wrapping a + if a.findall(".//img"): # ensure a has child: img + a.attrib['href'] = 'javascript:void(0);' # disable href + return tree +def rewritelinks(html): + t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) for a in t.findall(".//*[@href]"): linkclass = a.attrib.get("class", "") href = a.attrib.get("href") @@ -89,7 +100,7 @@ def rewritelinks (html): return html -def rewriteimgs(html): +def rewriteimgs(html, page): t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) # replace images url with local image in ../images @@ -119,6 +130,9 @@ def rewriteimgs(html): img.attrib['srcset'] = "" # rm srcset value:it prevent imgs displaying img.attrib['width'] = "" img.attrib['height'] = "" + + t = rewriteimglinks(tree=t, page=page) + html = ET.tostring(t, method="html", encoding="unicode") return html @@ -126,7 +140,7 @@ def dumppage(p, template, rewrite_images=True): htmlsrc = site.parse(page=p.name)['text']['*'] htmlsrc = rewritelinks(htmlsrc) if rewrite_images: - htmlsrc = rewriteimgs(htmlsrc) + htmlsrc = rewriteimgs(html=htmlsrc, page=p) html = template.render(page=p, body=htmlsrc, staticpath='.') with open(os.path.join(args.output, filenameforpage(p)), 'w') as f: f.write(html) diff --git a/static/index.css b/static/index.css index 3cea8a2..68d8aec 100644 --- a/static/index.css +++ b/static/index.css @@ -293,3 +293,5 @@ margin-left: 40px; height: 2px; background-color: #0BEFEB; } + +a.image {cursor: pointer!important;} /* KEEP THIS: show imgs as link in Overview */ \ No newline at end of file