def rewriteimglinks(tree, page):
    """Rewrite the href of anchors that wrap images and return the tree.

    Invoke after the img src attributes have been rewritten.

    On the page named 'Overview main page', every ``div`` with class
    ``tooltip`` has the anchor wrapping its image (``.//div/a``) pointed
    at the href of the anchor found under ``.//p/span/a`` in the same
    tooltip. Tooltips missing the image anchor, the image, the second
    anchor, or that anchor's href are left untouched.

    On every other page, each ``a`` with class ``image`` that contains
    an ``img`` gets its href replaced by ``javascript:void(0);`` so the
    link is disabled.

    Args:
        tree: element tree (or element) supporting ``find``/``findall``
            with ElementPath expressions; modified in place.
        page: object exposing a ``name`` attribute with the page title.

    Returns:
        The same ``tree`` object that was passed in.
    """
    if page.name == 'Overview main page':
        for tooltip in tree.findall(".//div[@class='tooltip']"):
            image_anchor = tooltip.find(".//div/a")
            # find() returns None when nothing matches; skip tooltips
            # that lack the anchor or whose anchor wraps no image.
            if image_anchor is None or image_anchor.find(".//img") is None:
                continue

            source_anchor = tooltip.find(".//p/span/a")
            if source_anchor is None:
                continue

            publication_href = source_anchor.attrib.get('href')
            # Never store None as an attribute value; keep the old href.
            if publication_href is None:
                continue

            image_anchor.attrib['href'] = publication_href
    else:
        for anchor in tree.findall(".//a[@class='image']"):
            # Only anchors that actually wrap an img are disabled.
            if anchor.find(".//img") is not None:
                anchor.attrib['href'] = 'javascript:void(0);'
    return tree