From 4426fbca8b382c62d007e11c10fe937658bbe732 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Thu, 16 Apr 2020 11:36:57 +0200 Subject: [PATCH 1/4] image parent links: link to image file --- dumpwiki.py | 23 ++++++++++++++++++----- static/archive.css | 2 -- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/dumpwiki.py b/dumpwiki.py index 7a37930..c4fc24e 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -55,15 +55,25 @@ def filenameforlink(href): href = urlquote(href) return href -def rewritelinks (html): - t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) +def rewriteimglinks(tree): + # t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) + + # invoke after img src have be rewritten # remove links to wiki File: pages - for a in t.findall(".//a[@class='image']"): # select img wrapping a + for a in tree.findall(".//a[@class='image']"): # select img wrapping a href = a.attrib.get('href') - if a.findall(".//img") and 'File:' in href: # ensure a has child: img - a.attrib['href'] = 'javascript:void(0);' # disable href + if a.findall(".//img"): # ensure a has child: img + img = a.find(".//img") + img_src = img.attrib['src'] + a.attrib['href'] = img_src # 'javascript:void(0);' # disable href + a.attrib['target'] = "_blank" + print(a) + print(ET.tostring(a, method="html", encoding="unicode")) + return tree +def rewritelinks (html): + t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) for a in t.findall(".//*[@href]"): linkclass = a.attrib.get("class", "") href = a.attrib.get("href") @@ -108,6 +118,9 @@ def rewriteimgs(html): img.attrib['srcset'] = "" # rm srcset value:it prevent imgs displaying img.attrib['width'] = "" img.attrib['height'] = "" + + t = rewriteimglinks(tree=t) + html = ET.tostring(t, method="html", encoding="unicode") return html diff --git a/static/archive.css b/static/archive.css index fd416eb..b3dea09 100644 --- a/static/archive.css +++ b/static/archive.css @@ -12,5 +12,3 @@ a:hover { text-decoration: none; color: blue; } - -a.image {cursor: default!important;} /* KEEP THIS: it is important to avoid images to seeming like links */ \ No newline at end of file From 875de65e35c11ac04ff157aa3a7368c09ccb724b Mon Sep 17 00:00:00 2001 From: Castro0o Date: Thu, 16 Apr 2020 14:24:50 +0200 Subject: [PATCH 2/4] TODO overview page --- dumpwiki.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dumpwiki.py b/dumpwiki.py index c4fc24e..918be0b 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -57,7 +57,8 @@ def filenameforlink(href): def rewriteimglinks(tree): - # t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) + #on Overview_main_page + # add link to publication on # invoke after img src have be rewritten # remove links to wiki File: pages From 11fe0bf03bebc765a68579815ec53879559209d8 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Fri, 17 Apr 2020 09:17:11 +0200 Subject: [PATCH 3/4] Overview main page: image as links (wip) --- dumpwiki.py | 21 ++++++++++++--------- static/archive.css | 2 ++ static/index.css | 2 ++ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/dumpwiki.py b/dumpwiki.py index 3638ba4..3f07913 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -67,7 +67,7 @@ def filenameforlink(href): return href -def rewriteimglinks(tree): +def rewriteimglinks(tree, page): #on Overview_main_page # add link to publication on @@ -76,15 +76,18 @@ def rewriteimglinks(tree): for a in tree.findall(".//a[@class='image']"): # select img wrapping a href = a.attrib.get('href') if a.findall(".//img"): # ensure a has child: img - img = a.find(".//img") - img_src = img.attrib['src'] - a.attrib['href'] = img_src # 'javascript:void(0);' # disable href - a.attrib['target'] = "_blank" + if page.name == 'Overview main page': + img = a.find(".//img") + img_src = img.attrib['src'] + a.attrib['href'] = img_src + else: + a.attrib['href'] = 'javascript:void(0);' # disable href + print(a) print(ET.tostring(a, method="html", encoding="unicode")) return tree -def rewritelinks (html): +def rewritelinks(html): t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) for a in t.findall(".//*[@href]"): linkclass = a.attrib.get("class", "") @@ -100,7 +103,7 @@ def rewritelinks (html): return html -def rewriteimgs(html): +def rewriteimgs(html, page): t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) # replace images url with local image in ../images @@ -131,7 +134,7 @@ def rewriteimgs(html): img.attrib['width'] = "" img.attrib['height'] = "" - t = rewriteimglinks(tree=t) + t = rewriteimglinks(tree=t, page=page) html = ET.tostring(t, method="html", encoding="unicode") return html @@ -140,7 +143,7 @@ def dumppage(p, template, rewrite_images=True): htmlsrc = site.parse(page=p.name)['text']['*'] htmlsrc = rewritelinks(htmlsrc) if rewrite_images: - htmlsrc = rewriteimgs(htmlsrc) + htmlsrc = rewriteimgs(html=htmlsrc, page=p) html = template.render(page=p, body=htmlsrc, staticpath='.') with open(os.path.join(args.output, filenameforpage(p)), 'w') as f: f.write(html) diff --git a/static/archive.css b/static/archive.css index f2db950..0c99a6b 100644 --- a/static/archive.css +++ b/static/archive.css @@ -65,3 +65,5 @@ a:hover { text-decoration: none; color: blue; } + +a.image {cursor: default!important;} /* KEEP THIS: it is important to avoid images to seeming like links */ \ No newline at end of file diff --git a/static/index.css b/static/index.css index 3cea8a2..68d8aec 100644 --- a/static/index.css +++ b/static/index.css @@ -293,3 +293,5 @@ margin-left: 40px; height: 2px; background-color: #0BEFEB; } + +a.image {cursor: pointer!important;} /* KEEP THIS: show imgs as link in Overview */ \ No newline at end of file From e1fb598cf4c45b9f25f78123c1707befae614476 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Fri, 17 Apr 2020 10:06:31 +0200 Subject: [PATCH 4/4] Overview main page: img link to publication pages --- dumpwiki.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/dumpwiki.py b/dumpwiki.py index 3f07913..12336cb 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -68,23 +68,20 @@ def filenameforlink(href): def rewriteimglinks(tree, page): - #on Overview_main_page - # add link to publication on - - # invoke after img src have be rewritten - # remove links to wiki File: pages - for a in tree.findall(".//a[@class='image']"): # select img wrapping a - href = a.attrib.get('href') - if a.findall(".//img"): # ensure a has child: img - if page.name == 'Overview main page': - img = a.find(".//img") - img_src = img.attrib['src'] - a.attrib['href'] = img_src - else: + # invoke after img src has been rewritten + # To: remove links to wiki File on all pages + # but Overview_main_page page where link to publication page is added + if page.name == 'Overview main page': + for div_parent in tree.findall(".//div[@class='tooltip']"): + anchor_of_img = div_parent.find(".//div/a") + if anchor_of_img.find(".//img") is not None: # needs child + a_tag = div_parent.find(".//p/span/a") + publication_href = a_tag.attrib.get('href') + anchor_of_img.attrib['href'] = publication_href + else: + for a in tree.findall(".//a[@class='image']"): # select img wrapping a + if a.findall(".//img"): # ensure a has child: img a.attrib['href'] = 'javascript:void(0);' # disable href - - print(a) - print(ET.tostring(a, method="html", encoding="unicode")) return tree def rewritelinks(html):