|
|
@ -67,7 +67,7 @@ def filenameforlink(href):
|
|
|
|
return href
|
|
|
|
return href
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def rewriteimglinks(tree):
|
|
|
|
def rewriteimglinks(tree, page):
|
|
|
|
#on Overview_main_page
|
|
|
|
#on Overview_main_page
|
|
|
|
# add link to publication on <a><img>
|
|
|
|
# add link to publication on <a><img>
|
|
|
|
|
|
|
|
|
|
|
@ -76,15 +76,18 @@ def rewriteimglinks(tree):
|
|
|
|
for a in tree.findall(".//a[@class='image']"): # select img wrapping a
|
|
|
|
for a in tree.findall(".//a[@class='image']"): # select img wrapping a
|
|
|
|
href = a.attrib.get('href')
|
|
|
|
href = a.attrib.get('href')
|
|
|
|
if a.findall(".//img"): # ensure a has child: img
|
|
|
|
if a.findall(".//img"): # ensure a has child: img
|
|
|
|
img = a.find(".//img")
|
|
|
|
if page.name == 'Overview main page':
|
|
|
|
img_src = img.attrib['src']
|
|
|
|
img = a.find(".//img")
|
|
|
|
a.attrib['href'] = img_src # 'javascript:void(0);' # disable href
|
|
|
|
img_src = img.attrib['src']
|
|
|
|
a.attrib['target'] = "_blank"
|
|
|
|
a.attrib['href'] = img_src
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
a.attrib['href'] = 'javascript:void(0);' # disable href
|
|
|
|
|
|
|
|
|
|
|
|
print(a)
|
|
|
|
print(a)
|
|
|
|
print(ET.tostring(a, method="html", encoding="unicode"))
|
|
|
|
print(ET.tostring(a, method="html", encoding="unicode"))
|
|
|
|
return tree
|
|
|
|
return tree
|
|
|
|
|
|
|
|
|
|
|
|
def rewritelinks (html):
|
|
|
|
def rewritelinks(html):
|
|
|
|
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
|
|
|
|
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
|
|
|
|
for a in t.findall(".//*[@href]"):
|
|
|
|
for a in t.findall(".//*[@href]"):
|
|
|
|
linkclass = a.attrib.get("class", "")
|
|
|
|
linkclass = a.attrib.get("class", "")
|
|
|
@ -100,7 +103,7 @@ def rewritelinks (html):
|
|
|
|
return html
|
|
|
|
return html
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def rewriteimgs(html):
|
|
|
|
def rewriteimgs(html, page):
|
|
|
|
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
|
|
|
|
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
|
|
|
|
|
|
|
|
|
|
|
|
# replace images url with local image in ../images
|
|
|
|
# replace images url with local image in ../images
|
|
|
@ -131,7 +134,7 @@ def rewriteimgs(html):
|
|
|
|
img.attrib['width'] = ""
|
|
|
|
img.attrib['width'] = ""
|
|
|
|
img.attrib['height'] = ""
|
|
|
|
img.attrib['height'] = ""
|
|
|
|
|
|
|
|
|
|
|
|
t = rewriteimglinks(tree=t)
|
|
|
|
t = rewriteimglinks(tree=t, page=page)
|
|
|
|
|
|
|
|
|
|
|
|
html = ET.tostring(t, method="html", encoding="unicode")
|
|
|
|
html = ET.tostring(t, method="html", encoding="unicode")
|
|
|
|
return html
|
|
|
|
return html
|
|
|
@ -140,7 +143,7 @@ def dumppage(p, template, rewrite_images=True):
|
|
|
|
htmlsrc = site.parse(page=p.name)['text']['*']
|
|
|
|
htmlsrc = site.parse(page=p.name)['text']['*']
|
|
|
|
htmlsrc = rewritelinks(htmlsrc)
|
|
|
|
htmlsrc = rewritelinks(htmlsrc)
|
|
|
|
if rewrite_images:
|
|
|
|
if rewrite_images:
|
|
|
|
htmlsrc = rewriteimgs(htmlsrc)
|
|
|
|
htmlsrc = rewriteimgs(html=htmlsrc, page=p)
|
|
|
|
html = template.render(page=p, body=htmlsrc, staticpath='.')
|
|
|
|
html = template.render(page=p, body=htmlsrc, staticpath='.')
|
|
|
|
with open(os.path.join(args.output, filenameforpage(p)), 'w') as f:
|
|
|
|
with open(os.path.join(args.output, filenameforpage(p)), 'w') as f:
|
|
|
|
f.write(html)
|
|
|
|
f.write(html)
|
|
|
|