|
|
|
@ -5,6 +5,9 @@ import html5lib
|
|
|
|
|
from xml.etree import ElementTree as ET
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Width (passed to the API as iiurlwidth) requested for the scaled image that
# rewriteimagelink() puts into the <img src> shown inline on a page.
THUMB_SIZE = 320
|
|
|
|
|
# Width (passed to the API as iiurlwidth) requested for the larger rendition
# that rewriteimagelink() uses as the target of the image link's href.
FULL_SIZE = 640
|
|
|
|
|
|
|
|
|
|
# Namespace number for category pages -- presumably MediaWiki's built-in
# "Category" namespace; its use is outside this excerpt, TODO confirm.
NS_CATEGORY = 14
|
|
|
|
|
|
|
|
|
|
# Command-line parser for the script; the description text is what is shown
# in --help output.
p = argparse.ArgumentParser(description="Dump wiki files to html")
|
|
|
|
@ -51,6 +54,36 @@ def filenameforlink(href):
|
|
|
|
|
path = path+".html"
|
|
|
|
|
return path
|
|
|
|
|
|
|
|
|
|
def rewriteimagelink(a):
    """Point an image link at scaled renditions of the linked file.

    The last path segment of the link's ``href`` is taken as the file title.
    The link's ``href`` is replaced by the URL of a FULL_SIZE-wide rendition,
    and the ``src`` of the ``<img>`` inside the link by the URL of a
    THUMB_SIZE-wide rendition.  The ``width``, ``height`` and ``srcset``
    attributes of the ``<img>`` are dropped because they describe the
    previous image source and would not match the new ``src``.

    The element is modified in place.  The link is left untouched when it has
    no usable ``href`` or when the API returns no image info for the title
    (for example a missing file); the ``<img>`` rewrite is skipped when the
    link contains no ``<img>`` child.

    Args:
        a: ElementTree element for the link (normally an ``<a class="image">``).

    Returns:
        None.
    """
    # Local import so the block does not depend on the file's import section.
    from urllib.parse import unquote

    href = a.attrib.get("href")
    if not href:
        return

    # hrefs are percent-encoded, but the API expects the plain page title.
    title = unquote(href.split("/")[-1])
    if not title:
        return
    print("rewriteimagelink", title)

    fullsizeurl = _imageurlforwidth(title, FULL_SIZE)
    if fullsizeurl is None:
        # Keep the dump going instead of crashing on one bad link.
        print("rewriteimagelink: no image info for", title)
        return
    a.attrib['href'] = fullsizeurl

    img = a.find("img")
    if img is None:
        return

    thumburl = _imageurlforwidth(title, THUMB_SIZE) or fullsizeurl
    img.attrib['src'] = thumburl
    for stale in ("width", "height", "srcset"):
        img.attrib.pop(stale, None)
    print("rewriteimagelink", thumburl, fullsizeurl)


def _imageurlforwidth(title, width):
    """Return the URL of file *title* scaled to *width*, or None if unknown.

    Runs an ``imageinfo`` query through the module-level ``site`` object and
    reads the first page of the ``formatversion=2`` response.  Falls back to
    the file's plain ``url`` when the response carries no ``thumburl``.
    """
    r = site.api("query", prop="imageinfo", titles=title, iiprop="url",
                 iiurlwidth=str(width), formatversion=2)
    pages = r.get("query", {}).get("pages") or [{}]
    infos = pages[0].get("imageinfo")
    if not infos:
        return None
    return infos[0].get("thumburl") or infos[0].get("url")
|
|
|
|
|
|
|
|
|
|
def rewritelinks(html):
|
|
|
|
|
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
|
|
|
|
|
for a in t.findall(".//*[@href]"):
|
|
|
|
@ -60,7 +93,11 @@ def rewritelinks(html):
|
|
|
|
|
# leave external links alone
|
|
|
|
|
continue
|
|
|
|
|
# print ("LINK", href)
|
|
|
|
|
if href.startswith("/mediadesign/"):
|
|
|
|
|
if linkclass == "image":
|
|
|
|
|
# link to presentation version of image
|
|
|
|
|
# change img.src to a thumbnail
|
|
|
|
|
rewriteimagelink(a)
|
|
|
|
|
elif href.startswith("/mediadesign/"):
|
|
|
|
|
new_href = filenameforlink(href)
|
|
|
|
|
# print ("Rewriting link {} to {}".format(href, new_href), file=sys.stderr)
|
|
|
|
|
a.attrib['href'] = new_href
|
|
|
|
|