|
|
@ -36,7 +36,6 @@ with open(imgsjson_fn, 'r') as imgsjson_file:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
SLASH = "\u2044"
|
|
|
|
SLASH = "\u2044"
|
|
|
|
HYPHEN = "\u2010"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def filenameforpage(p):
|
|
|
|
def filenameforpage(p):
|
|
|
@ -47,12 +46,20 @@ def filenameforlink(href):
|
|
|
|
href = urlunquote(href)
|
|
|
|
href = urlunquote(href)
|
|
|
|
if href.startswith("/sandbox/itchwiki/index.php/"):
|
|
|
|
if href.startswith("/sandbox/itchwiki/index.php/"):
|
|
|
|
href = href[len("/sandbox/itchwiki/index.php/"):]
|
|
|
|
href = href[len("/sandbox/itchwiki/index.php/"):]
|
|
|
|
href = href.replace(' ','_').replace('/', SLASH).replace('‐', HYPHEN) + '.html'
|
|
|
|
href = href.replace(' ','_').replace('/', SLASH) + '.html'
|
|
|
|
href = urlquote(href)
|
|
|
|
href = urlquote(href)
|
|
|
|
return href
|
|
|
|
return href
|
|
|
|
|
|
|
|
|
|
|
|
def rewritelinks (html):
|
|
|
|
def rewritelinks (html):
|
|
|
|
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
|
|
|
|
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# remove links to wiki File: pages
|
|
|
|
|
|
|
|
for a in t.findall(".//a[@class='image']"): # select img wrapping a
|
|
|
|
|
|
|
|
href = a.attrib.get('href')
|
|
|
|
|
|
|
|
if a.findall(".//img") and 'File:' in href: # ensure a has child: img
|
|
|
|
|
|
|
|
a.attrib['href'] = 'javascript:void(0);' # disable href
|
|
|
|
|
|
|
|
print('a wrapping img:', ET.tostring(a))
|
|
|
|
|
|
|
|
|
|
|
|
for a in t.findall(".//*[@href]"):
|
|
|
|
for a in t.findall(".//*[@href]"):
|
|
|
|
linkclass = a.attrib.get("class", "")
|
|
|
|
linkclass = a.attrib.get("class", "")
|
|
|
|
href = a.attrib.get("href")
|
|
|
|
href = a.attrib.get("href")
|
|
|
|