|
|
@ -3,7 +3,7 @@ from mwclient import Site
|
|
|
|
from pprint import pprint
|
|
|
|
from pprint import pprint
|
|
|
|
from jinja2 import Template
|
|
|
|
from jinja2 import Template
|
|
|
|
from functions import unpack_response, clean_dir, remove_nonwords
|
|
|
|
from functions import unpack_response, clean_dir, remove_nonwords
|
|
|
|
|
|
|
|
import html5lib
|
|
|
|
from functions import Colors
|
|
|
|
from functions import Colors
|
|
|
|
import argparse
|
|
|
|
import argparse
|
|
|
|
|
|
|
|
|
|
|
@ -53,6 +53,17 @@ def filenameforpage(p):
|
|
|
|
f=p.name.replace(' ','_').replace('/', SLASH) + '.html'
|
|
|
|
f=p.name.replace(' ','_').replace('/', SLASH) + '.html'
|
|
|
|
return f
|
|
|
|
return f
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def rewritelinks(html):
    """Walk the links of an HTML fragment and report the non-external ones.

    The fragment is parsed with html5lib into an ElementTree (without HTML
    namespaces, so plain XPath expressions match), and every element that
    carries an ``href`` attribute is visited.  Elements whose ``class``
    attribute contains the substring ``"external"`` are skipped; the href of
    every other element is printed, prefixed with ``LINK``.

    Args:
        html: HTML fragment, passed as-is to ``html5lib.parseFragment``.

    Returns:
        None.  The parsed tree is currently neither modified nor returned;
        the actual rewriting step is still a TODO (see the end of the loop).
    """
    t = html5lib.parseFragment(html, treebuilder="etree", namespaceHTMLElements=False)
    for a in t.findall(".//*[@href]"):
        # A missing class attribute is treated as "no classes".
        linkclass = a.attrib.get("class", "")
        href = a.attrib.get("href")
        if "external" in linkclass:
            # leave external links alone
            continue
        print ("LINK", href)
        # TODO: compute the rewritten target and store it back on the
        # element, e.g. a.attrib['href'] = new_href
|
|
|
|
|
|
|
|
|
|
|
|
publish=site.Categories['Publish']
|
|
|
|
publish=site.Categories['Publish']
|
|
|
|
for cat in publish.members():
|
|
|
|
for cat in publish.members():
|
|
|
|
if cat.namespace!=14:
|
|
|
|
if cat.namespace!=14:
|
|
|
|