rewritelinks starting

pull/6/head
Michael Murtaugh 4 years ago
parent 61b4fe977e
commit bf73e71300

@ -3,7 +3,7 @@ from mwclient import Site
from pprint import pprint
from jinja2 import Template
from functions import unpack_response, clean_dir, remove_nonwords
import html5lib
from functions import Colors
import argparse
@ -53,6 +53,17 @@ def filenameforpage(p):
f=p.name.replace(' ','_').replace('/', SLASH) + '.html'
return f
def rewritelinks(html):
    """Scan an HTML fragment and report every non-external link it contains.

    The fragment is parsed with html5lib into an ElementTree structure, and
    every element carrying an ``href`` attribute is visited. Elements whose
    ``class`` attribute contains the substring ``"external"`` are skipped;
    for all others the ``href`` value is printed, prefixed with ``LINK``.

    Rewriting is not implemented yet: the parsed tree is not modified and
    the function returns ``None``.

    Args:
        html: HTML fragment source to parse.
    """
    t = html5lib.parseFragment(html, treebuilder="etree", namespaceHTMLElements=False)
    for a in t.findall(".//*[@href]"):
        linkclass = a.attrib.get("class", "")
        href = a.attrib.get("href")
        if "external" in linkclass:
            # leave external links alone
            continue
        print("LINK", href)
        # TODO: compute the rewritten target and assign it back, e.g.
        #   a.attrib['href'] = new_href
publish=site.Categories['Publish']
for cat in publish.members():
if cat.namespace!=14:

Loading…
Cancel
Save