rewritelinks starting

master
Michael Murtaugh 5 years ago
parent 61b4fe977e
commit bf73e71300

@ -3,7 +3,7 @@ from mwclient import Site
from pprint import pprint from pprint import pprint
from jinja2 import Template from jinja2 import Template
from functions import unpack_response, clean_dir, remove_nonwords from functions import unpack_response, clean_dir, remove_nonwords
import html5lib
from functions import Colors from functions import Colors
import argparse import argparse
@ -53,6 +53,17 @@ def filenameforpage(p):
f=p.name.replace(' ','_').replace('/', SLASH) + '.html' f=p.name.replace(' ','_').replace('/', SLASH) + '.html'
return f return f
def rewritelinks(html):
    """Scan an HTML fragment and report every non-external link in it.

    The fragment is parsed with html5lib into an ElementTree structure
    (HTML namespaces disabled so plain tag/attribute names can be used in
    the path query). Every element carrying an ``href`` attribute is
    visited; elements whose ``class`` attribute contains ``"external"``
    are skipped, and the ``href`` of each remaining element is printed.

    The function currently neither modifies the parsed tree nor returns
    anything: the actual rewriting step is still a TODO.

    Args:
        html: HTML fragment source, passed to ``html5lib.parseFragment``.
    """
    t = html5lib.parseFragment(html, treebuilder="etree", namespaceHTMLElements=False)
    # Only elements that actually carry an href attribute are selected,
    # so a.attrib.get("href") below is never None.
    for a in t.findall(".//*[@href]"):
        linkclass = a.attrib.get("class", "")
        href = a.attrib.get("href")
        if "external" in linkclass:
            # leave external links alone
            continue
        print("LINK", href)
        # TODO: compute the rewritten target and assign it, e.g.
        #   a.attrib['href'] = new_href
publish=site.Categories['Publish'] publish=site.Categories['Publish']
for cat in publish.members(): for cat in publish.members():
if cat.namespace!=14: if cat.namespace!=14:

Loading…
Cancel
Save