|
|
@ -8,12 +8,16 @@ from xml.etree import ElementTree as ET
|
|
|
|
from urllib.parse import quote as urlquote, unquote as urlunquote
|
|
|
|
from urllib.parse import quote as urlquote, unquote as urlunquote
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
NS_MAIN = 0
|
|
|
|
|
|
|
|
NS_CATEGORY = 14
|
|
|
|
|
|
|
|
|
|
|
|
p = argparse.ArgumentParser(description="Dump wiki files to html",
|
|
|
|
p = argparse.ArgumentParser(description="Dump wiki files to html",
|
|
|
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
|
|
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
|
|
|
p.add_argument("--host", metavar='', default="hub.xpub.nl/sandbox", help='wiki host')
|
|
|
|
p.add_argument("--host", metavar='', default="hub.xpub.nl/sandbox", help='wiki host')
|
|
|
|
p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Should end with /")
|
|
|
|
p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Should end with /")
|
|
|
|
p.add_argument("--output", default="../archive", help="Output path for pages")
|
|
|
|
p.add_argument("--output", default="../archive", help="Output path for pages")
|
|
|
|
p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only")
|
|
|
|
p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only")
|
|
|
|
|
|
|
|
p.add_argument("--skipimages", default=False, action="store_true", help="Don't do images (for testing)")
|
|
|
|
p.add_argument("--imgsrc", default='archive',
|
|
|
|
p.add_argument("--imgsrc", default='archive',
|
|
|
|
choices=['archive', 'remote'],
|
|
|
|
choices=['archive', 'remote'],
|
|
|
|
help="What is the source of the images?")
|
|
|
|
help="What is the source of the images?")
|
|
|
@ -30,8 +34,9 @@ with open('login.txt', 'r') as login: # read login user & pwd
|
|
|
|
user, pwd = loginlines.split('\n')
|
|
|
|
user, pwd = loginlines.split('\n')
|
|
|
|
site.login(username=user, password=pwd) # login to wiki
|
|
|
|
site.login(username=user, password=pwd) # login to wiki
|
|
|
|
|
|
|
|
|
|
|
|
imgsjson_fn = os.path.join(wd, 'images.json') # read images.json file
|
|
|
|
if not args.skipimages:
|
|
|
|
with open(imgsjson_fn, 'r') as imgsjson_file:
|
|
|
|
imgsjson_fn = os.path.join(wd, 'images.json') # read images.json file
|
|
|
|
|
|
|
|
with open(imgsjson_fn, 'r') as imgsjson_file:
|
|
|
|
images_info = json.load(imgsjson_file)
|
|
|
|
images_info = json.load(imgsjson_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -106,10 +111,20 @@ def rewriteimgs(html):
|
|
|
|
html = ET.tostring(t, method="html", encoding="unicode")
|
|
|
|
html = ET.tostring(t, method="html", encoding="unicode")
|
|
|
|
return html
|
|
|
|
return html
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def dumppage(p, template, rewrite_images=True):
    """Render one wiki page through *template* and write it to the output dir.

    Args:
        p: Page object; its ``name`` is passed to ``site.parse`` and the
            object itself is handed to the template and ``filenameforpage``.
        template: Object exposing ``render(page=..., body=..., staticpath=...)``
            (presumably a Jinja2 ``Template`` -- confirm against the caller).
        rewrite_images: When true, the page HTML is additionally passed
            through ``rewriteimgs``; callers pass ``not args.skipimages``.

    The result is written to ``args.output`` under the name returned by
    ``filenameforpage(p)``. Nothing is returned.
    """
    # Parsed HTML of the page as returned by the wiki API.
    htmlsrc = site.parse(page=p.name)['text']['*']
    htmlsrc = rewritelinks(htmlsrc)
    if rewrite_images:
        htmlsrc = rewriteimgs(htmlsrc)

    # TODO: Andre structure of archive: from ./archive/0 to: ./archive ./0
    html = template.render(page=p, body=htmlsrc, staticpath=f'../{wd_name}')

    out_path = os.path.join(args.output, filenameforpage(p))
    # Pin the encoding: wiki content routinely contains non-ASCII text, and
    # the platform default encoding is not guaranteed to be UTF-8.
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(html)
|
|
|
|
|
|
|
|
|
|
|
|
publish=site.Categories['Publish']
|
|
|
|
publish=site.Categories['Publish']
|
|
|
|
for cat in publish.members():
|
|
|
|
for cat in publish.members():
|
|
|
|
if cat.namespace != 14:
|
|
|
|
if cat.namespace == NS_CATEGORY:
|
|
|
|
continue
|
|
|
|
|
|
|
|
print('dumping category {}'.format(cat.page_title))
|
|
|
|
print('dumping category {}'.format(cat.page_title))
|
|
|
|
# title=site.Categories['Title']
|
|
|
|
# title=site.Categories['Title']
|
|
|
|
try:
|
|
|
|
try:
|
|
|
@ -120,20 +135,13 @@ for cat in publish.members():
|
|
|
|
template = Template(templatefile.read())
|
|
|
|
template = Template(templatefile.read())
|
|
|
|
for p in cat.members():
|
|
|
|
for p in cat.members():
|
|
|
|
print(p)
|
|
|
|
print(p)
|
|
|
|
htmlsrc = site.parse(page=p.name)['text']['*']
|
|
|
|
dumppage(p, template, rewrite_images=not args.skipimages)
|
|
|
|
htmlsrc = rewritelinks(htmlsrc)
|
|
|
|
|
|
|
|
htmlsrc = rewriteimgs(htmlsrc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# TODO: ANdre structure of archive: from ./archive/0 to: ./archive ./0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
html = template.render(page=p, body=htmlsrc, staticpath=f'../{wd_name}')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with open(os.path.join(args.output, filenameforpage(p)), 'w') as f:
|
|
|
|
|
|
|
|
f.write(html)
|
|
|
|
|
|
|
|
# print(html, file=f)
|
|
|
|
|
|
|
|
if args.one:
|
|
|
|
if args.one:
|
|
|
|
break
|
|
|
|
break
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
print("Dumping page {}".format(cat.page_title))
|
|
|
|
|
|
|
|
with open('templates/default.html') as templatefile:
|
|
|
|
|
|
|
|
template = Template(templatefile.read())
|
|
|
|
|
|
|
|
dumppage(cat, template, rewrite_images=not args.skipimages)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|