"""Dump wiki pages to HTML files.

Logs in to a MediaWiki site with the credentials stored in ``login.txt``
(username on the first line, password on the second) and writes the parsed
HTML of every member of the ``Title`` category into the ``--output``
directory, one ``.html`` file per page.
"""
import os
import json
import sys
import urllib
from mwclient import Site
from pprint import pprint
from jinja2 import Template
from functions import unpack_response, clean_dir, remove_nonwords

from functions import Colors
import argparse

p = argparse.ArgumentParser(description="Dump wiki files to html",
                            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
p.add_argument("--host", metavar='', default="hub.xpub.nl/sandbox", help='wiki host')
p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Should end with /")
p.add_argument("--output", default="/var/www/html/archive", help="Output path for pages")
# p.add_argument("--conditions", "-c", metavar='',
#                default='[[File:+]][[Title::+]][[Part::+]][[Date::+]]',
#                help='The query conditions')
# p.add_argument("--printouts", "-p", metavar='',
#                default='?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language',
#                help='Selection of properties to printout')
# p.add_argument("--sort", "-s", metavar='',
#                default='Date,Title,Part',
#                help='Sorting according to conditions')
# p.add_argument("--order", "-o", metavar='',
#                default='asc,asc,asc',
#                help='Order of sorting conditions. Should be the same amount as the --sort properties')
# p.add_argument('--limit', '-l', help='(optional) Limit the number of returned '
#                                      'items')
# # TODO: GET limit to work. Perhaps with a site.raw_api method
# p.add_argument('--dry', '-d', action='store_true',
#                help='dry-run: will only show the query but not run it')

args = p.parse_args()


# site and login

site = Site(host=args.host, path=args.path)

# Read the credentials: username on line 1, password on line 2.
# splitlines() tolerates a trailing newline and CRLF line endings, both of
# which broke the previous two-value unpacking of split('\n').
with open('login.txt', 'r') as login:
    credentials = login.read().splitlines()
if len(credentials) < 2:
    raise ValueError(
        'login.txt must contain the username on the first line '
        'and the password on the second line')
user, pwd = credentials[:2]
site.login(username=user, password=pwd)  # login to wiki

# read template files

# NOTE(review): title_template is loaded here but not used further down in
# this script.
with open('templates/title.html') as title_html:
    title_template = Template(title_html.read())


def filenameforpage(p):
    """Return the output file name for wiki page *p*.

    The name is the page's ``name`` with spaces replaced by underscores and
    an ``.html`` suffix appended.
    """
    return p.name.replace(' ', '_') + '.html'


# Make sure the destination exists before writing the first page.
os.makedirs(args.output, exist_ok=True)

title = site.Categories['Title']
for page in title.members():
    print(page)
    htmlsrc = site.parse(page=page.name)['text']['*']
    out_path = os.path.join(args.output, filenameforpage(page))
    # Write explicitly as UTF-8 so non-ASCII wiki content does not depend on
    # the locale's default encoding.
    with open(out_path, 'w', encoding='utf-8') as out_file:
        print(htmlsrc, file=out_file)
    # break