From a57bfedd682ac6b79c636e78fd111b4ce417889b Mon Sep 17 00:00:00 2001
From: Sandra <you@example.com>
Date: Wed, 1 Apr 2020 19:47:22 +0200
Subject: [PATCH] dumpwiki: add --one option and start rewriting wiki links

---
 dumpwiki.py | 37 +++++++++++++------------------------
 1 file changed, 13 insertions(+), 24 deletions(-)

diff --git a/dumpwiki.py b/dumpwiki.py
index 522470d..1a97ef0 100644
--- a/dumpwiki.py
+++ b/dumpwiki.py
@@ -6,34 +6,17 @@ from functions import unpack_response, clean_dir, remove_nonwords
 import html5lib
 from functions import Colors
 import argparse
+from xml.etree import ElementTree as ET
 
 p = argparse.ArgumentParser(description="Dump wiki files to html",
                             formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 p.add_argument("--host",  metavar='', default="hub.xpub.nl/sandbox", help='wiki host')
 p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Should end with /")
 p.add_argument("--output", default="/var/www/html/archive", help="Output path for pages")
-# p.add_argument("--conditions", "-c", metavar='',
-#                default='[[File:+]][[Title::+]][[Part::+]][[Date::+]]',
-#                help='The query conditions')
-# p.add_argument("--printouts", "-p", metavar='',
-#                default='?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language',
-#                help='Selection of properties to printout')
-# p.add_argument("--sort", "-s", metavar='',
-#                default='Date,Title,Part',
-#                help='Sorting according to conditions')
-# p.add_argument("--order", "-o", metavar='', 
-#                default='asc,asc,asc',
-#                help='Order of sorting conditions. Should same amount as the --sort properties')
-# p.add_argument('--limit', '-l', help='(optional) Limit the number of returned '
-#                                      'items')
-# # TODO: GET limit to work.Perhaps with a site.raw_api method
-# p.add_argument('--dry', '-d', action='store_true',
-#                help='dry-run: will only show the query but not run it')
+p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only")
 
 args = p.parse_args()
 
-
-
 # site and login
 
 site = Site(host=args.host, path=args.path)
@@ -50,11 +33,11 @@ with open('login.txt', 'r') as login:  # read login user & pwd
 SLASH = "\u2044"
 
 def filenameforpage(p):
-    f=p.name.replace(' ','_').replace('/', SLASH) + '.html'
+    f = p.name.replace(' ','_').replace('/', SLASH) + '.html'
     return f
 
-def rewritelinks (html)
-    t = html5lib.parseFragment(html, treebuilder="etree", namespaceHTMLElements=False)
+def rewritelinks (html):
+    t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
     for a in t.findall(".//*[@href]"):
         linkclass = a.attrib.get("class", "")
         href = a.attrib.get("href")
@@ -62,11 +45,15 @@ def rewritelinks (html)
             # leave external links alone
             continue
         print ("LINK", href)
+        if href.startswith("/sandbox/itchwiki/index.php/"):
+            new_href = href.split("/index.php/", 1)[1].replace("/", SLASH) + ".html"  # local file name, mirroring filenameforpage(); not applied yet
         # a.attrib['href'] = new_href
+    html = ET.tostring(t, method="html", encoding="unicode")
+    return html
 
 publish=site.Categories['Publish']
 for cat in publish.members():
-    if cat.namespace!=14:
+    if cat.namespace != 14:
         continue 
     print('dumping category {}'.format(cat.page_title))
     # title=site.Categories['Title']
@@ -79,10 +66,12 @@ for cat in publish.members():
     for p in cat.members():
         print(p)
         htmlsrc = site.parse(page=p.name)['text']['*']
+        htmlsrc = rewritelinks(htmlsrc)
         html = template.render(page=p, body=htmlsrc)
         with open(os.path.join(args.output, filenameforpage(p)), 'w') as f:
             print(html, file=f)
-        # break
+        if args.one:
+            break