dump (main) pages tagged with Category:Publish as well, added --skipimages option for testing

5 years ago · dea78943e0
parent bcf1c1a981
commit dea78943e0
1 changed file with 36 additions and 28 deletions
--- a/dumpwiki.py
+++ b/dumpwiki.py
@@ -8,12 +8,16 @@ from xml.etree import ElementTree as ET
 from urllib.parse import quote as urlquote, unquote as urlunquote


+NS_MAIN = 0
+NS_CATEGORY = 14
+
 p = argparse.ArgumentParser(description="Dump wiki files to html",
                            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 p.add_argument("--host",  metavar='', default="hub.xpub.nl/sandbox", help='wiki host')
 p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Should end with /")
 p.add_argument("--output", default="../archive", help="Output path for pages")
 p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only")
+p.add_argument("--skipimages", default=False, action="store_true", help="Don't do images (for testing)")
 p.add_argument("--imgsrc", default='archive',
               choices=['archive', 'remote'],
               help="What is the source of the images?")
@@ -30,9 +34,10 @@ with open('login.txt', 'r') as login:  # read login user & pwd
    user, pwd = loginlines.split('\n')
    site.login(username=user, password=pwd)  # login to wiki

-imgsjson_fn = os.path.join(wd, 'images.json') # read images.json file
-with open(imgsjson_fn, 'r') as imgsjson_file:
-    images_info = json.load(imgsjson_file)
+if not args.skipimages:
+    imgsjson_fn = os.path.join(wd, 'images.json') # read images.json file
+    with open(imgsjson_fn, 'r') as imgsjson_file:
+        images_info = json.load(imgsjson_file)


 SLASH = "\u2044"
@@ -106,34 +111,37 @@ def rewriteimgs(html):
    html = ET.tostring(t, method="html", encoding="unicode")
    return html

+def dumppage(p, template, rewrite_images=True):
+    htmlsrc = site.parse(page=p.name)['text']['*']
+    htmlsrc = rewritelinks(htmlsrc)
+    if rewrite_images:
+        htmlsrc = rewriteimgs(htmlsrc)
+    # TODO: ANdre structure of archive: from ./archive/0 to: ./archive ./0
+    html = template.render(page=p, body=htmlsrc, staticpath=f'../{wd_name}')
+    with open(os.path.join(args.output, filenameforpage(p)), 'w') as f:
+        f.write(html)
+        # print(html, file=f)
+
 publish=site.Categories['Publish']
 for cat in publish.members():
-    if cat.namespace != 14:
-        continue 
-    print('dumping category {}'.format(cat.page_title))
-    # title=site.Categories['Title']
-    try:
-        with open('templates/{}.html'.format(cat.page_title.lower())) as templatefile:
-            template = Template(templatefile.read())
-    except FileNotFoundError:
+    if cat.namespace == NS_CATEGORY:
+        print('dumping category {}'.format(cat.page_title))
+        # title=site.Categories['Title']
+        try:
+            with open('templates/{}.html'.format(cat.page_title.lower())) as templatefile:
+                template = Template(templatefile.read())
+        except FileNotFoundError:
+            with open('templates/default.html') as templatefile:
+                template = Template(templatefile.read())        
+        for p in cat.members():
+            print(p)
+            dumppage(p, template, rewrite_images=not args.skipimages)
+            if args.one:
+                break
+    else:
+        print("Dumping page {}".format(cat.page_title))
        with open('templates/default.html') as templatefile:
            template = Template(templatefile.read())        
-    for p in cat.members():
-        print(p)
-        htmlsrc = site.parse(page=p.name)['text']['*']
-        htmlsrc = rewritelinks(htmlsrc)
-        htmlsrc = rewriteimgs(htmlsrc)
-
-        # TODO: ANdre structure of archive: from ./archive/0 to: ./archive ./0
-
-        html = template.render(page=p, body=htmlsrc, staticpath=f'../{wd_name}')
-
-        with open(os.path.join(args.output, filenameforpage(p)), 'w') as f:
-            f.write(html)
-            # print(html, file=f)
-        if args.one:
-            break
-
-
+        dumppage(cat, template, rewrite_images=not args.skipimages)