From 63422103a29dd4faace5de7f0ca227e1349adc87 Mon Sep 17 00:00:00 2001
From: Castro0o <andre@andrecastro>
Date: Wed, 8 Apr 2020 09:24:05 +0200
Subject: [PATCH 1/4] download_imgs.py saving images to images/ in parent
 directory

---
 download_imgs.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/download_imgs.py b/download_imgs.py
index a8af106..31bbc95 100644
--- a/download_imgs.py
+++ b/download_imgs.py
@@ -6,9 +6,9 @@ from functions import update_json, remove_nonwords
 
 
 site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
-wd = os.path.dirname(os.path.abspath(__file__)) # working directory
-
-imgdir = os.path.join(wd, 'images')
+wd = os.path.dirname(os.path.abspath(__file__))  # working directory
+parent_d = os.path.dirname(wd) # parent directory
+imgdir = os.path.join(parent_d, 'images')
 os.makedirs(imgdir, exist_ok=True) # create images/ dir
 
 imgsjson_fn = os.path.join(wd, 'images.json')

From 5de30ed9caa090858e6da325e7fe6f2e10922cbf Mon Sep 17 00:00:00 2001
From: Castro0o <andre@andrecastro>
Date: Wed, 8 Apr 2020 09:26:02 +0200
Subject: [PATCH 2/4] readme edit

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a3d3bdc..f5df694 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ Run scripts together with `./run.sh`
 1 script at a time:
 
 `python3 download_imgs.py`
-* Downloads all images from wiki to `images/` directory
+* Downloads all images from wiki to `../images/` directory
 * and stores each image's metadata to `images.json`
 
 `python3 query2html.py`

From bf66a2d572890ea1e539b9542ba4ce6139628467 Mon Sep 17 00:00:00 2001
From: Castro0o <andre@andrecastro>
Date: Wed, 8 Apr 2020 10:36:59 +0200
Subject: [PATCH 3/4] images: remote, local, archive

---
 README.md   |  9 +++++++--
 dumpwiki.py | 54 +++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index f5df694..78cabea 100644
--- a/README.md
+++ b/README.md
@@ -39,9 +39,14 @@ mypassword
 ### locally on your own machine:
 create archive folder: `mkdir archive`
 
-run script outputting to archive folder and displaying the images from the wiki:
+run script outputting to archive folder and **displaying the images from the wiki**:
+
+`python3 dumpwiki.py --output archive --local --imgsrc remote`
+
+run script outputting to archive folder and **displaying the images from local ../images**:
+* requires running `download_imgs.py`
+`python3 dumpwiki.py --output archive --local --imgsrc local`
 
-`python3 dumpwiki.py --output archive --local`
 
 
 ### Categories and Templates:
diff --git a/dumpwiki.py b/dumpwiki.py
index 4152757..54c24bd 100644
--- a/dumpwiki.py
+++ b/dumpwiki.py
@@ -1,8 +1,6 @@
-import os, json, sys, urllib
+import os, json, sys
 from mwclient import Site
-from pprint import pprint
 from jinja2 import Template
-from functions import unpack_response, clean_dir, remove_nonwords
 import html5lib
 from functions import Colors
 import argparse
@@ -17,20 +15,25 @@ p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Shou
 p.add_argument("--output", default="/var/www/html/archive", help="Output path for pages")
 p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only")
 p.add_argument("--local", default=False, action="store_true", help="When creating a local archives. Add full URL to images")
+p.add_argument("--imgsrc", default='archive',
+               choices=['archive', 'local', 'remote'],
+               help="What is the source of the images?")
 
 args = p.parse_args()
 print(args)
 # site and login
 
 site = Site(host=args.host, path=args.path)
+wd = os.path.dirname(os.path.abspath(__file__))  # working directory
 
 with open('login.txt', 'r') as login:  # read login user & pwd
     loginlines = login.read()
     user, pwd = loginlines.split('\n')
     site.login(username=user, password=pwd)  # login to wiki
 
-# read template files
-
+imgsjson_fn = os.path.join(wd, 'images.json') # read images.json file
+with open(imgsjson_fn, 'r') as imgsjson_file:
+    images_info = json.load(imgsjson_file)
 
 
 SLASH = "\u2044"
@@ -61,20 +64,43 @@ def rewritelinks (html):
         if href.startswith("/sandbox/itchwiki/index.php/"):
             new_href = filenameforlink(href)
             a.attrib['href'] = new_href
-    if args.local is True:
-        for img in t.findall(".//img[@src]"):
-            src = img.attrib.get("src")
-            if not src.startswith('http'):
-                img.attrib['src'] = 'https://hub.xpub.nl' + src
-    html = ET.tostring(t, method="html", encoding="unicode")
     return html
 
 
 def rewriteimgs(html):
     t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
-    # remove the srcset value as it prevents images from displaying
-    for img in t.findall(".//img[@srcset]"):
-        img.attrib['srcset'] = ""
+
+    # replace images url with local image in ../images
+    for img in t.findall(".//img[@src]"):
+        # imgsrc can be:
+            # remote: url remains
+            # archive f' images/{img_filename}'
+            # local: f'../../images/{img_filename}'
+
+        if args.imgsrc == 'remote':
+            src = img.attrib.get("src")
+            if not src.startswith('http'):
+                img.attrib['src'] = 'https://hub.xpub.nl' + src
+        else:  # local / archive imgsrc
+            img_alt = img.attrib.get("alt")  # alt property has filename
+            img_page = f'File:{img_alt}' # find image in images.json
+            try:
+                # get its filename
+                img_filename = images_info[img_page]['filename']
+            except KeyError:
+                print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images")
+                print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
+                sys.exit()
+            if args.imgsrc == 'local':
+                # 2 dirs above HTML files dir: archive/
+                img.attrib['src'] = f'../../images/{img_filename}'
+            if args.imgsrc == 'archive':
+                # same dir as HTML files: archive/
+                img.attrib['src'] = f'./images/{img_filename}'
+
+        img.attrib['srcset'] = ""  # rm srcset value:it prevent imgs displaying
+        img.attrib['width'] = ""
+        img.attrib['height'] = ""
     html = ET.tostring(t, method="html", encoding="unicode")
     return html
 

From 984a96156bf9648ba5c694ae8b484ff017b39154 Mon Sep 17 00:00:00 2001
From: Castro0o <andre@andrecastro>
Date: Wed, 8 Apr 2020 19:19:15 +0200
Subject: [PATCH 4/4] removed --local

---
 README.md        |  8 ++++----
 download_imgs.py |  2 +-
 dumpwiki.py      | 22 ++++++++--------------
 3 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 78cabea..34c024a 100644
--- a/README.md
+++ b/README.md
@@ -41,11 +41,11 @@ create archive folder: `mkdir archive`
 
 run script outputting to archive folder and **displaying the images from the wiki**:
 
-`python3 dumpwiki.py --output archive --local --imgsrc remote`
+`python3 dumpwiki.py --imgsrc remote`
 
-run script outputting to archive folder and **displaying the images from local ../images**:
+run script outputting to archive folder and **displaying the images from local ../archive/images**:
 * requires running `download_imgs.py`
-`python3 dumpwiki.py --output archive --local --imgsrc local`
+`python3 dumpwiki.py`
 
 
 
@@ -88,7 +88,7 @@ Run scripts together with `./run.sh`
 1 script at a time:
 
 `python3 download_imgs.py`
-* Downloads all images from wiki to `../images/` directory
+* Downloads all images from wiki to `../archive/images/` directory
 * and stores each image's metadata to `images.json`
 
 `python3 query2html.py`
diff --git a/download_imgs.py b/download_imgs.py
index 31bbc95..ddf997e 100644
--- a/download_imgs.py
+++ b/download_imgs.py
@@ -8,7 +8,7 @@ from functions import update_json, remove_nonwords
 site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
 wd = os.path.dirname(os.path.abspath(__file__))  # working directory
 parent_d = os.path.dirname(wd) # parent directory
-imgdir = os.path.join(parent_d, 'images')
+imgdir = os.path.join(parent_d, 'archive/images')
 os.makedirs(imgdir, exist_ok=True) # create images/ dir
 
 imgsjson_fn = os.path.join(wd, 'images.json')
diff --git a/dumpwiki.py b/dumpwiki.py
index 54c24bd..771d7ef 100644
--- a/dumpwiki.py
+++ b/dumpwiki.py
@@ -12,11 +12,10 @@ p = argparse.ArgumentParser(description="Dump wiki files to html",
                             formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 p.add_argument("--host",  metavar='', default="hub.xpub.nl/sandbox", help='wiki host')
 p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Should end with /")
-p.add_argument("--output", default="/var/www/html/archive", help="Output path for pages")
+p.add_argument("--output", default="../archive", help="Output path for pages")
 p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only")
-p.add_argument("--local", default=False, action="store_true", help="When creating a local archives. Add full URL to images")
 p.add_argument("--imgsrc", default='archive',
-               choices=['archive', 'local', 'remote'],
+               choices=['archive', 'remote'],
                help="What is the source of the images?")
 
 args = p.parse_args()
@@ -25,7 +24,7 @@ print(args)
 
 site = Site(host=args.host, path=args.path)
 wd = os.path.dirname(os.path.abspath(__file__))  # working directory
-
+wd_name = os.path.split(wd)[-1] # name of dir running script
 with open('login.txt', 'r') as login:  # read login user & pwd
     loginlines = login.read()
     user, pwd = loginlines.split('\n')
@@ -91,12 +90,8 @@ def rewriteimgs(html):
                 print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images")
                 print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
                 sys.exit()
-            if args.imgsrc == 'local':
-                # 2 dirs above HTML files dir: archive/
-                img.attrib['src'] = f'../../images/{img_filename}'
-            if args.imgsrc == 'archive':
-                # same dir as HTML files: archive/
-                img.attrib['src'] = f'./images/{img_filename}'
+            # same dir as HTML files: archive/
+            img.attrib['src'] = f'./images/{img_filename}'
 
         img.attrib['srcset'] = ""  # rm srcset value:it prevent imgs displaying
         img.attrib['width'] = ""
@@ -122,10 +117,9 @@ for cat in publish.members():
         htmlsrc = rewritelinks(htmlsrc)
         htmlsrc = rewriteimgs(htmlsrc)
 
-        if args.local is True:
-            html = template.render(page=p, body=htmlsrc, staticpath='..')
-        else:
-            html = template.render(page=p, body=htmlsrc, staticpath='0')
+        # TODO: Andre structure of archive: from ./archive/0 to: ./archive ./0
+
+        html = template.render(page=p, body=htmlsrc, staticpath=f'../{wd_name}')
 
         with open(os.path.join(args.output, filenameforpage(p)), 'w') as f:
             f.write(html)