Merge branch 'master' of https://git.xpub.nl/XPUB/special-issue-11-wiki2html

5 years ago · b0d77bf931
parent 82c48e12d4 64d738bcde
commit b0d77bf931
5 changed files with 157 additions and 30 deletions
--- a/README.md
+++ b/README.md
@@ -39,9 +39,14 @@ mypassword
 ### locally on your own machine:
 create archive folder: `mkdir archive`

-run script outputting to archive folder and displaying the images from the wiki:
+run script outputting to archive folder and **displaying the images from the wiki**:
+
+`python3 dumpwiki.py --imgsrc remote`
+
+run script outputting to archive folder and **displaying the images from the local `../archive/images` directory**:
+* requires running `download_imgs.py`
+`python3 dumpwiki.py`

-`python3 dumpwiki.py --output archive --local`


 ### Categories and Templates:
@@ -83,7 +88,7 @@ Run scripts together with `./run.sh`
 1 script at a time:

 `python3 download_imgs.py`
-* Downloads all images from wiki to `images/` directory
+* Downloads all images from wiki to `../archive/images/` directory
 * and stores each image's metadata to `images.json`

 `python3 query2html.py`
--- a/download_imgs.py
+++ b/download_imgs.py
@@ -6,9 +6,9 @@ from functions import update_json, remove_nonwords


 site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
-wd = os.path.dirname(os.path.abspath(__file__)) # working directory
-
-imgdir = os.path.join(wd, 'images')
+wd = os.path.dirname(os.path.abspath(__file__))  # working directory
+parent_d = os.path.dirname(wd) # parent directory
+imgdir = os.path.join(parent_d, 'archive/images')
 os.makedirs(imgdir, exist_ok=True) # create images/ dir

 imgsjson_fn = os.path.join(wd, 'images.json')
--- a/dumpwiki.py
+++ b/dumpwiki.py
@@ -1,8 +1,6 @@
-import os, json, sys, urllib
+import os, json, sys
 from mwclient import Site
-from pprint import pprint
 from jinja2 import Template
-from functions import unpack_response, clean_dir, remove_nonwords
 import html5lib
 from functions import Colors
 import argparse
@@ -14,27 +12,32 @@ p = argparse.ArgumentParser(description="Dump wiki files to html",
                            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 p.add_argument("--host",  metavar='', default="hub.xpub.nl/sandbox", help='wiki host')
 p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Should end with /")
-p.add_argument("--output", default="/var/www/html/archive", help="Output path for pages")
+p.add_argument("--output", default="../archive", help="Output path for pages")
 p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only")
-p.add_argument("--local", default=False, action="store_true", help="When creating a local archives. Add full URL to images")
+p.add_argument("--imgsrc", default='archive',
+               choices=['archive', 'remote'],
+               help="What is the source of the images?")

 args = p.parse_args()
 print(args)
 # site and login

 site = Site(host=args.host, path=args.path)
-
+wd = os.path.dirname(os.path.abspath(__file__))  # working directory
+wd_name = os.path.split(wd)[-1] # name of dir running script
 with open('login.txt', 'r') as login:  # read login user & pwd
    loginlines = login.read()
    user, pwd = loginlines.split('\n')
    site.login(username=user, password=pwd)  # login to wiki

-# read template files
-
+imgsjson_fn = os.path.join(wd, 'images.json') # read images.json file
+with open(imgsjson_fn, 'r') as imgsjson_file:
+    images_info = json.load(imgsjson_file)


 SLASH = "\u2044"

+
 def filenameforpage(p):
    f = p.name.replace(' ','_').replace('/', SLASH) + '.html'
    return f
@@ -49,6 +52,13 @@ def filenameforlink(href):

 def rewritelinks (html):
    t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
+
+    # remove links to wiki File: pages
+    for a in t.findall(".//a[@class='image']"):  # select each <a> that wraps an img
+        href = a.attrib.get('href')
+        if a.findall(".//img") and 'File:' in href:  # ensure a has child: img
+            a.attrib['href'] = 'javascript:void(0);'  # disable href
+
    for a in t.findall(".//*[@href]"):
        linkclass = a.attrib.get("class", "")
        href = a.attrib.get("href")
@@ -59,20 +69,40 @@ def rewritelinks (html):
        if href.startswith("/sandbox/itchwiki/index.php/"):
            new_href = filenameforlink(href)
            a.attrib['href'] = new_href
-    if args.local is True:
-        for img in t.findall(".//img[@src]"):
-            src = img.attrib.get("src")
-            if not src.startswith('http'):
-                img.attrib['src'] = 'https://hub.xpub.nl' + src
    html = ET.tostring(t, method="html", encoding="unicode")
    return html


 def rewriteimgs(html):
    t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
-    # remove the srcset value as it prevents images from displaying
-    for img in t.findall(".//img[@srcset]"):
-        img.attrib['srcset'] = ""
+
+    # replace images url with local image in ../images
+    for img in t.findall(".//img[@src]"):
+        # imgsrc can be:
+            # remote: url remains
+            # archive f' images/{img_filename}'
+            # local: f'../../images/{img_filename}'
+
+        if args.imgsrc == 'remote':
+            src = img.attrib.get("src")
+            if not src.startswith('http'):
+                img.attrib['src'] = 'https://hub.xpub.nl' + src
+        else:  # local / archive imgsrc
+            img_alt = img.attrib.get("alt")  # alt property has filename
+            img_page = f'File:{img_alt}' # find image in images.json
+            try:
+                # get its filename
+                img_filename = images_info[img_page]['filename']
+            except KeyError:
+                print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images")
+                print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
+                sys.exit()
+            # same dir as HTML files: archive/
+            img.attrib['src'] = f'./images/{img_filename}'
+
+        img.attrib['srcset'] = ""  # remove srcset value: it prevents images from displaying
+        img.attrib['width'] = ""
+        img.attrib['height'] = ""
    html = ET.tostring(t, method="html", encoding="unicode")
    return html

@@ -94,13 +124,13 @@ for cat in publish.members():
        htmlsrc = rewritelinks(htmlsrc)
        htmlsrc = rewriteimgs(htmlsrc)

-        if args.local is True:
-            html = template.render(page=p, body=htmlsrc, staticpath='..')
-        else:
-            html = template.render(page=p, body=htmlsrc, staticpath='0')
+        # TODO: Andre structure of archive: from ./archive/0 to: ./archive ./0
+
+        html = template.render(page=p, body=htmlsrc, staticpath=f'../{wd_name}')

        with open(os.path.join(args.output, filenameforpage(p)), 'w') as f:
-            print(html, file=f)
+            f.write(html)
+            # print(html, file=f)
        if args.one:
            break

--- a/static/archive.css
+++ b/static/archive.css
@@ -11,4 +11,6 @@ a, a:visited{
 a:hover {
 	text-decoration: none;
  	color: blue;
-}
+}
+
+a.image {cursor: default!important;} /* KEEP THIS: it prevents images from looking like links */
--- a/static/timeline.css
+++ b/static/timeline.css
@@ -1,3 +1,93 @@
-body { background: red;
-       color: white;
+body{ width: max-content;}
+
+div#body{ width: max-content;}
+
+.mw-parser-output{
+	/*! position: left top; */
+	/*! display: hidden; */
+	/*! display: inline; */
+	/*! vertical-align: top; */
+}
+
+.mw-parser-output > p{
+display: inline-block;
+}
+
+.content {
+	border-style: solid 1px black;
+	background-color: red;
+	color: #444;
+	cursor: pointer;
+	padding: 5px;
+	position: fixed;
+	font-size: 15px;
+    z-index: +1;
+    font-family: Times New Roman;
+}
+
+.horizontal-scroll-wrapper {
+	display: inline-table;
+	margin-top: 10vw;
+	width: 20vw;
+	height: 80vw;
+	overflow-y: auto;
+	overflow-x: hidden;
+}
+
+.event {
+	text-align-last: auto;
+	font-family: Arial;
+	font-size: 10px;
+/* 	padding-top: 5vw; */
+	 padding-left: 5vw;
+	display: inline-block;
+	width:400px;
+	vertical-align: top;
+}
+
+.thumbborder {
+/*	display: none;*/
+	width: 30%;
+	height: auto;
+}
+
+.thumbborder:hover {
+	width:100%; 
+	transition:0.5s;
+	height: auto;
+}
+
+.bar-chart {
+display: table;
+margin-top: 20px;
+}
+
+.row {
+display: table-row;
+}
+
+.row div {
+display: table-cell;
+width: 60px;
+height: 44px;
+border-right: 2px solid rgb(255, 255, 255);
+}
+
+.row .axis-y {
+width: 96px;
+border-right: 1px solid #000;
+vertical-align: top;
+}
+
+.axis-x div {
+border-top: 1px solid #000;
+}
+
+.axis-x .axis-y {
+border: none;
+}
+
+.axis-x div, .axis-y {
+text-align: center;
+font-weight: bold;
 }