From 0b29abe5b7b74cea521d2ae263850c74d150319a Mon Sep 17 00:00:00 2001 From: Castro0o Date: Tue, 7 Apr 2020 17:44:06 +0200 Subject: [PATCH 01/13] replacing hyphen --- dumpwiki.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dumpwiki.py b/dumpwiki.py index 80ede08..16f1635 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -34,6 +34,8 @@ with open('login.txt', 'r') as login: # read login user & pwd SLASH = "\u2044" +HYPHEN = "\u2010" + def filenameforpage(p): f = p.name.replace(' ','_').replace('/', SLASH) + '.html' @@ -43,7 +45,7 @@ def filenameforlink(href): href = urlunquote(href) if href.startswith("/sandbox/itchwiki/index.php/"): href = href[len("/sandbox/itchwiki/index.php/"):] - href = href.replace(' ','_').replace('/', SLASH) + '.html' + href = href.replace(' ','_').replace('/', SLASH).replace('‐', HYPHEN) + '.html' href = urlquote(href) return href From 25092cb9dd56ff96c249a193d5af83b874e58d87 Mon Sep 17 00:00:00 2001 From: ioanatomici Date: Tue, 7 Apr 2020 17:53:38 +0200 Subject: [PATCH 02/13] no encoding error --- dumpwiki.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dumpwiki.py b/dumpwiki.py index 16f1635..beabf5f 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -34,7 +34,6 @@ with open('login.txt', 'r') as login: # read login user & pwd SLASH = "\u2044" -HYPHEN = "\u2010" def filenameforpage(p): @@ -45,7 +44,7 @@ def filenameforlink(href): href = urlunquote(href) if href.startswith("/sandbox/itchwiki/index.php/"): href = href[len("/sandbox/itchwiki/index.php/"):] - href = href.replace(' ','_').replace('/', SLASH).replace('‐', HYPHEN) + '.html' + href = href.replace(' ','_').replace('/', SLASH) + '.html' href = urlquote(href) return href @@ -102,7 +101,8 @@ for cat in publish.members(): html = template.render(page=p, body=htmlsrc, staticpath='0') with open(os.path.join(args.output, filenameforpage(p)), 'w') as f: - print(html, file=f) + pass +# print(html, file=f) if args.one: break From c0ad1dd62167472ac5977773aaf296076d9cb2c4 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Tue, 7 Apr 2020 18:08:37 +0200 Subject: [PATCH 03/13] writing html content to file --- dumpwiki.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dumpwiki.py b/dumpwiki.py index beabf5f..4152757 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -34,6 +34,7 @@ with open('login.txt', 'r') as login: # read login user & pwd SLASH = "\u2044" +HYPHEN = "\u2010" def filenameforpage(p): @@ -44,7 +45,7 @@ def filenameforlink(href): href = urlunquote(href) if href.startswith("/sandbox/itchwiki/index.php/"): href = href[len("/sandbox/itchwiki/index.php/"):] - href = href.replace(' ','_').replace('/', SLASH) + '.html' + href = href.replace(' ','_').replace('/', SLASH).replace('‐', HYPHEN) + '.html' href = urlquote(href) return href @@ -101,8 +102,8 @@ for cat in publish.members(): html = template.render(page=p, body=htmlsrc, staticpath='0') with open(os.path.join(args.output, filenameforpage(p)), 'w') as f: - pass -# print(html, file=f) + f.write(html) + # print(html, file=f) if args.one: break From c2cb9d1de3cf5d2d5cd805236713cfd2302a3f6e Mon Sep 17 00:00:00 2001 From: ioanatomici Date: Tue, 7 Apr 2020 18:35:37 +0200 Subject: [PATCH 04/13] timeline.css --- static/timeline.css | 94 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 3 deletions(-) diff --git a/static/timeline.css b/static/timeline.css index db8c68d..65ceedf 100644 --- a/static/timeline.css +++ b/static/timeline.css @@ -1,3 +1,91 @@ -body { background: red; - color: white; -} \ No newline at end of file +.mw-parser-output{ + position: left top; + display: hidden; +} + + +.content { + border-style: solid 1px black; + transform: rotate(90deg); + transform-origin: right top; + background-color: red; + color: #444; + cursor: pointer; + padding: 5px; + position: fixed; + font-size: 15px; + z-index: +1; + font-family: Times New Roman; +} + +.horizontal-scroll-wrapper { + display: inline-table; + margin-top: 10vw; + width: 20vw; + height: 80vw; + overflow-y: auto; + overflow-x: hidden; + transform: rotate(-90deg) /*translateY(-100px)*/; + transform-origin: right top; + +} + +.event { + text-align-last: auto; + height: 10vw; + transform: rotate(90deg); + transform-origin: right top; + font-family: Arial; + font-size: 10px; + padding-top: 5vw; + padding-left: 5vw; +} + +.thumbborder { +/* display: none;*/ + width: 20%; + height: auto; +} + +.thumbborder:hover { + width:100%; + transition:0.5s; + height: auto; +} + +.bar-chart { +display: table; +margin-top: 20px; +} + +.row { +display: table-row; +} + +.row div { +display: table-cell; +width: 60px; +height: 44px; +border-right: 2px solid rgb(255, 255, 255); +} + +.row .axis-y { +width: 96px; +border-right: 1px solid #000; +vertical-align: top; +} + +.axis-x div { +border-top: 1px solid #000; +} + +.axis-x .axis-y { +border: none; +} + +.axis-x div, .axis-y { +text-align: center; +font-weight: bold; +} + + From 63422103a29dd4faace5de7f0ca227e1349adc87 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Wed, 8 Apr 2020 09:24:05 +0200 Subject: [PATCH 05/13] download_imgs.py saving images to images/ in partent directory --- download_imgs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/download_imgs.py b/download_imgs.py index a8af106..31bbc95 100644 --- a/download_imgs.py +++ b/download_imgs.py @@ -6,9 +6,9 @@ from functions import update_json, remove_nonwords site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/') -wd = os.path.dirname(os.path.abspath(__file__)) # working directory - -imgdir = os.path.join(wd, 'images') +wd = os.path.dirname(os.path.abspath(__file__)) # working directory +parent_d = os.path.dirname(wd) # parent directory +imgdir = os.path.join(parent_d, 'images') os.makedirs(imgdir, exist_ok=True) # create images/ dir imgsjson_fn = os.path.join(wd, 'images.json') From 5de30ed9caa090858e6da325e7fe6f2e10922cbf Mon Sep 17 00:00:00 2001 From: Castro0o Date: Wed, 8 Apr 2020 09:26:02 +0200 Subject: [PATCH 06/13] readme edit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a3d3bdc..f5df694 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ Run scripts together with `./run.sh` 1 script at a time: `python3 download_imgs.py` -* Downloads all images from wiki to `images/` directory +* Downloads all images from wiki to `../images/` directory * and stores each image's metadata to `images.json` `python3 query2html.py` From bf66a2d572890ea1e539b9542ba4ce6139628467 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Wed, 8 Apr 2020 10:36:59 +0200 Subject: [PATCH 07/13] images: remote, local, archoive --- README.md | 9 +++++++-- dumpwiki.py | 54 +++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 47 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index f5df694..78cabea 100644 --- a/README.md +++ b/README.md @@ -39,9 +39,14 @@ mypassword ### locally on your own machine: create archive folder: `mkdir archive` -run script outputting to archive folder and displaying the images from the wiki: +run script outputting to archive folder and **displaying the images from the wiki**: + +`python3 dumpwiki.py --output archive --local --imgsrc remote` + +run script outputting to archive folder and **displaying the images from local ../images**: +* requires running `download_imgs.py` +`python3 dumpwiki.py --output archive --local --imgsrc local` -`python3 dumpwiki.py --output archive --local` ### Categories and Templates: diff --git a/dumpwiki.py b/dumpwiki.py index 4152757..54c24bd 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -1,8 +1,6 @@ -import os, json, sys, urllib +import os, json, sys from mwclient import Site -from pprint import pprint from jinja2 import Template -from functions import unpack_response, clean_dir, remove_nonwords import html5lib from functions import Colors import argparse @@ -17,20 +15,25 @@ p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Shou p.add_argument("--output", default="/var/www/html/archive", help="Output path for pages") p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only") p.add_argument("--local", default=False, action="store_true", help="When creating a local archives. Add full URL to images") +p.add_argument("--imgsrc", default='archive', + choices=['archive', 'local', 'remote'], + help="What is the source of the images?") args = p.parse_args() print(args) # site and login site = Site(host=args.host, path=args.path) +wd = os.path.dirname(os.path.abspath(__file__)) # working directory with open('login.txt', 'r') as login: # read login user & pwd loginlines = login.read() user, pwd = loginlines.split('\n') site.login(username=user, password=pwd) # login to wiki -# read template files - +imgsjson_fn = os.path.join(wd, 'images.json') # read images.json file +with open(imgsjson_fn, 'r') as imgsjson_file: + images_info = json.load(imgsjson_file) SLASH = "\u2044" @@ -61,20 +64,43 @@ def rewritelinks (html): if href.startswith("/sandbox/itchwiki/index.php/"): new_href = filenameforlink(href) a.attrib['href'] = new_href - if args.local is True: - for img in t.findall(".//img[@src]"): - src = img.attrib.get("src") - if not src.startswith('http'): - img.attrib['src'] = 'https://hub.xpub.nl' + src - html = ET.tostring(t, method="html", encoding="unicode") return html def rewriteimgs(html): t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) - # remove the srcset value as it prevents images from displaying - for img in t.findall(".//img[@srcset]"): - img.attrib['srcset'] = "" + + # replace images url with local image in ../images + for img in t.findall(".//img[@src]"): + # imgsrc can be: + # remote: url remains + # archive f' images/{img_filename}' + # local: f'../../images/{img_filename}' + + if args.imgsrc == 'remote': + src = img.attrib.get("src") + if not src.startswith('http'): + img.attrib['src'] = 'https://hub.xpub.nl' + src + else: # local / archive imgsrc + img_alt = img.attrib.get("alt") # alt property has filename + img_page = f'File:{img_alt}' # find image it images.json + try: + # get its filename + img_filename = images_info[img_page]['filename'] + except KeyError: + print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images") + print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC) + sys.exit() + if args.imgsrc == 'local': + # 2 dirs above HTML files dir: archive/ + img.attrib['src'] = f'../../images/{img_filename}' + if args.imgsrc == 'archive': + # same dir as HTML files: archive/ + img.attrib['src'] = f'./images/{img_filename}' + + img.attrib['srcset'] = "" # rm srcset value:it prevent imgs displaying + img.attrib['width'] = "" + img.attrib['height'] = "" html = ET.tostring(t, method="html", encoding="unicode") return html From 984a96156bf9648ba5c694ae8b484ff017b39154 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Wed, 8 Apr 2020 19:19:15 +0200 Subject: [PATCH 08/13] removed --local --- README.md | 8 ++++---- download_imgs.py | 2 +- dumpwiki.py | 22 ++++++++-------------- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 78cabea..34c024a 100644 --- a/README.md +++ b/README.md @@ -41,11 +41,11 @@ create archive folder: `mkdir archive` run script outputting to archive folder and **displaying the images from the wiki**: -`python3 dumpwiki.py --output archive --local --imgsrc remote` +`python3 dumpwiki.py --imgsrc remote` -run script outputting to archive folder and **displaying the images from local ../images**: +run script outputting to archive folder and **displaying the images from local ../archive/images**: * requires running `download_imgs.py` -`python3 dumpwiki.py --output archive --local --imgsrc local` +`python3 dumpwiki.py` @@ -88,7 +88,7 @@ Run scripts together with `./run.sh` 1 script at a time: `python3 download_imgs.py` -* Downloads all images from wiki to `../images/` directory +* Downloads all images from wiki to `../archive/images/` directory * and stores each image's metadata to `images.json` `python3 query2html.py` diff --git a/download_imgs.py b/download_imgs.py index 31bbc95..ddf997e 100644 --- a/download_imgs.py +++ b/download_imgs.py @@ -8,7 +8,7 @@ from functions import update_json, remove_nonwords site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/') wd = os.path.dirname(os.path.abspath(__file__)) # working directory parent_d = os.path.dirname(wd) # parent directory -imgdir = os.path.join(parent_d, 'images') +imgdir = os.path.join(parent_d, 'archive/images') os.makedirs(imgdir, exist_ok=True) # create images/ dir imgsjson_fn = os.path.join(wd, 'images.json') diff --git a/dumpwiki.py b/dumpwiki.py index 54c24bd..771d7ef 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -12,11 +12,10 @@ p = argparse.ArgumentParser(description="Dump wiki files to html", formatter_class=argparse.ArgumentDefaultsHelpFormatter) p.add_argument("--host", metavar='', default="hub.xpub.nl/sandbox", help='wiki host') p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Should end with /") -p.add_argument("--output", default="/var/www/html/archive", help="Output path for pages") +p.add_argument("--output", default="../archive", help="Output path for pages") p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only") -p.add_argument("--local", default=False, action="store_true", help="When creating a local archives. Add full URL to images") p.add_argument("--imgsrc", default='archive', - choices=['archive', 'local', 'remote'], + choices=['archive', 'remote'], help="What is the source of the images?") args = p.parse_args() @@ -25,7 +24,7 @@ print(args) site = Site(host=args.host, path=args.path) wd = os.path.dirname(os.path.abspath(__file__)) # working directory - +wd_name = os.path.split(wd)[-1] # name of dir running script with open('login.txt', 'r') as login: # read login user & pwd loginlines = login.read() user, pwd = loginlines.split('\n') @@ -91,12 +90,8 @@ def rewriteimgs(html): print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images") print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC) sys.exit() - if args.imgsrc == 'local': - # 2 dirs above HTML files dir: archive/ - img.attrib['src'] = f'../../images/{img_filename}' - if args.imgsrc == 'archive': - # same dir as HTML files: archive/ - img.attrib['src'] = f'./images/{img_filename}' + # same dir as HTML files: archive/ + img.attrib['src'] = f'./images/{img_filename}' img.attrib['srcset'] = "" # rm srcset value:it prevent imgs displaying img.attrib['width'] = "" @@ -122,10 +117,9 @@ for cat in publish.members(): htmlsrc = rewritelinks(htmlsrc) htmlsrc = rewriteimgs(htmlsrc) - if args.local is True: - html = template.render(page=p, body=htmlsrc, staticpath='..') - else: - html = template.render(page=p, body=htmlsrc, staticpath='0') + # TODO: ANdre structure of archive: from ./archive/0 to: ./archive ./0 + + html = template.render(page=p, body=htmlsrc, staticpath=f'../{wd_name}') with open(os.path.join(args.output, filenameforpage(p)), 'w') as f: f.write(html) From fa837a379100e6496482d5784b53aedfcfb1ff38 Mon Sep 17 00:00:00 2001 From: Michael Murtaugh Date: Thu, 9 Apr 2020 11:07:25 +0200 Subject: [PATCH 09/13] added back missing ET.tostring in rewritelinks --- dumpwiki.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dumpwiki.py b/dumpwiki.py index 771d7ef..94bf171 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -63,6 +63,7 @@ def rewritelinks (html): if href.startswith("/sandbox/itchwiki/index.php/"): new_href = filenameforlink(href) a.attrib['href'] = new_href + html = ET.tostring(t, method="html", encoding="unicode") return html From 27cc21a58a5c17be1b639288862fe83298f76906 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Thu, 9 Apr 2020 16:27:41 +0200 Subject: [PATCH 10/13] disabeling links to wiki File pages --- dumpwiki.py | 8 ++++++++ static/archive.css | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/dumpwiki.py b/dumpwiki.py index 94bf171..7324d8e 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -53,6 +53,14 @@ def filenameforlink(href): def rewritelinks (html): t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) + + # remove links to wiki File: pages + for a in t.findall(".//a[@class='image']"): # select img wrapping a + href = a.attrib.get('href') + if a.findall(".//img") and 'File:' in href: # ensure a has child: img + a.attrib['href'] = 'javascript:void(0);' # disable href + print('a wrapping img:', ET.tostring(a)) + for a in t.findall(".//*[@href]"): linkclass = a.attrib.get("class", "") href = a.attrib.get("href") diff --git a/static/archive.css b/static/archive.css index 9229388..fd416eb 100644 --- a/static/archive.css +++ b/static/archive.css @@ -11,4 +11,6 @@ a, a:visited{ a:hover { text-decoration: none; color: blue; -} \ No newline at end of file +} + +a.image {cursor: default!important;} /* KEEP THIS: it is important to avoid images to seeming like links */ \ No newline at end of file From 245b52c6285befb326af143e4386d28e614ebdb6 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Thu, 9 Apr 2020 16:29:09 +0200 Subject: [PATCH 11/13] removing unecessary hyphen replacement (which i introduced) --- dumpwiki.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dumpwiki.py b/dumpwiki.py index 7324d8e..d28893d 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -36,7 +36,6 @@ with open(imgsjson_fn, 'r') as imgsjson_file: SLASH = "\u2044" -HYPHEN = "\u2010" def filenameforpage(p): @@ -47,7 +46,7 @@ def filenameforlink(href): href = urlunquote(href) if href.startswith("/sandbox/itchwiki/index.php/"): href = href[len("/sandbox/itchwiki/index.php/"):] - href = href.replace(' ','_').replace('/', SLASH).replace('‐', HYPHEN) + '.html' + href = href.replace(' ','_').replace('/', SLASH) + '.html' href = urlquote(href) return href From fc1562bbd10dfb351077d2d14a6e62c7637fc11f Mon Sep 17 00:00:00 2001 From: Castro0o Date: Thu, 9 Apr 2020 17:39:45 +0200 Subject: [PATCH 12/13] timeline css: horizontal divs --- static/timeline.css | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/static/timeline.css b/static/timeline.css index 65ceedf..de18c58 100644 --- a/static/timeline.css +++ b/static/timeline.css @@ -1,13 +1,20 @@ +body{ width: max-content;} + +div#body{ width: max-content;} + .mw-parser-output{ - position: left top; - display: hidden; + /*! position: left top; */ + /*! display: hidden; */ + /*! display: inline; */ + /*! vertical-align: top; */ } +.mw-parser-output > p{ +display: inline-block; +} .content { border-style: solid 1px black; - transform: rotate(90deg); - transform-origin: right top; background-color: red; color: #444; cursor: pointer; @@ -24,26 +31,23 @@ width: 20vw; height: 80vw; overflow-y: auto; - overflow-x: hidden; - transform: rotate(-90deg) /*translateY(-100px)*/; - transform-origin: right top; - + overflow-x: hidden; } .event { text-align-last: auto; - height: 10vw; - transform: rotate(90deg); - transform-origin: right top; font-family: Arial; font-size: 10px; - padding-top: 5vw; - padding-left: 5vw; +/* padding-top: 5vw; */ + padding-left: 5vw; + display: inline-block; + width:400px; + vertical-align: top; } .thumbborder { /* display: none;*/ - width: 20%; + width: 30%; height: auto; } @@ -86,6 +90,4 @@ border: none; .axis-x div, .axis-y { text-align: center; font-weight: bold; -} - - +} \ No newline at end of file From 64d738bcded417a2a171524134623e95f27b73d5 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Thu, 9 Apr 2020 18:03:16 +0200 Subject: [PATCH 13/13] removed print --- dumpwiki.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dumpwiki.py b/dumpwiki.py index d28893d..1f883ed 100644 --- a/dumpwiki.py +++ b/dumpwiki.py @@ -55,10 +55,9 @@ def rewritelinks (html): # remove links to wiki File: pages for a in t.findall(".//a[@class='image']"): # select img wrapping a - href = a.attrib.get('href') + href = a.attrib.get('href') if a.findall(".//img") and 'File:' in href: # ensure a has child: img a.attrib['href'] = 'javascript:void(0);' # disable href - print('a wrapping img:', ET.tostring(a)) for a in t.findall(".//*[@href]"): linkclass = a.attrib.get("class", "")