Your Name 5 years ago
commit b0d77bf931

@ -39,9 +39,14 @@ mypassword
### locally on your own machine:
create archive folder: `mkdir archive`
run script outputting to archive folder and displaying the images from the wiki:
run script outputting to archive folder and **displaying the images from the wiki**:
`python3 dumpwiki.py --imgsrc remote`
run script outputting to archive folder and **displaying the images from local ../archive/images**:
* requires running `download_imgs.py`
`python3 dumpwiki.py`
`python3 dumpwiki.py --output archive --local`
### Categories and Templates:
@ -83,7 +88,7 @@ Run scripts together with `./run.sh`
1 script at a time:
`python3 download_imgs.py`
* Downloads all images from wiki to `images/` directory
* Downloads all images from wiki to `../archive/images/` directory
* and stores each image's metadata to `images.json`
`python3 query2html.py`

@ -7,8 +7,8 @@ from functions import update_json, remove_nonwords
site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
wd = os.path.dirname(os.path.abspath(__file__)) # working directory
imgdir = os.path.join(wd, 'images')
parent_d = os.path.dirname(wd) # parent directory
imgdir = os.path.join(parent_d, 'archive/images')
os.makedirs(imgdir, exist_ok=True) # create images/ dir
imgsjson_fn = os.path.join(wd, 'images.json')

@ -1,8 +1,6 @@
import os, json, sys, urllib
import os, json, sys
from mwclient import Site
from pprint import pprint
from jinja2 import Template
from functions import unpack_response, clean_dir, remove_nonwords
import html5lib
from functions import Colors
import argparse
@ -14,27 +12,32 @@ p = argparse.ArgumentParser(description="Dump wiki files to html",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
p.add_argument("--host", metavar='', default="hub.xpub.nl/sandbox", help='wiki host')
p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Should end with /")
p.add_argument("--output", default="/var/www/html/archive", help="Output path for pages")
p.add_argument("--output", default="../archive", help="Output path for pages")
p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only")
p.add_argument("--local", default=False, action="store_true", help="When creating a local archives. Add full URL to images")
p.add_argument("--imgsrc", default='archive',
choices=['archive', 'remote'],
help="What is the source of the images?")
args = p.parse_args()
print(args)
# site and login
site = Site(host=args.host, path=args.path)
wd = os.path.dirname(os.path.abspath(__file__)) # working directory
wd_name = os.path.split(wd)[-1] # name of dir running script
# Read the wiki credentials: login.txt holds the user name on its first line
# and the password on its second line.
with open('login.txt', 'r') as login:
    loginlines = login.read()
# splitlines()[:2] instead of split('\n'): a trailing newline (or any extra
# line) in login.txt used to yield more than two items and abort the script
# with "ValueError: too many values to unpack".
user, pwd = loginlines.splitlines()[:2]
site.login(username=user, password=pwd)  # login to wiki
# read template files
imgsjson_fn = os.path.join(wd, 'images.json') # read images.json file
with open(imgsjson_fn, 'r') as imgsjson_file:
images_info = json.load(imgsjson_file)
SLASH = "\u2044"  # U+2044 FRACTION SLASH, written in place of '/' in file names


def filenameforpage(p):
    """Return the HTML file name for page *p*.

    Spaces in ``p.name`` become underscores, every '/' becomes SLASH and
    the '.html' extension is appended.
    """
    safe_name = p.name.replace(' ', '_')
    safe_name = safe_name.replace('/', SLASH)
    return f'{safe_name}.html'
@ -49,6 +52,13 @@ def filenameforlink(href):
def rewritelinks (html):
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
# remove links to wiki File: pages
for a in t.findall(".//a[@class='image']"): # select img wrapping a
href = a.attrib.get('href')
if a.findall(".//img") and 'File:' in href: # ensure a has child: img
a.attrib['href'] = 'javascript:void(0);' # disable href
for a in t.findall(".//*[@href]"):
linkclass = a.attrib.get("class", "")
href = a.attrib.get("href")
@ -59,20 +69,40 @@ def rewritelinks (html):
if href.startswith("/sandbox/itchwiki/index.php/"):
new_href = filenameforlink(href)
a.attrib['href'] = new_href
if args.local is True:
for img in t.findall(".//img[@src]"):
src = img.attrib.get("src")
if not src.startswith('http'):
img.attrib['src'] = 'https://hub.xpub.nl' + src
html = ET.tostring(t, method="html", encoding="unicode")
return html
def rewriteimgs(html):
    """Rewrite the <img> elements of an HTML fragment and return the new HTML.

    Every ``srcset`` attribute is blanked. What happens to ``src`` depends on
    ``args.imgsrc``:

    * ``'remote'``: a ``src`` that does not start with 'http' is prefixed
      with 'https://hub.xpub.nl'; other values are left untouched.
    * anything else (``'archive'``): ``src`` is pointed at
      ``./images/<filename>``, where the file name is looked up in
      ``images_info`` under the key ``File:<alt text>``; ``width`` and
      ``height`` are blanked.

    If an image is not present in ``images_info`` the script prints a hint
    and terminates with exit status 1.
    """
    t = html5lib.parseFragment(html, treebuilder="etree",
                               namespaceHTMLElements=False)

    # remove the srcset value as it prevents images from displaying
    for img in t.findall(".//img[@srcset]"):
        img.attrib['srcset'] = ""

    for img in t.findall(".//img[@src]"):
        if args.imgsrc == 'remote':
            # remote: keep the wiki URL, only make relative ones absolute
            src = img.attrib.get("src")
            if not src.startswith('http'):
                img.attrib['src'] = 'https://hub.xpub.nl' + src
            continue

        # local / archive imgsrc
        img_alt = img.attrib.get("alt")   # alt property has filename
        img_page = f'File:{img_alt}'      # key of the image in images.json
        try:
            img_filename = images_info[img_page]['filename']
        except KeyError:
            # printout_dict is defined outside this function.
            # NOTE(review): confirm it is populated before rewriteimgs runs.
            print(Colors.WARNING,
                  f"{printout_dict['page']}: image {img_page} is missing "
                  "from the locally downloaded images")
            print(Colors.GREEN,
                  'run python3 download_imgs.py to fix the issue',
                  Colors.ENDC)
            # A bare sys.exit() reports success (status 0); a missing image
            # is a failure, so signal it to the calling shell.
            sys.exit(1)

        # images live next to the HTML files: archive/images/
        img.attrib['src'] = f'./images/{img_filename}'
        img.attrib['srcset'] = ""  # rm srcset value: it prevents imgs displaying
        img.attrib['width'] = ""
        img.attrib['height'] = ""

    html = ET.tostring(t, method="html", encoding="unicode")
    return html
@ -94,13 +124,13 @@ for cat in publish.members():
htmlsrc = rewritelinks(htmlsrc)
htmlsrc = rewriteimgs(htmlsrc)
if args.local is True:
html = template.render(page=p, body=htmlsrc, staticpath='..')
else:
html = template.render(page=p, body=htmlsrc, staticpath='0')
# TODO: ANdre structure of archive: from ./archive/0 to: ./archive ./0
html = template.render(page=p, body=htmlsrc, staticpath=f'../{wd_name}')
with open(os.path.join(args.output, filenameforpage(p)), 'w') as f:
print(html, file=f)
f.write(html)
# print(html, file=f)
if args.one:
break

@ -12,3 +12,5 @@ a:hover {
text-decoration: none;
color: blue;
}
a.image {cursor: default!important;} /* KEEP THIS: it prevents images from looking like links */

@ -1,3 +1,93 @@
body { background: red;
color: white;
body{ width: max-content;}
div#body{ width: max-content;}
.mw-parser-output{
/*! position: left top; */
/*! display: hidden; */
/*! display: inline; */
/*! vertical-align: top; */
}
.mw-parser-output > p{
display: inline-block;
}
/* Fixed-position box: red background, dark grey 15px serif text. */
.content {
    /* `border-style` accepts only style keywords, so the former
       `border-style: solid 1px black` was an invalid declaration and was
       dropped by browsers. The `border` shorthand takes width, style and
       colour together. */
    border: 1px solid black;
    background-color: red;
    color: #444;
    cursor: pointer;
    padding: 5px;
    position: fixed;
    font-size: 15px;
    z-index: 1;
    /* Family names containing spaces are quoted; `serif` is the generic
       fallback when the named font is unavailable. */
    font-family: "Times New Roman", serif;
}
.horizontal-scroll-wrapper {
display: inline-table;
margin-top: 10vw;
width: 20vw;
height: 80vw;
overflow-y: auto;
overflow-x: hidden;
}
.event {
text-align-last: auto;
font-family: Arial;
font-size: 10px;
/* padding-top: 5vw; */
padding-left: 5vw;
display: inline-block;
width:400px;
vertical-align: top;
}
.thumbborder {
/* display: none;*/
width: 30%;
height: auto;
}
.thumbborder:hover {
width:100%;
transition:0.5s;
height: auto;
}
.bar-chart {
display: table;
margin-top: 20px;
}
.row {
display: table-row;
}
.row div {
display: table-cell;
width: 60px;
height: 44px;
border-right: 2px solid rgb(255, 255, 255);
}
.row .axis-y {
width: 96px;
border-right: 1px solid #000;
vertical-align: top;
}
.axis-x div {
border-top: 1px solid #000;
}
.axis-x .axis-y {
border: none;
}
.axis-x div, .axis-y {
text-align: center;
font-weight: bold;
}
Loading…
Cancel
Save