images: remote, local, archive

pull/7/head
Castro0o 5 years ago
parent 5de30ed9ca
commit bf66a2d572

@ -39,9 +39,14 @@ mypassword
### locally on your own machine: ### locally on your own machine:
create archive folder: `mkdir archive` create archive folder: `mkdir archive`
run script outputting to archive folder and displaying the images from the wiki: run script outputting to archive folder and **displaying the images from the wiki**:
`python3 dumpwiki.py --output archive --local --imgsrc remote`
run script outputting to archive folder and **displaying the images from local ../images**:
* requires running `download_imgs.py`
`python3 dumpwiki.py --output archive --local --imgsrc local`
`python3 dumpwiki.py --output archive --local`
### Categories and Templates: ### Categories and Templates:

@ -1,8 +1,6 @@
import os, json, sys, urllib import os, json, sys
from mwclient import Site from mwclient import Site
from pprint import pprint
from jinja2 import Template from jinja2 import Template
from functions import unpack_response, clean_dir, remove_nonwords
import html5lib import html5lib
from functions import Colors from functions import Colors
import argparse import argparse
@ -17,20 +15,25 @@ p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Shou
p.add_argument("--output", default="/var/www/html/archive", help="Output path for pages") p.add_argument("--output", default="/var/www/html/archive", help="Output path for pages")
p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only") p.add_argument("--one", default=False, action="store_true", help="Output one page from each category only")
p.add_argument("--local", default=False, action="store_true", help="When creating a local archives. Add full URL to images") p.add_argument("--local", default=False, action="store_true", help="When creating a local archives. Add full URL to images")
p.add_argument("--imgsrc", default='archive',
choices=['archive', 'local', 'remote'],
help="What is the source of the images?")
args = p.parse_args() args = p.parse_args()
print(args) print(args)
# site and login # site and login
site = Site(host=args.host, path=args.path) site = Site(host=args.host, path=args.path)
wd = os.path.dirname(os.path.abspath(__file__)) # working directory
with open('login.txt', 'r') as login: # read login user & pwd with open('login.txt', 'r') as login: # read login user & pwd
loginlines = login.read() loginlines = login.read()
user, pwd = loginlines.split('\n') user, pwd = loginlines.split('\n')
site.login(username=user, password=pwd) # login to wiki site.login(username=user, password=pwd) # login to wiki
# read template files imgsjson_fn = os.path.join(wd, 'images.json') # read images.json file
with open(imgsjson_fn, 'r') as imgsjson_file:
images_info = json.load(imgsjson_file)
SLASH = "\u2044" SLASH = "\u2044"
@ -61,20 +64,43 @@ def rewritelinks (html):
if href.startswith("/sandbox/itchwiki/index.php/"): if href.startswith("/sandbox/itchwiki/index.php/"):
new_href = filenameforlink(href) new_href = filenameforlink(href)
a.attrib['href'] = new_href a.attrib['href'] = new_href
if args.local is True:
for img in t.findall(".//img[@src]"):
src = img.attrib.get("src")
if not src.startswith('http'):
img.attrib['src'] = 'https://hub.xpub.nl' + src
html = ET.tostring(t, method="html", encoding="unicode")
return html return html
def rewriteimgs(html): def rewriteimgs(html):
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
# remove the srcset value as it prevents images from displaying
for img in t.findall(".//img[@srcset]"): # replace images url with local image in ../images
img.attrib['srcset'] = "" for img in t.findall(".//img[@src]"):
# imgsrc can be:
# remote: URL is kept (relative URLs are prefixed with https://hub.xpub.nl)
# archive: f'./images/{img_filename}'
# local: f'../../images/{img_filename}'
if args.imgsrc == 'remote':
src = img.attrib.get("src")
if not src.startswith('http'):
img.attrib['src'] = 'https://hub.xpub.nl' + src
else: # local / archive imgsrc
img_alt = img.attrib.get("alt") # alt property has filename
img_page = f'File:{img_alt}' # find image in images.json
try:
# get its filename
img_filename = images_info[img_page]['filename']
except KeyError:
print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images")
print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
sys.exit()
if args.imgsrc == 'local':
# 2 dirs above HTML files dir: archive/
img.attrib['src'] = f'../../images/{img_filename}'
if args.imgsrc == 'archive':
# same dir as HTML files: archive/
img.attrib['src'] = f'./images/{img_filename}'
img.attrib['srcset'] = "" # remove srcset value: it prevents images from displaying
img.attrib['width'] = ""
img.attrib['height'] = ""
html = ET.tostring(t, method="html", encoding="unicode") html = ET.tostring(t, method="html", encoding="unicode")
return html return html

Loading…
Cancel
Save