You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
134 lines
5.8 KiB
Python
134 lines
5.8 KiB
Python
import argparse
import json
import os
import sys
import urllib
import urllib.parse
from pprint import pprint

from jinja2 import Template
from mwclient import Site

from functions import Colors
from functions import pandoc, unpack_response, clean_dir, remove_nonwords
|
|
|
|
# Command-line interface. ArgumentDefaultsHelpFormatter makes --help display
# the default value next to each option's description.
p = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description="From smw ask string generate HTML pages with resulting results.",
)

# Options that share the same shape (empty metavar, string default, help text),
# listed as (flags, default, help) in the order they should appear in --help.
_STRING_OPTIONS = (
    (("--host",),
     "hub.xpub.nl/sandbox",
     'wiki host'),
    (("--path",),
     "/itchwiki/",
     "Wiki path. Should end with /"),
    (("--conditions", "-c"),
     '[[File:+]][[Title::+]][[Part::+]][[Date::+]]',
     'The query conditions'),
    (("--printouts", "-p"),
     '?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language',
     'Selection of properties to printout'),
    (("--sort", "-s"),
     'Date,Title,Part',
     'Sorting according to conditions'),
    (("--order", "-o"),
     'asc,asc,asc',
     'Order of sorting conditions. Should same amount as the --sort properties'),
)
for _flags, _default, _help in _STRING_OPTIONS:
    p.add_argument(*_flags, metavar='', default=_default, help=_help)

# --limit has no default (None when omitted) and keeps argparse's automatic metavar.
p.add_argument('--limit', '-l',
               help='(optional) Limit the number of returned items')
# --dry is a boolean switch: False unless given on the command line.
p.add_argument('--dry', '-d',
               action='store_true',
               help='dry-run: will only show the query but not run it')
|
|
|
|
args = p.parse_args()

# --sort and --order are comma-separated lists that must have the same number
# of elements; refuse to continue when they do not.
if len(args.sort.split(',')) != len(args.order.split(',')):
    print(Colors.FAIL, 'Invalid query:',
          Colors.WARNING, '--sort and --order do not have the same amount of elements', Colors.ENDC)
    print('Script exiting now')
    sys.exit(1)  # non-zero status so a calling shell/script can detect the failure

# Assemble the ask query: conditions, printouts, then sort/order (and optional limit),
# separated by '|'.
query = f'{args.conditions}|{args.printouts}|sort={args.sort}|order={args.order}'
if args.limit:
    query += f'|limit={args.limit}'
print('query:', Colors.GREEN, query, Colors.ENDC)

# NOTE: despite its name, query_unquoted holds the percent-ENCODED query.
# The resulting URL is only printed here.
query_unquoted = urllib.parse.quote(query)
query_url = f'https://{args.host}{args.path}api.php?action=ask&query={query_unquoted}&format=json'
print('query URL:', query_url)

# Dry run: the query and its URL have been shown; stop before doing anything else.
if args.dry:
    sys.exit()
|
|
|
|
|
|
# site and login

site = Site(host=args.host, path=args.path)

# All local resources are resolved relative to the directory containing this script.
wd = os.path.dirname(os.path.abspath(__file__))  # working directory
imgdir = os.path.join(wd, 'images')
imgsjson_fn = os.path.join(wd, 'images.json')
with open(imgsjson_fn, 'r') as imgsjson_file:
    images_info = json.load(imgsjson_file)

static_html = os.path.join(wd, 'static_html')
os.makedirs(static_html, exist_ok=True)  # create static_html/ dir
# clean_dir is a project helper; presumably it empties the directory when it
# already holds files or dirs -- confirm in functions.py.
clean_dir(static_html)

with open(os.path.join(wd, 'login.txt'), 'r') as login:  # read login user & pwd
    loginlines = login.read()
# First line is the user, second line the password. splitlines() copes with a
# trailing newline and with CRLF line endings, either of which broke the
# two-name unpacking of split('\n'). A file with fewer than two lines still
# raises ValueError.
user, pwd = loginlines.splitlines()[:2]
site.login(username=user, password=pwd)  # login to wiki
|
|
|
|
# read template files
def _read_template(relative_path):
    """Return a jinja2 Template built from the file at wd/relative_path."""
    with open(os.path.join(wd, relative_path)) as template_file:
        return Template(template_file.read())


index_template = _read_template('templates/index.html')
document_template = _read_template('templates/document.html')
document_part_template = _read_template('templates/document_part.html')
|
|
|
|
# Walk the query results, rendering each result as a document part. Parts are
# collected until a whole document can be written to static_html/ as one page.
document_parts = []  # rendered HTML of the parts of the document being assembled
documentslist = []   # one entry per written document, used to build the index
for answer in site.ask(query):
    page, printout_dict, fullurl = unpack_response(answer)
    print(page)

    pagename = printout_dict['page']
    try:
        img_info = images_info[pagename]  # find corresponding image in images.json
    except KeyError:
        print(Colors.WARNING, f"{pagename} is missing from the local downloaded images")
        print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
        sys.exit(1)  # non-zero status: the run did not complete

    # Request the page from the wiki by name (a plain string, not a list).
    wiki_page = site.pages[pagename]
    pagetext = wiki_page.text()
    pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
    img_local = os.path.join(imgdir, img_info.get('filename'))

    # RENDER document part
    document_part_html = document_part_template.render(
        printout_dict=printout_dict,
        imgsrc=img_local,
        text=pagetext_html,
        fullurl=fullurl,
    )
    document_parts.append(document_part_html)

    # Presumably Part is this part's number and Partof the total number of
    # parts, so equality marks the final part of a document -- TODO confirm.
    if printout_dict['Part'] == printout_dict['Partof']:
        # RENDER DOCUMENT
        # by passing the accumulated parts' html to document_template content
        document_html = document_template.render(
            title=printout_dict.get('Title'),
            date=printout_dict.get('Date'),
            content=''.join(document_parts),
        )
        htmlpage_fn = "{}.html".format(
            remove_nonwords(printout_dict.get('Title')[0])
        )
        with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
            htmlfile.write(document_html)
        document_parts = []  # start collecting parts for the next document

        # add info to documentslist for index creation
        # TODO: possibly needs to be a SortedDict
        documentslist.append({
            'file': htmlpage_fn,
            'title': printout_dict.get('Title'),
            'date': printout_dict.get('Date'),
            'creator': printout_dict.get('Creator'),
        })
|
|
|
|
# RENDER index.html from documentslist
# The template receives the page heading, the query that was run and the list
# of documents written by the loop above; the result goes to static_html/index.html.
index_context = {
    'index': 'Index',
    'query': query,
    'documentslist': documentslist,
}
index_html = index_template.render(**index_context)

index_path = os.path.join(static_html, 'index.html')
with open(index_path, 'w') as index_file:
    index_file.write(index_html)
|