# You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
#
# 106 lines
# 4.5 KiB

import argparse
import json
import os
import sys
import urllib
import urllib.parse
from pprint import pprint

from jinja2 import Template
from mwclient import Site

from functions import Colors
from functions import pandoc, page_props, unpack_response, clean_dir
# Command-line interface. --host/--path select the wiki; --conditions, --printouts,
# --sort and --order are the pieces the ask query string is assembled from;
# --dry only prints the query.
p = argparse.ArgumentParser(
    description="From smw ask string generate HTML pages with resulting results.")
p.add_argument("--host", metavar='', default="", help='wiki host')
p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Should end with /")
p.add_argument("--conditions", "-c", metavar='',
               help='The query conditions')
p.add_argument("--printouts", "-p", metavar='',
               help='Selection of properties to printout')
p.add_argument("--sort", "-s", metavar='',
               help='Sorting according to conditions')
p.add_argument("--order", "-o", metavar='',
               help='Order of sorting conditions. Should same amount as the --sort properties')
p.add_argument('--dry', '-d', action='store_true',
               help='dry-run: will only show the query but not run it')
args = p.parse_args()
# --sort and --order are comma-separated lists that must pair up one-to-one.
# An option that was not given is None; treat it as empty so the check does not
# raise AttributeError and the literal text "None" never ends up in the query.
sort_arg = args.sort or ''
order_arg = args.order or ''
sort_keys = sort_arg.split(',') if sort_arg else []
order_keys = order_arg.split(',') if order_arg else []
if len(sort_keys) != len(order_keys):
    print(Colors.FAIL, 'Invalid query:',
          Colors.WARNING, '--sort and --order do not have the same amount of elements', Colors.ENDC)
    print('Script exiting now')
    sys.exit(1)  # non-zero status: the arguments were rejected

# Assemble "<conditions>|<printouts>|sort=<...>|order=<...>", skipping absent parts.
query_parts = [part for part in (args.conditions, args.printouts) if part]
if sort_arg:
    query_parts.append(f'sort={sort_arg}')
if order_arg:
    query_parts.append(f'order={order_arg}')
query = '|'.join(query_parts)
print('query:', Colors.GREEN, query, Colors.ENDC)

# Despite its name, query_unquoted holds the percent-encoded query, safe to put in a URL.
query_unquoted = urllib.parse.quote(query)
query_url = f'https://{args.host}{args.path}api.php?action=ask&query={query_unquoted}&format=json'
print('query URL:', query_url)

if args.dry:
    # dry-run: the query and its URL have been shown; do not contact the wiki.
    sys.exit(0)
# site and login
site = Site(host=args.host, path=args.path)

wd = os.path.dirname(os.path.abspath(__file__))  # working directory (where this script lives)
imgdir = os.path.join(wd, 'images')
imgsjson_fn = os.path.join(wd, 'images.json')
with open(imgsjson_fn, 'r') as imgsjson_file:
    # images_info is later indexed by page name to find that page's image entry
    images_info = json.load(imgsjson_file)

static_html = os.path.join(wd, 'static_html')
os.makedirs(static_html, exist_ok=True)  # create static_html/ dir
clean_dir(static_html)  # if static_html exists and has files or dirs: clean it

with open(os.path.join(wd, 'login.txt'), 'r') as login:  # read login user & pwd
    loginlines = login.read()
# login.txt holds the user name on its first line and the password on its second.
# splitlines() tolerates a trailing newline (and CRLF endings), which
# split('\n') would turn into an extra element and an unpacking error.
user, pwd = loginlines.splitlines()[:2]
site.login(username=user, password=pwd)  # login to wiki

# read template files
with open(os.path.join(wd, 'templates/document.html')) as template_file:
    pub_template = Template(template_file.read())  # whole-document template
with open(os.path.join(wd, 'templates/document_part.html')) as template_file:
    pub_part_template = Template(template_file.read())  # template for one part of a document
# Walk the query results, render every result as a document part, and write one
# HTML file each time the last part of a document (Part == Partof) is reached.
all_document_parts = ''  # rendered parts of the document currently being assembled
for answer in site.ask(query):
    print(answer, answer.keys())
    printout_dict = unpack_response(answer)
    page_name = printout_dict['page']
    img_info = images_info[page_name]  # find corresponding image in images.json
    page = site.pages[page_name]  # request that page from wiki (indexed by title, not by a list)
    pagetext = page.text()
    pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
    img_local = os.path.join(imgdir, img_info.get('filename'))

    # render html for that part of the document
    document_part_html = pub_part_template.render(
        imgsrc=img_local,
        # NOTE(review): the keyword name 'text' is an assumption; confirm it (and any
        # further variables) against templates/document_part.html.
        text=pagetext_html,
    )
    all_document_parts += document_part_html  # append this part's html to the previous parts

    if printout_dict['Part'] == printout_dict['Partof']:  # when Part == Partof
        # pass all_document_parts html to pub_template content
        pub_html = pub_template.render(title=printout_dict.get('Title'),
                                       content=all_document_parts)  # render document template
        htmlpage_fn = "{}.html".format(printout_dict.get('Title').replace(" ", ""))
        with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
            htmlfile.write(pub_html)
        all_document_parts = ''  # Reset all_document_parts for the next document
# TODO: include Creator Property value
# TODO: address Dates:
# * date values coming from mw with timestamp, the missing values are one
# This is a MW issue!