You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
special-issue-11-wiki2html/query2html.py

113 lines
4.5 KiB
Python

import argparse
import json
import os
import sys
import urllib
import urllib.parse
from pprint import pprint

from jinja2 import Template
from mwclient import Site

from functions import pandoc, page_props, unpack_response
from functions import Colors
# Command-line interface. Every option carries a default, so the script can be
# run without any arguments; ArgumentDefaultsHelpFormatter shows those defaults
# in --help.
p = argparse.ArgumentParser(
    description="From smw ask string generate HTML pages with resulting results.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

# (flags, default, help text) of the value-taking options, in --help order.
_value_options = (
    (("--host",), "hub.xpub.nl/sandbox", 'wiki host'),
    (("--path",), "/itchwiki/", "Wiki path. Should end with /"),
    (("--conditions", "-c"),
     '[[File:+]][[Title::+]][[Part::+]][[Date::+]]',
     'The query conditions'),
    (("--printouts", "-p"),
     '?Title|?Date|?Part|?Partof',
     'Selection of properties to printout'),
    (("--sort", "-s"),
     'Date,Title,Part',
     'Sorting according to conditions'),
    (("--order", "-o"),
     'asc,asc,asc',
     'Order of sorting conditions. Should same amount as the --sort properties'),
)
for _flags, _default, _help in _value_options:
    # empty metavar keeps the usage line short (no upper-cased placeholder)
    p.add_argument(*_flags, metavar='', default=_default, help=_help)

# Boolean switch: print the query and its URL, then stop.
p.add_argument('--dry', '-d', action='store_true',
               help='dry-run: will only show the query but not run it')

args = p.parse_args()
# --sort and --order are parallel comma-separated lists: every sort property
# needs exactly one matching direction.
if len(args.sort.split(',')) != len(args.order.split(',')):
    print(Colors.FAIL, 'Invalid query:',
          Colors.WARNING, '--sort and --order do not have the same amount of elements', Colors.ENDC)
    print('Script exiting now')
    sys.exit(1)  # non-zero status so a calling shell/script can detect the failure

# Assemble the ask query string: conditions|printouts|sort=...|order=...
query = f'{args.conditions}|{args.printouts}|sort={args.sort}|order={args.order}'
print('query:', Colors.GREEN, query, Colors.ENDC)

# Percent-encode the query so it can be embedded in the api.php URL.
# The URL is only printed (handy for checking the query in a browser).
query_quoted = urllib.parse.quote(query)
query_url = f'https://{args.host}{args.path}api.php?action=ask&query={query_quoted}&format=json'
print('query URL:', query_url)

if args.dry:
    # dry-run: the query and URL have been shown; stop before touching the wiki
    sys.exit()
# site and login
site = Site(host=args.host, path=args.path)

wd = os.path.dirname(os.path.abspath(__file__))  # working directory: folder of this script
imgdir = os.path.join(wd, 'images')

# images.json: image metadata, looked up per wiki page name in the main loop
# (each entry is read for its 'filename').
imgsjson_fn = os.path.join(wd, 'images.json')
with open(imgsjson_fn, 'r', encoding='utf-8') as imgsjson_file:
    images_info = json.load(imgsjson_file)

static_html = os.path.join(wd, 'static_html')
os.makedirs(static_html, exist_ok=True)  # create static_html/ output dir

# login.txt: user name on the first line, password on the second.
with open(os.path.join(wd, 'login.txt'), 'r') as login:  # read login user & pwd
    loginlines = login.read()
# splitlines() (instead of split('\n')) tolerates a trailing newline and
# Windows line endings; a file with fewer than two lines still raises ValueError.
user, pwd = loginlines.splitlines()[:2]
site.login(username=user, password=pwd)  # login to wiki

# read template files
with open(os.path.join(wd, 'templates/publication.html'), encoding='utf-8') as pub_html:
    pub_template = Template(pub_html.read())
with open(os.path.join(wd, 'templates/publication_part.html'), encoding='utf-8') as pub_html:
    pub_part_template = Template(pub_html.read())
# Render every query result as one publication part and write a complete
# publication to static_html/ each time a part with Part == Partof is reached.
# NOTE(review): presumably Partof is the total number of parts, so equality
# marks the last part of a publication; this relies on the results being
# sorted by Part (the --sort default) - confirm.
pub_parts = []  # rendered HTML of the parts collected for the current publication
for answer in site.ask(query):
    print(answer, answer.keys())
    printout_dict = unpack_response(answer)
    page_name = printout_dict['page']

    img_info = images_info[page_name]  # KeyError if the page is missing from images.json
    # Index site.pages with the title string itself, not a one-element list.
    page = site.pages[page_name]
    pagetext = page.text()
    pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
    img_local = os.path.join(imgdir, img_info.get('filename'))

    pub_parts.append(pub_part_template.render(
        imgsrc=img_local,
        text=pagetext_html,
        part=printout_dict.get('Part'),
        partof=printout_dict.get('Partof')))

    if printout_dict['Part'] == printout_dict['Partof']:  # when Part == Partof
        # Assemble all parts and save to HTML file
        pub_html = pub_template.render(title=printout_dict.get('Title'),
                                       date=printout_dict.get('Date'),
                                       content=''.join(pub_parts))  # render publication template
        # file name: the publication title with spaces removed
        htmlpage_fn = "{}.html".format(printout_dict.get('Title').replace(" ", ""))
        with open(os.path.join(static_html, htmlpage_fn), 'w', encoding='utf-8') as htmlfile:
            htmlfile.write(pub_html)
        pub_parts = []  # start collecting the next publication
# TODO: queries without Part?
# TODO: include Creator Property value
# TODO: address Dates:
# * date values arrive from MW as timestamps, with the missing values set to one
# This is a MW issue!