"""Generate static HTML pages from the results of a Semantic MediaWiki query.

The script builds an ``ask`` query from the command-line options, runs it
against the wiki and renders every returned page with the ``document_part``
template.  Whenever a result's ``Part`` equals its ``Partof`` printout, the
parts accumulated so far are written out as one HTML document.  Finally an
``index.html`` listing all written documents is created in ``static_html/``.

Files expected next to this script: ``images.json``, ``login.txt`` (user name
on the first line, password on the second) and the ``templates/`` directory.
"""
import os
import json
import sys
# ``import urllib`` alone does not guarantee the ``parse`` submodule is loaded.
import urllib.parse
import argparse
from pprint import pprint

from mwclient import Site
from jinja2 import Template

from functions import pandoc, unpack_response, clean_dir, remove_nonwords
from functions import Colors

p = argparse.ArgumentParser(
    description="From smw ask string generate HTML pages with resulting results.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
p.add_argument("--host", metavar='', default="hub.xpub.nl/sandbox",
               help='wiki host')
p.add_argument("--path", metavar='', default="/itchwiki/",
               help="Wiki path. Should end with /")
p.add_argument("--conditions", "-c", metavar='',
               default='[[File:+]][[Title::+]][[Part::+]][[Date::+]]',
               help='The query conditions')
p.add_argument("--printouts", "-p", metavar='',
               default='?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language',
               help='Selection of properties to printout')
p.add_argument("--sort", "-s", metavar='',
               default='Date,Title,Part',
               help='Sorting according to conditions')
p.add_argument("--order", "-o", metavar='',
               default='asc,asc,asc',
               help='Order of sorting conditions. '
                    'Should same amount as the --sort properties')
p.add_argument('--limit', '-l',
               help='(optional) Limit the number of returned '
                    'items')
# TODO: GET limit to work.Perhaps with a site.raw_api method
p.add_argument('--dry', '-d', action='store_true',
               help='dry-run: will only show the query but not run it')
args = p.parse_args()

# Every sort property needs exactly one matching order direction.
if len(args.sort.split(',')) != len(args.order.split(',')):
    print(Colors.FAIL, 'Invalid query:', Colors.WARNING,
          '--sort and --order do not have the same amount of elements',
          Colors.ENDC)
    print('Script exiting now')
    sys.exit(1)  # non-zero status: the invocation was invalid

query = f'{args.conditions}|{args.printouts}|sort={args.sort}|order={args.order}'
if args.limit:
    query += f'|limit={args.limit}'
print('query:', Colors.GREEN, query, Colors.ENDC)

# The URL is only printed for the user; the query itself is run via mwclient.
query_quoted = urllib.parse.quote(query)
query_url = f'https://{args.host}{args.path}api.php?action=ask&query={query_quoted}&format=json'
print('query URL:', query_url)
if args.dry:
    sys.exit()  # dry run: showing the query is all that was asked for

# site and login
site = Site(host=args.host, path=args.path)

wd = os.path.dirname(os.path.abspath(__file__))  # working directory
imgdir = os.path.join(wd, 'images')
imgsjson_fn = os.path.join(wd, 'images.json')
with open(imgsjson_fn, 'r', encoding='utf-8') as imgsjson_file:
    images_info = json.load(imgsjson_file)

static_html = os.path.join(wd, 'static_html')
os.makedirs(static_html, exist_ok=True)  # create static_html/ dir
clean_dir(static_html)  # if static_html exists and has files or dirs: clean it

# read login user & pwd
with open(os.path.join(wd, 'login.txt'), 'r', encoding='utf-8') as login:
    # splitlines() tolerates a trailing newline at the end of the file,
    # which would otherwise yield a third, empty element.
    user, pwd = login.read().splitlines()[:2]
site.login(username=user, password=pwd)  # login to wiki


def _load_template(relative_path):
    """Return a jinja2 Template read from *relative_path* below ``wd``."""
    with open(os.path.join(wd, relative_path), encoding='utf-8') as template_file:
        return Template(template_file.read())


# read template files
index_template = _load_template('templates/index.html')
document_template = _load_template('templates/document.html')
document_part_template = _load_template('templates/document_part.html')

document_parts = []  # rendered HTML of the parts of the current document
documentslist = []   # one entry per written document, used for the index

for answer in site.ask(query):
    page, printout_dict, fullurl = unpack_response(answer)
    print(page)

    page_name = printout_dict['page']
    try:
        # find corresponding image in images.json
        img_info = images_info[page_name]
    except KeyError:
        print(Colors.WARNING,
              f"{page_name} is not is missing from the local downloaded images")
        print(Colors.GREEN,
              'run python3 download_imgs.py to fix the issue', Colors.ENDC)
        sys.exit(1)

    # request that page from wiki; the page list is indexed by title string
    wiki_page = site.pages[page_name]
    pagetext = wiki_page.text()
    pagetext_html = pandoc(pwd=wd, content=pagetext,
                           format_in='mediawiki', format_out='html')
    img_local = os.path.join(imgdir, img_info.get('filename'))

    # Todo: Create list of all images from document
    # TODO: join document_part + document
    # TODO: look into the template structure of images : columns and rows

    # RENDER document part
    document_part_html = document_part_template.render(
        printout_dict=printout_dict,
        imgsrc=img_local,
        text=pagetext_html,
        fullurl=fullurl,
    )
    # append resulting html from document part to the previous parts
    document_parts.append(document_part_html)

    if printout_dict['Part'] == printout_dict['Partof']:
        # RENDER DOCUMENT
        # by passing the html of all collected parts to document_template
        document_html = document_template.render(
            title=printout_dict.get('Title'),
            date=printout_dict.get('Date'),
            content=''.join(document_parts))
        htmlpage_fn = "{}.html".format(
            remove_nonwords(printout_dict.get('Title')[0])
        )
        with open(os.path.join(static_html, htmlpage_fn), 'w',
                  encoding='utf-8') as htmlfile:
            htmlfile.write(document_html)

        document_parts = []  # reset: the next result starts a new document

        # add info to documentslist for index creation
        documentslist.append({
            'file': htmlpage_fn,
            'title': printout_dict.get('Title'),
            'date': printout_dict.get('Date'),
            'creator': printout_dict.get('Creator'),
        })

# RENDER index.html from documentslist
index_html = index_template.render(index='Index',
                                   query=query,
                                   documentslist=documentslist)
with open(os.path.join(static_html, 'index.html'), 'w',
          encoding='utf-8') as htmlfile:
    htmlfile.write(index_html)