"""Generate static HTML publications from a Semantic MediaWiki ask query.

The script logs in to the wiki, runs the ask query given on the command
line and, for every answer, converts the page's wikitext to HTML and renders
it through ``templates/publication_part.html``.  Rendered parts are
accumulated until an answer whose ``Part`` equals its ``Partof`` is seen; at
that point the accumulated parts are wrapped in ``templates/publication.html``
and written to ``static_html/<Title without spaces>.html``.

Files read from the script's own directory:

* ``images.json`` -- indexed by page name; each entry provides a
  ``filename`` key (the image file name inside ``images/``).
* ``login.txt``   -- wiki user name on the first line, password on the second.
"""
import json
import os
from argparse import ArgumentParser
from pprint import pprint

from jinja2 import Template
from mwclient import Site

from functions import pandoc, page_props, unpack_response

p = ArgumentParser(description="From smw ask string generate HTML pages with resulting results.")
p.add_argument("--host", metavar='', default="hub.xpub.nl/sandbox")
p.add_argument("--path", metavar='', default="/itchwiki/",
               help="Wiki path. Should end with /")
p.add_argument("--ask", "-a", metavar='',
               default='[[File:+]][[Title::+]][[Date::+]]|?Title|?Date|?Part|?Partof|sort=Title,Part|order=asc,asc',
               help="Ask query to be sent to the wiki API.")
args = p.parse_args()

site = Site(host=args.host, path=args.path)

wd = os.path.dirname(os.path.abspath(__file__))  # working directory
imgdir = os.path.join(wd, 'images')
imgsjson_fn = os.path.join(wd, 'images.json')
with open(imgsjson_fn, 'r', encoding='utf-8') as imgsjson_file:
    images_info = json.load(imgsjson_file)

static_html = os.path.join(wd, 'static_html')
os.makedirs(static_html, exist_ok=True)  # create static_html/ output dir

# Read login user & pwd.  splitlines() (rather than split('\n')) keeps the
# two-value unpacking working when login.txt ends with a newline; any lines
# after the second are ignored.
with open(os.path.join(wd, 'login.txt'), 'r', encoding='utf-8') as login:
    loginlines = login.read()
user, pwd = loginlines.splitlines()[:2]
site.login(username=user, password=pwd)  # login to wiki

with open(os.path.join(wd, 'templates/publication.html'),
          encoding='utf-8') as pub_html:
    pub_template = Template(pub_html.read())
with open(os.path.join(wd, 'templates/publication_part.html'),
          encoding='utf-8') as pub_html:
    pub_part_template = Template(pub_html.read())

pub_parts_html = ''  # accumulates the rendered parts of the current publication
for answer in site.ask(args.ask):
    print(answer, answer.keys())
    printout_dict = unpack_response(answer)
    page_name = printout_dict['page']
    img_info = images_info[page_name]

    # site.pages is indexed by the page title itself (a string), not a list.
    page = site.pages[page_name]
    pagetext = page.text()
    pagetext_html = pandoc(pwd=wd, content=pagetext,
                           format_in='mediawiki', format_out='html')
    img_local = os.path.join(imgdir, img_info.get('filename'))

    pub_part_html = pub_part_template.render(
        imgsrc=img_local,
        text=pagetext_html,
        part=printout_dict.get('Part'),
        partof=printout_dict.get('Partof'))
    # append resulting publication part to the previous parts
    pub_parts_html += pub_part_html

    # Part == Partof is treated as "last part of this publication".
    # NOTE(review): this relies on the query returning the parts of one
    # publication consecutively (the default query sorts by Title, Part).
    if printout_dict['Part'] == printout_dict['Partof']:
        # Assemble all parts and save to HTML file
        pub_html = pub_template.render(title=printout_dict.get('Title'),
                                       date=printout_dict.get('Date'),
                                       content=pub_parts_html)
        htmlpage_fn = "{}.html".format(
            printout_dict.get('Title').replace(" ", ""))
        with open(os.path.join(static_html, htmlpage_fn), 'w',
                  encoding='utf-8') as htmlfile:
            htmlfile.write(pub_html)
        pub_parts_html = ''  # Reset pub_parts_html for the next publication

# TODO: queries without Part?
# TODO: include Creator Property value
# TODO: address Dates:
#   * date values coming from mw with timestamp, the missing values are one
#   This is a MW issue!