# special-issue-11-wiki2html/publication2html.py

import os, json
from mwclient import Site
from pprint import pprint
from jinja2 import Template
from functions import pandoc, page_props, unpack_response

# Connection to the wiki whose pages are turned into static HTML.
site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')

wd = os.path.dirname(os.path.abspath(__file__))  # working directory: the folder holding this script
imgdir = os.path.join(wd, 'images')
imgsjson_fn = os.path.join(wd, 'images.json')

# images.json holds the image metadata that is looked up per page further
# down. Decode it explicitly as UTF-8 so the result does not depend on the
# locale's default encoding.
with open(imgsjson_fn, 'r', encoding='utf-8') as imgsjson_file:
    images_info = json.load(imgsjson_file)

static_html = os.path.join(wd, 'static_html')
os.makedirs(static_html, exist_ok=True)  # create the static_html/ output dir if missing

# Read the wiki credentials from login.txt: user name on the first line,
# password on the second.
with open(os.path.join(wd, 'login.txt'), 'r') as login:
    loginlines = login.read()

# splitlines() copes with a trailing newline and with CRLF line endings.
# The former two-value unpacking of split('\n') raised ValueError on a
# trailing newline and kept a stray '\r' in the credentials for CRLF files.
# Blank lines are skipped; the lines themselves are left unstripped.
credentials = [line for line in loginlines.splitlines() if line.strip()]
if len(credentials) < 2:
    raise ValueError(
        'login.txt must contain the user name on the first line '
        'and the password on the second')
user, pwd = credentials[0], credentials[1]

# Log in only after the credentials file has been closed.
site.login(username=user, password=pwd)  # login to wiki


# Ask-query passed to site.ask() below, split into its three parts:
#   1. conditions - [[File:+]], [[Title::+]] and [[Date::+]]
#      (presumably: File pages that have Title and Date set - confirm
#      against the wiki's Semantic MediaWiki setup),
#   2. printouts  - Title, Date, Part and Partof,
#   3. ordering   - by Title, then by Part, both ascending.
query = (
    '[[File:+]][[Title::+]][[Date::+]]'
    '|?Title|?Date|?Part|?Partof'
    '|sort=Title,Part|order=asc,asc'
)
print('Query:', query)


def _load_template(relpath):
    """Read the file at *relpath* (relative to ``wd``) into a Jinja2 Template."""
    with open(os.path.join(wd, relpath)) as template_file:
        return Template(template_file.read())


# Template for a whole publication, rendered with title, date and content.
pub_template = _load_template('templates/publication.html')

# Template for a single part of a publication.
pub_part_template = _load_template('templates/publication_part.html')

pub_parts_html = ''  # rendered parts of the publication currently being assembled

# Walk over the query results one publication part at a time. The query sorts
# by Title and then by Part, so the parts of one publication are expected to
# arrive consecutively and in order.
# NOTE(review): the accumulator is only reset when Part == Partof; if a
# publication's last part is missing from the results, its parts carry over
# into the next publication.
for answer in site.ask(query):
    printout_dict = unpack_response(answer)
    # pprint(printout_dict)
    page_name = printout_dict['page']  # presumably the wiki page name - confirm against unpack_response
    img_info = images_info[page_name]  # KeyError if the page has no entry in images.json

    # Index the page list with the name itself (as the older code at the
    # bottom of this file does); the former site.pages[[...]] passed a
    # one-element list instead of a page name.
    page = site.pages[page_name]
    pagetext = page.text()
    # TODO: fix pandoc conversion
    pagetext_html = pagetext  # pandoc(content=pagetext, format_in='mediawiki', format_out='html')

    # Local path of the image belonging to this part; computed once and
    # reused for both the log line and the template.
    img_local = os.path.join(imgdir, img_info.get('filename'))
    # pprint(img_info)
    print(img_local)

    pub_part_html = pub_part_template.render(
        imgsrc=img_local,
        text=pagetext_html,
        part=printout_dict.get('Part'),
        partof=printout_dict.get('Partof'))
    pub_parts_html += pub_part_html  # append resulting publication part to the previous parts

    if printout_dict['Part'] == printout_dict['Partof']:  # when Part == Partof
        # Last part reached: wrap all collected parts in the publication
        # template and save the result as a single HTML file.
        title = printout_dict.get('Title')
        pub_html = pub_template.render(title=title,
                                       date=printout_dict.get('Date'),
                                       content=pub_parts_html)  # render publication template

        # File name is the title with its spaces removed.
        htmlpage_fn = "{}.html".format(title.replace(" ", ""))

        # Write as UTF-8 explicitly: wiki text may contain characters the
        # locale's default encoding cannot represent.
        # NOTE(review): assumes the HTML templates declare charset utf-8, as
        # the old inline template at the bottom of this file does - confirm.
        with open(os.path.join(static_html, htmlpage_fn), 'w', encoding='utf-8') as htmlfile:
            htmlfile.write(pub_html)

        print(pub_html, '\n')
        pub_parts_html = ''  # Reset pub_parts_html


#
#
#
# page_html_template = '''
# <!DOCTYPE html>
# <html lang="en">
# <head>
#     <meta charset="utf-8">
#     <link rel="stylesheet" href="../static/style.css" />
#     <title>{{title}}</title>
# </head>
# <body>
#     <h1>{{ title }}</h1>
#     <p><time datetime="{{date}}">{{date}}</time></p>
#     <div id="img">
#         <img src="{{ imgsrc }}" />
#     </div>
#     <div id="content">
#         {{ content }}
#     </div>
#     <footer>
#         Part {{part}} of {{partof}}
#     </footer>
# </body>
# </html>
# '''
# page_template = Template(page_html_template)
#
#
# for img_info in images_info.values():
#     print(img_info)
#     page_name = img_info['name']
#     page = site.pages[page_name]
#     # print(page)
#     # pprint(page.__dict__)
#     # print(dir(page))
#     pagetext = page.text()
#     pageproperties = page_props(wikicontent=pagetext)
#     print(pageproperties)
#
#     if pageproperties.get('Title'):
#         pagetext_html = pandoc(content=pagetext, format_in='mediawiki', format_out='html')
#         # print('pagetext', pagetext)
#         # print('pagetext_html', pagetext_html)
#         page_html = page_template.render(title=pageproperties.get('Title'),
#                                          date=pageproperties.get('Date'),
#                                          imgsrc=os.path.join(imgdir, img_info.get('filename')),
#                                          content=pagetext_html,
#                                          part=pageproperties.get('Part'),
#                                          partof=pageproperties.get('Partof'))
#         htmlpage_fn = "{}_{}.html".format(
#             pageproperties.get('Title').replace(" ", ""),
#             pageproperties.get('Part').zfill(3)
#         )
#         print(htmlpage_fn)