diff --git a/.gitignore b/.gitignore index 6a5eccb..932d111 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ static_html/ login.txt imgs_info.py images.json +.mediawiki_content +.html_content # venv dirs & files .idea/ diff --git a/functions.py b/functions.py index eeabbde..85823be 100644 --- a/functions.py +++ b/functions.py @@ -1,13 +1,26 @@ -import os, json, re +import os, json, re, shlex import subprocess from datetime import datetime +def pandoc(pwd, content, format_in, format_out): + # print('HTML content file:', wiki_content_f.name) -def pandoc(content, format_in, format_out): - pandoc_cmd = "echo '{}' | pandoc -f {} -t {}".format( - content, format_in, format_out) - output = subprocess.check_output(pandoc_cmd, shell=True) - return output.decode('utf8') + # tmp files + mw_tmp_fn = os.path.join(pwd, '.mediawiki_content') + html_tmp_fn = os.path.join(pwd, '.html_content') # TODO: join with pw + for fn in [mw_tmp_fn, html_tmp_fn ]: + if os.path.isfile(fn) is False: + os.mknod(fn) # create them if not in dir + with open(mw_tmp_fn, 'w') as mw_tmp_fn_: + mw_tmp_fn_.write(content) + + pandoc_cmd = f"pandoc {mw_tmp_fn} -f {format_in} -t {format_out} -o {html_tmp_fn}" + subprocess.call(shlex.split(pandoc_cmd)) + + with open(html_tmp_fn, 'r') as html_tmp_fn_: + output = html_tmp_fn_.read() + + return output def page_props(wikicontent): diff --git a/images2html.py b/images2html.py index d195687..b6de07a 100644 --- a/images2html.py +++ b/images2html.py @@ -58,7 +58,7 @@ for img_info in images_info.values(): print(pageproperties) if pageproperties.get('Title'): - pagetext_html = pandoc(content=pagetext, format_in='mediawiki', format_out='html') + pagetext_html = pandoc(pwd=wd ,content=pagetext, format_in='mediawiki', format_out='html') # print('pagetext', pagetext) # print('pagetext_html', pagetext_html) page_html = page_template.render(title=pageproperties.get('Title'), diff --git a/publication2html.py b/publication2html.py index dd29292..f7fe9af 100644 --- 
a/publication2html.py +++ b/publication2html.py @@ -15,12 +15,12 @@ p.add_argument("--ask", "-a", metavar='', args = p.parse_args() site = Site(host=args.host, path=args.path) + wd = os.path.dirname(os.path.abspath(__file__)) # working directory imgdir = os.path.join(wd, 'images') imgsjson_fn = os.path.join(wd, 'images.json') with open(imgsjson_fn, 'r') as imgsjson_file: images_info = json.load(imgsjson_file) - static_html = os.path.join(wd, 'static_html') os.makedirs(static_html, exist_ok=True) # create images/ dir @@ -29,8 +29,6 @@ with open(os.path.join(wd, 'login.txt'), 'r') as login: # read login user & pwd user, pwd = loginlines.split('\n') site.login(username=user, password=pwd) # login to wiki - - with open(os.path.join(wd, 'templates/publication.html')) as pub_html: pub_template = Template(pub_html.read()) @@ -39,9 +37,6 @@ with open(os.path.join(wd, 'templates/publication_part.html')) as pub_html: pub_parts_html = '' # to append all content - - - for answer in site.ask(args.ask): publication_title = '' # print(answer, answer.keys()) @@ -50,11 +45,11 @@ for answer in site.ask(args.ask): img_info = images_info[printout_dict['page']] page = site.pages[[printout_dict['page']]] pagetext = page.text() - # TODO: fix pandoc conversion - pagetext_html = pagetext # pandoc(content=pagetext, format_in='mediawiki', format_out='html') + pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html') + print(pagetext_html) img_local = os.path.join(imgdir, img_info.get('filename')) # pprint(img_info) - print(img_local) + # print(img_local) pub_part_html = pub_part_template.render( imgsrc=os.path.join(imgdir, img_info.get('filename')), text=pagetext_html, @@ -73,62 +68,8 @@ for answer in site.ask(args.ask): with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile: htmlfile.write(pub_html) - print(pub_html, '\n') + # print(pub_html, '\n') pub_parts_html = '' # Reset pub_parts_html - -# -# -# -# page_html_template = ''' -# -# -# -# -# -# 
{{title}} -# -# -#

{{ title }}

-#

-#
-# -#
-#
-# {{ content }} -#
-# -# -# -# ''' -# page_template = Template(page_html_template) -# -# -# for img_info in images_info.values(): -# print(img_info) -# page_name = img_info['name'] -# page = site.pages[page_name] -# # print(page) -# # pprint(page.__dict__) -# # print(dir(page)) -# pagetext = page.text() -# pageproperties = page_props(wikicontent=pagetext) -# print(pageproperties) -# -# if pageproperties.get('Title'): -# pagetext_html = pandoc(content=pagetext, format_in='mediawiki', format_out='html') -# # print('pagetext', pagetext) -# # print('pagetext_html', pagetext_html) -# page_html = page_template.render(title=pageproperties.get('Title'), -# date=pageproperties.get('Date'), -# imgsrc=os.path.join(imgdir, img_info.get('filename')), -# content=pagetext_html, -# part=pageproperties.get('Part'), -# partof=pageproperties.get('Partof')) -# htmlpage_fn = "{}_{}.html".format( -# pageproperties.get('Title').replace(" ", ""), -# pageproperties.get('Part').zfill(3) -# ) -# print(htmlpage_fn) +# TODO: queries without Part? +# TODO: include Creator Property value \ No newline at end of file diff --git a/templates/publication_part.html b/templates/publication_part.html index c3c3428..1f5be00 100644 --- a/templates/publication_part.html +++ b/templates/publication_part.html @@ -1,9 +1,11 @@ -
- -
-
- {{ text }} -
- Part {{part}} of {{partof}} +
+ +
+
+ {{ text | safe }} +
+
+ Part {{part}} of {{partof}} +
\ No newline at end of file