fixed issues with pandoc, using local tmp files

andre
Castro0o 5 years ago
parent 5302c5766e
commit c6a4e6ca0c

2
.gitignore vendored

@ -3,6 +3,8 @@ static_html/
login.txt login.txt
imgs_info.py imgs_info.py
images.json images.json
.mediawiki_content
.html_content
# venv dirs & files # venv dirs & files
.idea/ .idea/

@ -1,13 +1,26 @@
import os, json, re import os, json, re, shlex
import subprocess import subprocess
from datetime import datetime from datetime import datetime
def pandoc(pwd, content, format_in, format_out):
# print('HTML content file:', wiki_content_f.name)
def pandoc(content, format_in, format_out): # tmp files
pandoc_cmd = "echo '{}' | pandoc -f {} -t {}".format( mw_tmp_fn = os.path.join(pwd, '.mediawiki_content')
content, format_in, format_out) html_tmp_fn = os.path.join(pwd, '.html_content') # TODO: join with pwd
output = subprocess.check_output(pandoc_cmd, shell=True) for fn in [mw_tmp_fn, html_tmp_fn ]:
return output.decode('utf8') if os.path.isfile(fn) is False:
os.mknod(fn) # create them if not in dir
with open(mw_tmp_fn, 'w') as mw_tmp_fn_:
mw_tmp_fn_.write(content)
pandoc_cmd = f"pandoc {mw_tmp_fn} -f {format_in} -t {format_out} -o {html_tmp_fn}"
subprocess.call(shlex.split(pandoc_cmd))
with open(html_tmp_fn, 'r') as html_tmp_fn_:
output = html_tmp_fn_.read()
return output
def page_props(wikicontent): def page_props(wikicontent):

@ -58,7 +58,7 @@ for img_info in images_info.values():
print(pageproperties) print(pageproperties)
if pageproperties.get('Title'): if pageproperties.get('Title'):
pagetext_html = pandoc(content=pagetext, format_in='mediawiki', format_out='html') pagetext_html = pandoc(pwd=wd ,content=pagetext, format_in='mediawiki', format_out='html')
# print('pagetext', pagetext) # print('pagetext', pagetext)
# print('pagetext_html', pagetext_html) # print('pagetext_html', pagetext_html)
page_html = page_template.render(title=pageproperties.get('Title'), page_html = page_template.render(title=pageproperties.get('Title'),

@ -15,12 +15,12 @@ p.add_argument("--ask", "-a", metavar='',
args = p.parse_args() args = p.parse_args()
site = Site(host=args.host, path=args.path) site = Site(host=args.host, path=args.path)
wd = os.path.dirname(os.path.abspath(__file__)) # working directory wd = os.path.dirname(os.path.abspath(__file__)) # working directory
imgdir = os.path.join(wd, 'images') imgdir = os.path.join(wd, 'images')
imgsjson_fn = os.path.join(wd, 'images.json') imgsjson_fn = os.path.join(wd, 'images.json')
with open(imgsjson_fn, 'r') as imgsjson_file: with open(imgsjson_fn, 'r') as imgsjson_file:
images_info = json.load(imgsjson_file) images_info = json.load(imgsjson_file)
static_html = os.path.join(wd, 'static_html') static_html = os.path.join(wd, 'static_html')
os.makedirs(static_html, exist_ok=True) # create static_html/ dir os.makedirs(static_html, exist_ok=True) # create static_html/ dir
@ -29,8 +29,6 @@ with open(os.path.join(wd, 'login.txt'), 'r') as login: # read login user & pwd
user, pwd = loginlines.split('\n') user, pwd = loginlines.split('\n')
site.login(username=user, password=pwd) # login to wiki site.login(username=user, password=pwd) # login to wiki
with open(os.path.join(wd, 'templates/publication.html')) as pub_html: with open(os.path.join(wd, 'templates/publication.html')) as pub_html:
pub_template = Template(pub_html.read()) pub_template = Template(pub_html.read())
@ -39,9 +37,6 @@ with open(os.path.join(wd, 'templates/publication_part.html')) as pub_html:
pub_parts_html = '' # to append all content pub_parts_html = '' # to append all content
for answer in site.ask(args.ask): for answer in site.ask(args.ask):
publication_title = '' publication_title = ''
# print(answer, answer.keys()) # print(answer, answer.keys())
@ -50,11 +45,11 @@ for answer in site.ask(args.ask):
img_info = images_info[printout_dict['page']] img_info = images_info[printout_dict['page']]
page = site.pages[[printout_dict['page']]] page = site.pages[[printout_dict['page']]]
pagetext = page.text() pagetext = page.text()
# TODO: fix pandoc conversion pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
pagetext_html = pagetext # pandoc(content=pagetext, format_in='mediawiki', format_out='html') print(pagetext_html)
img_local = os.path.join(imgdir, img_info.get('filename')) img_local = os.path.join(imgdir, img_info.get('filename'))
# pprint(img_info) # pprint(img_info)
print(img_local) # print(img_local)
pub_part_html = pub_part_template.render( pub_part_html = pub_part_template.render(
imgsrc=os.path.join(imgdir, img_info.get('filename')), imgsrc=os.path.join(imgdir, img_info.get('filename')),
text=pagetext_html, text=pagetext_html,
@ -73,62 +68,8 @@ for answer in site.ask(args.ask):
with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile: with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
htmlfile.write(pub_html) htmlfile.write(pub_html)
print(pub_html, '\n') # print(pub_html, '\n')
pub_parts_html = '' # Reset pub_parts_html pub_parts_html = '' # Reset pub_parts_html
# TODO: queries without Part?
# # TODO: include Creator Property value
#
#
# page_html_template = '''
# <!DOCTYPE html>
# <html lang="en">
# <head>
# <meta charset="utf-8">
# <link rel="stylesheet" href="../static/style.css" />
# <title>{{title}}</title>
# </head>
# <body>
# <h1>{{ title }}</h1>
# <p><time datetime="{{date}}">{{date}}</time></p>
# <div id="img">
# <img src="{{ imgsrc }}" />
# </div>
# <div id="content">
# {{ content }}
# </div>
# <footer>
# Part {{part}} of {{partof}}
# </footer>
# </body>
# </html>
# '''
# page_template = Template(page_html_template)
#
#
# for img_info in images_info.values():
# print(img_info)
# page_name = img_info['name']
# page = site.pages[page_name]
# # print(page)
# # pprint(page.__dict__)
# # print(dir(page))
# pagetext = page.text()
# pageproperties = page_props(wikicontent=pagetext)
# print(pageproperties)
#
# if pageproperties.get('Title'):
# pagetext_html = pandoc(content=pagetext, format_in='mediawiki', format_out='html')
# # print('pagetext', pagetext)
# # print('pagetext_html', pagetext_html)
# page_html = page_template.render(title=pageproperties.get('Title'),
# date=pageproperties.get('Date'),
# imgsrc=os.path.join(imgdir, img_info.get('filename')),
# content=pagetext_html,
# part=pageproperties.get('Part'),
# partof=pageproperties.get('Partof'))
# htmlpage_fn = "{}_{}.html".format(
# pageproperties.get('Title').replace(" ", ""),
# pageproperties.get('Part').zfill(3)
# )
# print(htmlpage_fn)

@ -1,9 +1,11 @@
<div class="part">
<div class="img"> <div class="img">
<img src="{{ imgsrc }}" /> <img src="{{ imgsrc }}" />
</div> </div>
<div class="text"> <div class="text">
{{ text }} {{ text | safe }}
</div> </div>
<div class="part"> <div class="part">
Part {{part}} of {{partof}} Part {{part}} of {{partof}}
</div> </div>
</div>
Loading…
Cancel
Save