|
|
@ -74,9 +74,14 @@ all_document_parts = '' # to append all content
|
|
|
|
for answer in site.ask(query):
|
|
|
|
for answer in site.ask(query):
|
|
|
|
publication_title = ''
|
|
|
|
publication_title = ''
|
|
|
|
# print(answer, answer.keys())
|
|
|
|
# print(answer, answer.keys())
|
|
|
|
printout_dict, fullurl = unpack_response(answer)
|
|
|
|
page, printout_dict, fullurl = unpack_response(answer)
|
|
|
|
# pprint(printout_dict)
|
|
|
|
print(page)
|
|
|
|
|
|
|
|
try:
|
|
|
|
img_info = images_info[printout_dict['page']] # find corresponding image in images.json
|
|
|
|
img_info = images_info[printout_dict['page']] # find corresponding image in images.json
|
|
|
|
|
|
|
|
except KeyError:
|
|
|
|
|
|
|
|
print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images")
|
|
|
|
|
|
|
|
print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
|
|
|
|
|
|
|
|
sys.exit()
|
|
|
|
page = site.pages[[printout_dict['page']]] # request that page from wiki
|
|
|
|
page = site.pages[[printout_dict['page']]] # request that page from wiki
|
|
|
|
pagetext = page.text()
|
|
|
|
pagetext = page.text()
|
|
|
|
pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
|
|
|
|
pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
|
|
|
@ -98,9 +103,3 @@ for answer in site.ask(query):
|
|
|
|
htmlfile.write(pub_html)
|
|
|
|
htmlfile.write(pub_html)
|
|
|
|
|
|
|
|
|
|
|
|
all_document_parts = '' # Reset all_document_parts
|
|
|
|
all_document_parts = '' # Reset all_document_parts
|
|
|
|
|
|
|
|
|
|
|
|
# TODO: include Creator Property value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# TODO: address Dates:
|
|
|
|
|
|
|
|
# * date values coming from mw with timestamp, the missing values are one
|
|
|
|
|
|
|
|
# This is a MW issue!
|
|
|
|
|
|
|
|