removing non-word chars from html filename; creator in index as string; cleaned old code from functions.py

populat_orgs
Castro0o 5 years ago
parent 5ba753199b
commit f4b9fae020

@ -43,11 +43,10 @@ def unpack_response(response):
printouts = response['printouts']
page = response['fulltext']
fullurl = response['fullurl']
# convert OrderedDict to dict via a json.dumps / json.loads round trip
printouts_dumps = json.dumps(printouts)
printouts_loads = json.loads(printouts_dumps)
printouts_loads['page'] = page
# printouts_loads['Date'] = datetime.fromtimestamp(
# int(printouts_loads['Date'][0]['timestamp']))
simplified_printouts = {}
for k, v in printouts_loads.items():
if k == 'Date':
@ -61,22 +60,6 @@ def unpack_response(response):
simplified_printouts[k] = []
for listitem in v:
simplified_printouts[k].append(listitem['fulltext'])
# for prop in printouts:
# p_item = response['printouts'][prop]
# for prop_val in p_item:
# if isinstance(prop_val, dict) is False:
# d[prop] = prop_val
# else:
# # if len(prop_val) > 0:
# props = list(prop_val.keys())
# if 'fulltext' in props:
# val = prop_val.get('fulltext')
# elif 'timestamp' in props:
# val = datetime.fromtimestamp(int(prop_val.get('timestamp')))
# else:
# val = list(prop_val.values())[0]
# d[prop] = val
return page, simplified_printouts, fullurl

@ -2,7 +2,7 @@ import os, json, sys, urllib
from mwclient import Site
from pprint import pprint
from jinja2 import Template
from functions import pandoc, page_props, unpack_response, clean_dir
from functions import pandoc, unpack_response, clean_dir, remove_nonwords
from functions import Colors
import argparse
@ -89,13 +89,7 @@ for answer in site.ask(query):
print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images")
print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
sys.exit()
#
# # TODO: EXTRACT PROPERTIES THROUGH THE FOLLOWING ASK QUERY
# ask_page_props = f'[[File:{printout_dict["page"]}]]|?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language'
# print(ask_page_props)
# page_props = site.ask(ask_page_props)
# print(page_props)
# import pdb; pdb.set_trace()
page = site.pages[[printout_dict['page']]] # request that page from wiki
pagetext = page.text()
pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
@ -112,15 +106,13 @@ for answer in site.ask(query):
if printout_dict['Part'] == printout_dict['Partof']:
# RENDER DOCUMENT
# by passing all_document_parts html to document_template content
# TODO: EXPAND PROPERTIES IN TEMPLATE
document_html = document_template.render(
title=printout_dict.get('Title'),
date=printout_dict.get('Date'),
content=all_document_parts) # render document template
htmlpage_fn = "{}.html".format(
printout_dict.get('Title')[0].replace(" ", ""))
remove_nonwords(printout_dict.get('Title')[0])
)
with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
htmlfile.write(document_html)
all_document_parts = '' # Reset all_document_parts

@ -10,7 +10,7 @@
{% for doc in documentslist %}
<li><a href="./{{ doc['file'] }}">{{ doc['title'][0] }}</a>
{{ doc['date'].year }}.{{ doc['date'].month }}.{{ doc['date'].day }}
{{doc['creator']}}
{{doc['creator'] | join(", ")}}
</li>
{% endfor %}
</ul>

Loading…
Cancel
Save