removing non-word chars from html filename; creator in index as string; cleaned old code from functions.py

populat_orgs
Castro0o 5 years ago
parent 5ba753199b
commit f4b9fae020

@ -43,11 +43,10 @@ def unpack_response(response):
printouts = response['printouts'] printouts = response['printouts']
page = response['fulltext'] page = response['fulltext']
fullurl = response['fullurl'] fullurl = response['fullurl']
# convert OrderDict to Dict json.dumps json.loads
printouts_dumps = json.dumps(printouts) printouts_dumps = json.dumps(printouts)
printouts_loads = json.loads(printouts_dumps) printouts_loads = json.loads(printouts_dumps)
printouts_loads['page'] = page printouts_loads['page'] = page
# printouts_loads['Date'] = datetime.fromtimestamp(
# int(printouts_loads['Date'][0]['timestamp']))
simplified_printouts = {} simplified_printouts = {}
for k, v in printouts_loads.items(): for k, v in printouts_loads.items():
if k == 'Date': if k == 'Date':
@ -61,22 +60,6 @@ def unpack_response(response):
simplified_printouts[k] = [] simplified_printouts[k] = []
for listitem in v: for listitem in v:
simplified_printouts[k].append(listitem['fulltext']) simplified_printouts[k].append(listitem['fulltext'])
# for prop in printouts:
# p_item = response['printouts'][prop]
# for prop_val in p_item:
# if isinstance(prop_val, dict) is False:
# d[prop] = prop_val
# else:
# # if len(prop_val) > 0:
# props = list(prop_val.keys())
# if 'fulltext' in props:
# val = prop_val.get('fulltext')
# elif 'timestamp' in props:
# val = datetime.fromtimestamp(int(prop_val.get('timestamp')))
# else:
# val = list(prop_val.values())[0]
# d[prop] = val
return page, simplified_printouts, fullurl return page, simplified_printouts, fullurl

@ -2,7 +2,7 @@ import os, json, sys, urllib
from mwclient import Site from mwclient import Site
from pprint import pprint from pprint import pprint
from jinja2 import Template from jinja2 import Template
from functions import pandoc, page_props, unpack_response, clean_dir from functions import pandoc, unpack_response, clean_dir, remove_nonwords
from functions import Colors from functions import Colors
import argparse import argparse
@ -89,13 +89,7 @@ for answer in site.ask(query):
print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images") print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images")
print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC) print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
sys.exit() sys.exit()
#
# # TODO: EXTRACT PROPERTIES THROUGH THE FOLLOWING ASK QUERY
# ask_page_props = f'[[File:{printout_dict["page"]}]]|?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language'
# print(ask_page_props)
# page_props = site.ask(ask_page_props)
# print(page_props)
# import pdb; pdb.set_trace()
page = site.pages[[printout_dict['page']]] # request that page from wiki page = site.pages[[printout_dict['page']]] # request that page from wiki
pagetext = page.text() pagetext = page.text()
pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html') pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
@ -112,15 +106,13 @@ for answer in site.ask(query):
if printout_dict['Part'] == printout_dict['Partof']: if printout_dict['Part'] == printout_dict['Partof']:
# RENDER DOCUMENT # RENDER DOCUMENT
# by passing all_document_parts html to document_template content # by passing all_document_parts html to document_template content
# TODO: EXPAND PROPERTIES IN TEMPLATE
document_html = document_template.render( document_html = document_template.render(
title=printout_dict.get('Title'), title=printout_dict.get('Title'),
date=printout_dict.get('Date'), date=printout_dict.get('Date'),
content=all_document_parts) # render document template content=all_document_parts) # render document template
htmlpage_fn = "{}.html".format( htmlpage_fn = "{}.html".format(
printout_dict.get('Title')[0].replace(" ", "")) remove_nonwords(printout_dict.get('Title')[0])
)
with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile: with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
htmlfile.write(document_html) htmlfile.write(document_html)
all_document_parts = '' # Reset all_document_parts all_document_parts = '' # Reset all_document_parts

@ -10,7 +10,7 @@
{% for doc in documentslist %} {% for doc in documentslist %}
<li><a href="./{{ doc['file'] }}">{{ doc['title'][0] }}</a> <li><a href="./{{ doc['file'] }}">{{ doc['title'][0] }}</a>
{{ doc['date'].year }}.{{ doc['date'].month }}.{{ doc['date'].day }} {{ doc['date'].year }}.{{ doc['date'].month }}.{{ doc['date'].day }}
{{doc['creator']}} {{doc['creator'] | join(", ")}}
</li> </li>
{% endfor %} {% endfor %}
</ul> </ul>

Loading…
Cancel
Save