From f4b9fae02076edbcdf6a98435ab146de2d33b4b3 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Wed, 4 Mar 2020 12:11:47 +0100 Subject: [PATCH] removing non-word chars from html filename; creator in index as string; cleaned old code from functions.py --- functions.py | 19 +------------------ query2html.py | 16 ++++------------ templates/index.html | 2 +- 3 files changed, 6 insertions(+), 31 deletions(-) diff --git a/functions.py b/functions.py index 29aac39..ce6d4a2 100644 --- a/functions.py +++ b/functions.py @@ -43,11 +43,10 @@ def unpack_response(response): printouts = response['printouts'] page = response['fulltext'] fullurl = response['fullurl'] + # convert OrderDict to Dict json.dumps json.loads printouts_dumps = json.dumps(printouts) printouts_loads = json.loads(printouts_dumps) printouts_loads['page'] = page - # printouts_loads['Date'] = datetime.fromtimestamp( - # int(printouts_loads['Date'][0]['timestamp'])) simplified_printouts = {} for k, v in printouts_loads.items(): if k == 'Date': @@ -61,22 +60,6 @@ def unpack_response(response): simplified_printouts[k] = [] for listitem in v: simplified_printouts[k].append(listitem['fulltext']) - - # for prop in printouts: - # p_item = response['printouts'][prop] - # for prop_val in p_item: - # if isinstance(prop_val, dict) is False: - # d[prop] = prop_val - # else: - # # if len(prop_val) > 0: - # props = list(prop_val.keys()) - # if 'fulltext' in props: - # val = prop_val.get('fulltext') - # elif 'timestamp' in props: - # val = datetime.fromtimestamp(int(prop_val.get('timestamp'))) - # else: - # val = list(prop_val.values())[0] - # d[prop] = val return page, simplified_printouts, fullurl diff --git a/query2html.py b/query2html.py index 9e234d9..457f5f5 100644 --- a/query2html.py +++ b/query2html.py @@ -2,7 +2,7 @@ import os, json, sys, urllib from mwclient import Site from pprint import pprint from jinja2 import Template -from functions import pandoc, page_props, unpack_response, clean_dir +from functions import
pandoc, unpack_response, clean_dir, remove_nonwords from functions import Colors import argparse @@ -89,13 +89,7 @@ for answer in site.ask(query): print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images") print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC) sys.exit() - # - # # TODO: EXTRACT PROPERTIES THROUGH THE FOLLOWING ASK QUERY - # ask_page_props = f'[[File:{printout_dict["page"]}]]|?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language' - # print(ask_page_props) - # page_props = site.ask(ask_page_props) - # print(page_props) - # import pdb; pdb.set_trace() + page = site.pages[[printout_dict['page']]] # request that page from wiki pagetext = page.text() pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html') @@ -112,15 +106,13 @@ for answer in site.ask(query): if printout_dict['Part'] == printout_dict['Partof']: # RENDER DOCUMENT # by passing all_document_parts html to document_template content - - # TODO: EXPAND PROPERTIES IN TEMPLATE - document_html = document_template.render( title=printout_dict.get('Title'), date=printout_dict.get('Date'), content=all_document_parts) # render document template htmlpage_fn = "{}.html".format( - printout_dict.get('Title')[0].replace(" ", "")) + remove_nonwords(printout_dict.get('Title')[0]) + ) with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile: htmlfile.write(document_html) all_document_parts = '' # Reset all_document_parts diff --git a/templates/index.html b/templates/index.html index af2286e..efe90e2 100644 --- a/templates/index.html +++ b/templates/index.html @@ -10,7 +10,7 @@ {% for doc in documentslist %}
  • {{ doc['title'][0] }} {{ doc['date'].year }}.{{ doc['date'].month }}.{{ doc['date'].day }} - {{doc['creator']}} + {{doc['creator'] | join(", ")}}
  • {% endfor %}