From 9b73ae25f515d4bbf82406d6540597a4fafdbc04 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Sun, 9 Feb 2020 15:51:34 +0100 Subject: [PATCH] documentation --- README.md | 34 +++++++++++++++++++++++++--------- query2html.py | 26 +++++++++++++++----------- static/style.css | 4 +++- templates/index.html | 2 ++ 4 files changed, 45 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 0c2430a..f07bb5f 100644 --- a/README.md +++ b/README.md @@ -49,16 +49,32 @@ Run scripts together with `./run.sh` * run dry `python3 query2html.py --dry` only printing request, not executing it * build custom query with arguments `--conditions --printouts --sort --order` * default query is: `[[File:+]][[Title::+]][[Part::+]][[Date::+]]|?Title|?Date|?Part|?Partof|sort=Date,Title,Part|order=asc,asc,asc` - * custom query `python3 query2html.py -c '[[Date::>=1970/01/01]][[Date::<=1979/12/31]]' -p '?Title|?Date|?Part|?Partof' -s 'Date,Title,Part' -o 'asc,asc,asc'` + * custom queries + * `python3 query2html.py --conditions '[[Date::>=1970/01/01]][[Date::<=1979/12/31]]'` + * `python3 query2html.py --conditions '[[Creator::~*task force*]]'` -* The results, with the same Title, are stored - * into 1 single HTML - * sorted by Part +Note: to avoid confusion or problems, it is better to leave the `--printouts` `--sort` `--order` arguments as the default. +Otherwise document parts will start to get grouped not according to their Title, hence creating documents made from different original parts. +## How does query2html.py work? + +Based on the query made: +MW API will send back a number of Page titles that match the query conditions, +together with their printouts (metadata property::value pairs). 
+ +For each Page: +* its locally stored image is found +* its text is retrieved from MW +* a fragment of html (`document_part_html`) is generated based on the `templates/document_part.html` + +All Pages that *share the same metadata's Title value*, will: +* gather all their html fragments in `all_document_parts` +* render `templates/document.html` with the content of `all_document_parts` +* save the rendered template to `'static_html/DocumentTitle.html'` + +Finally, for all of the saved documents: +* render `templates/index.html` with the info on each document that has been saved into `documentslist` +* resulting in `static_html/index.html` + -## TODO -* remove HTML files at each new query -* revise `def unpack_response()` so that it returns the values of all properties printed out -* revise template so that they include the values of all properties printed out \ -and do not break on missing values \ No newline at end of file diff --git a/query2html.py b/query2html.py index a115347..018eb9a 100644 --- a/query2html.py +++ b/query2html.py @@ -15,7 +15,7 @@ p.add_argument("--conditions", "-c", metavar='', default='[[File:+]][[Title::+]][[Part::+]][[Date::+]]', help='The query conditions') p.add_argument("--printouts", "-p", metavar='', - default='?Title|?Date|?Part|?Partof', + default='?Title|?Date|?Part|?Partof|?Creator', help='Selection of properties to printout') p.add_argument("--sort", "-s", metavar='', default='Date,Title,Part', @@ -89,7 +89,8 @@ for answer in site.ask(query): pagetext = page.text() pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html') img_local = os.path.join(imgdir, img_info.get('filename')) - # render html for that part of the document + + # RENDER document part document_part_html = document_part_template.render( printout_dict=printout_dict, imgsrc=os.path.join(imgdir, img_info.get('filename')), @@ -97,25 +98,28 @@ for answer in site.ask(query): fullurl=fullurl,) all_document_parts += document_part_html # append resulting html 
from document part to the previous parts - if printout_dict['Part'] == printout_dict['Partof']: # when Part == Partof - # pass all_document_parts html to document_template content + if printout_dict['Part'] == printout_dict['Partof']: + # RENDER DOCUMENT + # by passing all_document_parts html to document_template content document_html = document_template.render(title=printout_dict.get('Title'), date=printout_dict.get('Date'), content=all_document_parts) # render document template htmlpage_fn = "{}.html".format(printout_dict.get('Title').replace(" ", "")) + with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile: + htmlfile.write(document_html) + all_document_parts = '' # Reset all_document_parts + # add info to documentslist for index creation + # TODO: possibly needs to be a SortedDict documentslist.append({'file': htmlpage_fn, 'title': printout_dict.get('Title'), - 'date': printout_dict.get('Date') + 'date': printout_dict.get('Date'), + 'creator': printout_dict.get('Creator') }) - # print(documentslist) - - with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile: - htmlfile.write(document_html) - - all_document_parts = '' # Reset all_document_parts +# RENDER index.html from documentslist index_html = index_template.render(index='Index', + query=query, documentslist=documentslist) with open(os.path.join(static_html, 'index.html'), 'w') as htmlfile: htmlfile.write(index_html) diff --git a/static/style.css b/static/style.css index 92c607e..27fbd59 100644 --- a/static/style.css +++ b/static/style.css @@ -9,4 +9,6 @@ div#content img {width: 50%;} div.metadata span.key {color: red; - font-weight: bold;} \ No newline at end of file + font-weight: bold;} + +#orc {color:blue;} \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index b08310d..55628bc 100644 --- a/templates/index.html +++ b/templates/index.html @@ -5,10 +5,12 @@ {{title}} +

Results from query:
{{query}}