From 94116713328c4fccb24b309437b01fc4e57f69d2 Mon Sep 17 00:00:00 2001
From: Castro0o <andre@andrecastro>
Date: Tue, 3 Mar 2020 14:07:12 +0100
Subject: [PATCH 1/6] removing deprecated images2html.py

---
 images2html.py | 76 --------------------------------------------------
 1 file changed, 76 deletions(-)
 delete mode 100644 images2html.py
diff --git a/images2html.py b/images2html.py
deleted file mode 100644
index b6de07a..0000000
--- a/images2html.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import os, json
-from mwclient import Site
-from pprint import pprint
-from jinja2 import Template
-from functions import pandoc, page_props
-
-site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
-wd = os.path.dirname(os.path.abspath(__file__)) # working directory
-imgdir = os.path.join(wd, 'images')
-imgsjson_fn = os.path.join(wd, 'images.json')
-with open(imgsjson_fn, 'r') as imgsjson_file:
-    images_info = json.load(imgsjson_file)
-
-static_html = os.path.join(wd, 'static_html')
-os.makedirs(static_html, exist_ok=True) # create images/ dir
-
-with open(os.path.join(wd, 'login.txt'), 'r') as login:  # read login user & pwd
-    loginlines = login.read()
-    user, pwd = loginlines.split('\n')
-    site.login(username=user, password=pwd)  # login to wiki
-
-
-page_html_template = '''
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="utf-8">
-    <link rel="stylesheet" href="../static/style.css" />
-    <title>{{title}}</title>
-</head>
-<body>
-    <h1>{{ title }}</h1>
-    <p><time datetime="{{date}}">{{date}}</time></p>
-    <div id="img">
-        <img src="{{ imgsrc }}" />
-    </div>
-    <div id="content">
-        {{ content }}
-    </div>
-    <footer>
-        Part {{part}} of {{partof}}
-    </footer>
-</body>
-</html>
-'''
-page_template = Template(page_html_template)
-
-
-for img_info in images_info.values():
-    print(img_info)
-    page_name = img_info['name']
-    page = site.pages[page_name]
-    # print(page)
-    # pprint(page.__dict__)
-    # print(dir(page))
-    pagetext = page.text()
-    pageproperties = page_props(wikicontent=pagetext)
-    print(pageproperties)
-
-    if pageproperties.get('Title'):
-        pagetext_html = pandoc(pwd=wd ,content=pagetext, format_in='mediawiki', format_out='html')
-        # print('pagetext', pagetext)
-        # print('pagetext_html', pagetext_html)
-        page_html = page_template.render(title=pageproperties.get('Title'),
-                                         date=pageproperties.get('Date'),
-                                         imgsrc=os.path.join(imgdir, img_info.get('filename')),
-                                         content=pagetext_html,
-                                         part=pageproperties.get('Part'),
-                                         partof=pageproperties.get('Partof'))
-        htmlpage_fn = "{}_{}.html".format(
-            pageproperties.get('Title').replace(" ", ""),
-            pageproperties.get('Part').zfill(3)
-        )
-        print(htmlpage_fn)
-        with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
-            htmlfile.write(page_html)

From b441df8ba0d95bc934297187199d4998537779ab Mon Sep 17 00:00:00 2001
From: Castro0o <andre@andrecastro>
Date: Tue, 3 Mar 2020 16:55:57 +0100
Subject: [PATCH 2/6] downloading images with clean filenames and
 resizing:jpg,jpeg,png

---
 download_imgs.py | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/download_imgs.py b/download_imgs.py
index 5ba671a..a8af106 100644
--- a/download_imgs.py
+++ b/download_imgs.py
@@ -1,8 +1,9 @@
 import os
 from mwclient import Site
 from pprint import pprint
-from functions import update_json
 from PIL import Image
+from functions import update_json, remove_nonwords
+
 
 site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
 wd = os.path.dirname(os.path.abspath(__file__)) # working directory
@@ -28,7 +29,7 @@ for img in site.allimages():
     # important img info to dictionary
     img_dict = {
         'name': img.name,
-        'filename': img.page_title,
+        'filename': remove_nonwords(img.page_title),
         'timestamp': img.imageinfo['timestamp'],
         'url': img.imageinfo['url'],
         'urldesc': img.imageinfo['descriptionshorturl'],
@@ -37,6 +38,7 @@ for img in site.allimages():
 
     # location of image storage
     img_fn = os.path.join(imgdir, img_dict['filename'])
+    print(img_fn)
     # function updates images.json and returns whether the img needs to be downloaded or not
     download = update_json(imgsjson_fn, img_dict, img_fn)
 
@@ -47,16 +49,18 @@ for img in site.allimages():
             img.download(destination=img_file)
 
         # resize image
-        pilimg = Image.open(img_fn)
-        pilimg_dim = list(pilimg._size)
-        pilimg_dim_sort = sorted(pilimg_dim) # smallest dimension 1st
-        img_ratio = pilimg_dim_sort[0] / pilimg_dim_sort[1]
-        if pilimg_dim == pilimg_dim_sort:
-            # if height was largest
-            new_dim = [(thumbnail_size * img_ratio), thumbnail_size]
-        else:
-            # if with was largest
-            new_dim = [thumbnail_size,(thumbnail_size * img_ratio)]
-        pilimg.thumbnail(new_dim)
-        pilimg.save(img_fn)
+        fn, ext = os.path.splitext(img_fn)
+        if ext.lower() in ['.jpg', '.jpeg', '.gif', '.png']:  # only img format
+            pilimg = Image.open(img_fn)
+            pilimg_dim = list(pilimg._size)
+            pilimg_dim_sort = sorted(pilimg_dim) # smallest dimension 1st
+            img_ratio = pilimg_dim_sort[0] / pilimg_dim_sort[1]
+            if pilimg_dim == pilimg_dim_sort:
+                # if height was largest
+                new_dim = [(thumbnail_size * img_ratio), thumbnail_size]
+            else:
+                # if width was largest
+                new_dim = [thumbnail_size,(thumbnail_size * img_ratio)]
+            pilimg.thumbnail(new_dim)
+            pilimg.save(img_fn)
     print('\n')

From 69fc3f2ec5dbaff0606febb714d751e4c194b0ab Mon Sep 17 00:00:00 2001
From: Castro0o <andre@andrecastro>
Date: Wed, 4 Mar 2020 08:53:52 +0100
Subject: [PATCH 3/6] remove non words

---
 functions.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/functions.py b/functions.py
index 4c6b438..5414223 100644
--- a/functions.py
+++ b/functions.py
@@ -3,6 +3,12 @@ import subprocess
 from datetime import datetime
 
 
+def remove_nonwords(imgname):
+    filename, ext = os.path.splitext(imgname)  # split into filename & extension
+    filename = re.sub(r'\W', '', filename)  # remove non-word characters from filename
+    return f'{filename}{ext}'  # join filename & ext
+
+
 def pandoc(pwd, content, format_in, format_out):
     # print('HTML content file:', wiki_content_f.name)
 
@@ -95,6 +101,7 @@ def clean_dir(dirfullpath):
             os.remove(f)
 
 def print_colormsg(msg, level):
+    color_cmd = ''
     if level == 'fail':
         color_cmd = Colors.FAIL
     elif level == 'warning':

From 0d9ed8a2d4113c70546746f75d9ce0485dca3b1d Mon Sep 17 00:00:00 2001
From: Castro0o <andre@andrecastro>
Date: Wed, 4 Mar 2020 11:28:20 +0100
Subject: [PATCH 4/6] WIP:multi values properties

---
 functions.py                 | 52 ++++++++++++++++++++++++------------
 query2html.py                | 30 ++++++++++++++++-----
 templates/document.html      |  4 +--
 templates/document_part.html | 25 +++++++++++++----
 templates/index.html         |  2 +-
 5 files changed, 81 insertions(+), 32 deletions(-)

diff --git a/functions.py b/functions.py
index 5414223..29aac39 100644
--- a/functions.py
+++ b/functions.py
@@ -43,23 +43,41 @@ def unpack_response(response):
     printouts = response['printouts']
     page = response['fulltext']
     fullurl = response['fullurl']
-    d['page'] = page
-    for prop in printouts:
-        p_item = response['printouts'][prop]
-        for prop_val in p_item:
-            if isinstance(prop_val, dict) is False:
-                d[prop] = prop_val
-            else:
-                # if len(prop_val) > 0:
-                props = list(prop_val.keys())
-                if 'fulltext' in props:
-                    val = prop_val.get('fulltext')
-                elif 'timestamp' in props:
-                    val = datetime.fromtimestamp(int(prop_val.get('timestamp')))
-                else:
-                    val = list(prop_val.values())[0]
-                d[prop] = val
-    return page, d, fullurl
+    printouts_dumps = json.dumps(printouts)
+    printouts_loads = json.loads(printouts_dumps)
+    printouts_loads['page'] = page
+    # printouts_loads['Date'] = datetime.fromtimestamp(
+    #     int(printouts_loads['Date'][0]['timestamp']))
+    simplified_printouts = {}
+    for k, v in printouts_loads.items():
+        if k == 'Date':
+            simplified_printouts[k] = datetime.fromtimestamp(
+                int(v[0]['timestamp']))
+        # elif k == 'Title':
+        #     simplified_printouts[k] = v[0]['fulltext']
+        elif k in ['Part', 'Partof', 'page']:  # only 1 value for each
+            simplified_printouts[k] = v
+        else: # Possibly more than 1 value for the rest of properties
+            simplified_printouts[k] = []
+            for listitem in v:
+                simplified_printouts[k].append(listitem['fulltext'])
+
+    # for prop in printouts:
+    #     p_item = response['printouts'][prop]
+    #     for prop_val in p_item:
+    #         if isinstance(prop_val, dict) is False:
+    #             d[prop] = prop_val
+    #         else:
+    #             # if len(prop_val) > 0:
+    #             props = list(prop_val.keys())
+    #             if 'fulltext' in props:
+    #                 val = prop_val.get('fulltext')
+    #             elif 'timestamp' in props:
+    #                 val = datetime.fromtimestamp(int(prop_val.get('timestamp')))
+    #             else:
+    #                 val = list(prop_val.values())[0]
+    #             d[prop] = val
+    return page, simplified_printouts, fullurl
 
 
 def update_json(imgsjson_fn, img_dict, img_fn):
diff --git a/query2html.py b/query2html.py
index 018eb9a..9e234d9 100644
--- a/query2html.py
+++ b/query2html.py
@@ -15,7 +15,7 @@ p.add_argument("--conditions", "-c", metavar='',
                default='[[File:+]][[Title::+]][[Part::+]][[Date::+]]',
                help='The query conditions')
 p.add_argument("--printouts", "-p", metavar='',
-               default='?Title|?Date|?Part|?Partof|?Creator',
+               default='?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language',
                help='Selection of properties to printout')
 p.add_argument("--sort", "-s", metavar='',
                default='Date,Title,Part',
@@ -23,6 +23,8 @@ p.add_argument("--sort", "-s", metavar='',
 p.add_argument("--order", "-o", metavar='', 
                default='asc,asc,asc',
                help='Order of sorting conditions. Should same amount as the --sort properties')
+p.add_argument('--limit', '-l', help='(optional) Limit the number of returned '
+                                     'items')
 p.add_argument('--dry', '-d', action='store_true',
                help='dry-run: will only show the query but not run it')
 
@@ -33,8 +35,10 @@ if len(args.sort.split(',')) != len(args.order.split(',')):
           Colors.WARNING, '--sort and --order do not have the same amount of elements', Colors.ENDC)
     print('Script exiting now')
     sys.exit()
-
 query = f'{args.conditions}|{args.printouts}|sort={args.sort}|order={args.order}'
+if args.limit:
+    limit_str = f'|limit={args.limit}'
+    query += limit_str
 print('query:', Colors.GREEN, query, Colors.ENDC)
 query_unquoted = urllib.parse.quote(query)
 query_url = f'https://{args.host}{args.path}api.php?action=ask&query={query_unquoted}&format=json'
@@ -75,7 +79,7 @@ with open(os.path.join(wd, 'templates/document_part.html')) as document_html:
 all_document_parts = ''  # to append all content
 documentslist = []
 for answer in site.ask(query):
-    publication_title = ''
+    # publication_title = ''
     # print(answer, answer.keys())
     page, printout_dict, fullurl = unpack_response(answer)
     print(page)
@@ -85,6 +89,13 @@ for answer in site.ask(query):
         print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images")
         print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
         sys.exit()
+    #
+    # # TODO: EXTRACT PROPERTIES THROUGH THE FOLLOWING ASK QUERY
+    # ask_page_props = f'[[File:{printout_dict["page"]}]]|?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language'
+    # print(ask_page_props)
+    # page_props = site.ask(ask_page_props)
+    # print(page_props)
+    # import pdb; pdb.set_trace()
     page = site.pages[[printout_dict['page']]]  # request that page from wiki
     pagetext = page.text()
     pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
@@ -101,10 +112,15 @@ for answer in site.ask(query):
     if printout_dict['Part'] == printout_dict['Partof']:
         # RENDER DOCUMENT
         # by passing all_document_parts html to document_template content
-        document_html = document_template.render(title=printout_dict.get('Title'),
-                                       date=printout_dict.get('Date'),
-                                       content=all_document_parts)  # render document template
-        htmlpage_fn = "{}.html".format(printout_dict.get('Title').replace(" ", ""))
+        
+        # TODO: EXPAND PROPERTIES IN TEMPLATE
+        
+        document_html = document_template.render(
+            title=printout_dict.get('Title'),
+            date=printout_dict.get('Date'),
+            content=all_document_parts)  # render document template
+        htmlpage_fn = "{}.html".format(
+            printout_dict.get('Title')[0].replace(" ", ""))
         with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
             htmlfile.write(document_html)
         all_document_parts = ''  # Reset all_document_parts
diff --git a/templates/document.html b/templates/document.html
index 4748c77..72687fd 100644
--- a/templates/document.html
+++ b/templates/document.html
@@ -3,10 +3,10 @@
 <head>
     <meta charset="utf-8">
     <link rel="stylesheet" href="../static/style.css" />
-    <title>{{title}}</title>
+    <title>{{ title[0] }}</title>
 </head>
 <body>
-    <h1>{{ title }}</h1>
+    <h1>{{ title[0] }}</h1>
     <p><time datetime="{{date}}">{{ date.year }}.{{ date.month }}.{{ date.day }}   </time></p>
     <div id="content">
         {{ content }}
diff --git a/templates/document_part.html b/templates/document_part.html
index c435c5a..feb365a 100644
--- a/templates/document_part.html
+++ b/templates/document_part.html
@@ -11,17 +11,32 @@
 
     <div class="metadata">
         <h3>Metadata</h3>
-        {% for key, value in  printout_dict.items() %}
+        {% for key, valuelist in  printout_dict.items() %}
             {% if key == 'Date' %}
                 <div class="metadata_{{key}}">
                     <span class="key">{{key}}</span>
-                    <span class="value">{{value.year}} {{value.month}} {{value.day}}</span>
+                    <span class="value">{{valuelist.year}} {{valuelist.month}} {{valuelist.day}}</span>
+                    {# Note: in Date valuelist is not a list but a dateitme object #}
                 </div>
-            {% else %}
+
+
                 <div class="metadata_{{key}}">
-                    <span class="key">{{key|upper}}</span>
-                    <span class="value">{{value}}</span>
+                    {% if key == 'Date' %}
+                        <span class="key">{{key}}</span>
+                        <span class="value">{{valuelist.year}} {{valuelist.month}} {{valuelist.day}}</span>
+                    {% elif key == 'page' %}
+                        <span class="key">{{key|upper}}</span>
+                        <span class="key">{{valuelist}}</span>
+                    {% else %}
+                        <span class="key">{{key|upper}}</span>
+                        {% for val in valuelist %}
+                        <span class="key">{{key|upper}}</span>
+                        {% endfor %}
+                    {% endif %}
                 </div>
+
+                {# TODO: resolve all these metadata items #}
+
             {% endif %}
         {% endfor %}
     </div>
diff --git a/templates/index.html b/templates/index.html
index 55628bc..af2286e 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -8,7 +8,7 @@
 <h3>Results from query:<br/><code>{{query}}</code></h3>
     <ul>
     {% for doc in documentslist %}
-        <li><a href="./{{ doc['file'] }}">{{ doc['title'] }}</a>
+        <li><a href="./{{ doc['file'] }}">{{ doc['title'][0] }}</a>
        {{ doc['date'].year }}.{{ doc['date'].month }}.{{ doc['date'].day }}
         {{doc['creator']}}
         </li>

From 5ba753199bc41baca73c0ffe64a52b74d0f08a87 Mon Sep 17 00:00:00 2001
From: Castro0o <andre@andrecastro>
Date: Wed, 4 Mar 2020 11:56:15 +0100
Subject: [PATCH 5/6] correct document_part template

---
 templates/document_part.html | 33 ++++++++++-----------------------
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/templates/document_part.html b/templates/document_part.html
index feb365a..18bcebc 100644
--- a/templates/document_part.html
+++ b/templates/document_part.html
@@ -12,33 +12,20 @@
     <div class="metadata">
         <h3>Metadata</h3>
         {% for key, valuelist in  printout_dict.items() %}
-            {% if key == 'Date' %}
-                <div class="metadata_{{key}}">
+            <div class="metadata_{{key}}">
+                {% if key == 'Date' %}
                     <span class="key">{{key}}</span>
                     <span class="value">{{valuelist.year}} {{valuelist.month}} {{valuelist.day}}</span>
-                    {# Note: in Date valuelist is not a list but a dateitme object #}
-                </div>
-
-
-                <div class="metadata_{{key}}">
-                    {% if key == 'Date' %}
-                        <span class="key">{{key}}</span>
-                        <span class="value">{{valuelist.year}} {{valuelist.month}} {{valuelist.day}}</span>
-                    {% elif key == 'page' %}
-                        <span class="key">{{key|upper}}</span>
-                        <span class="key">{{valuelist}}</span>
-                    {% else %}
-                        <span class="key">{{key|upper}}</span>
-                        {% for val in valuelist %}
+                {% elif key == 'page' %}
+                    <span class="key">{{key|upper}}</span>
+                    <span class="value">{{valuelist}}</span>
+                {% else %}
+                    {% if valuelist|length > 0 %}
                         <span class="key">{{key|upper}}</span>
-                        {% endfor %}
+                        <span class="value">{{valuelist | join(", ")}}</span>
                     {% endif %}
-                </div>
-
-                {# TODO: resolve all these metadata items #}
-
-            {% endif %}
+                {% endif %}
+            </div>
         {% endfor %}
     </div>
-
 </div>

From f4b9fae02076edbcdf6a98435ab146de2d33b4b3 Mon Sep 17 00:00:00 2001
From: Castro0o <andre@andrecastro>
Date: Wed, 4 Mar 2020 12:11:47 +0100
Subject: [PATCH 6/6] removing non-word chars from html filename; creator in
 index as string; cleaned old code from functions.py

---
 functions.py         | 19 +------------------
 query2html.py        | 16 ++++------------
 templates/index.html |  2 +-
 3 files changed, 6 insertions(+), 31 deletions(-)

diff --git a/functions.py b/functions.py
index 29aac39..ce6d4a2 100644
--- a/functions.py
+++ b/functions.py
@@ -43,11 +43,10 @@ def unpack_response(response):
     printouts = response['printouts']
     page = response['fulltext']
     fullurl = response['fullurl']
+    # convert OrderedDict to a plain dict via a json.dumps/json.loads round trip
     printouts_dumps = json.dumps(printouts)
     printouts_loads = json.loads(printouts_dumps)
     printouts_loads['page'] = page
-    # printouts_loads['Date'] = datetime.fromtimestamp(
-    #     int(printouts_loads['Date'][0]['timestamp']))
     simplified_printouts = {}
     for k, v in printouts_loads.items():
         if k == 'Date':
@@ -61,22 +60,6 @@ def unpack_response(response):
             simplified_printouts[k] = []
             for listitem in v:
                 simplified_printouts[k].append(listitem['fulltext'])
-
-    # for prop in printouts:
-    #     p_item = response['printouts'][prop]
-    #     for prop_val in p_item:
-    #         if isinstance(prop_val, dict) is False:
-    #             d[prop] = prop_val
-    #         else:
-    #             # if len(prop_val) > 0:
-    #             props = list(prop_val.keys())
-    #             if 'fulltext' in props:
-    #                 val = prop_val.get('fulltext')
-    #             elif 'timestamp' in props:
-    #                 val = datetime.fromtimestamp(int(prop_val.get('timestamp')))
-    #             else:
-    #                 val = list(prop_val.values())[0]
-    #             d[prop] = val
     return page, simplified_printouts, fullurl
 
 
diff --git a/query2html.py b/query2html.py
index 9e234d9..457f5f5 100644
--- a/query2html.py
+++ b/query2html.py
@@ -2,7 +2,7 @@ import os, json, sys, urllib
 from mwclient import Site
 from pprint import pprint
 from jinja2 import Template
-from functions import pandoc, page_props, unpack_response, clean_dir
+from functions import pandoc, unpack_response, clean_dir, remove_nonwords
 
 from functions import Colors
 import argparse
@@ -89,13 +89,7 @@ for answer in site.ask(query):
         print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images")
         print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
         sys.exit()
-    #
-    # # TODO: EXTRACT PROPERTIES THROUGH THE FOLLOWING ASK QUERY
-    # ask_page_props = f'[[File:{printout_dict["page"]}]]|?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language'
-    # print(ask_page_props)
-    # page_props = site.ask(ask_page_props)
-    # print(page_props)
-    # import pdb; pdb.set_trace()
+
     page = site.pages[[printout_dict['page']]]  # request that page from wiki
     pagetext = page.text()
     pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
@@ -112,15 +106,13 @@ for answer in site.ask(query):
     if printout_dict['Part'] == printout_dict['Partof']:
         # RENDER DOCUMENT
         # by passing all_document_parts html to document_template content
-        
-        # TODO: EXPAND PROPERTIES IN TEMPLATE
-        
         document_html = document_template.render(
             title=printout_dict.get('Title'),
             date=printout_dict.get('Date'),
             content=all_document_parts)  # render document template
         htmlpage_fn = "{}.html".format(
-            printout_dict.get('Title')[0].replace(" ", ""))
+            remove_nonwords(printout_dict.get('Title')[0])
+        )
         with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
             htmlfile.write(document_html)
         all_document_parts = ''  # Reset all_document_parts
diff --git a/templates/index.html b/templates/index.html
index af2286e..efe90e2 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -10,7 +10,7 @@
     {% for doc in documentslist %}
         <li><a href="./{{ doc['file'] }}">{{ doc['title'][0] }}</a>
        {{ doc['date'].year }}.{{ doc['date'].month }}.{{ doc['date'].day }}
-        {{doc['creator']}}
+        {{doc['creator'] | join(", ")}}
         </li>
     {% endfor %}
     </ul>