diff --git a/download_imgs.py b/download_imgs.py
index 5ba671a..a8af106 100644
--- a/download_imgs.py
+++ b/download_imgs.py
@@ -1,8 +1,9 @@
import os
from mwclient import Site
from pprint import pprint
-from functions import update_json
from PIL import Image
+from functions import update_json, remove_nonwords
+
site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
wd = os.path.dirname(os.path.abspath(__file__)) # working directory
@@ -28,7 +29,7 @@ for img in site.allimages():
# important img info to dictionary
img_dict = {
'name': img.name,
- 'filename': img.page_title,
+ 'filename': remove_nonwords(img.page_title),
'timestamp': img.imageinfo['timestamp'],
'url': img.imageinfo['url'],
'urldesc': img.imageinfo['descriptionshorturl'],
@@ -37,6 +38,7 @@ for img in site.allimages():
# location of image storage
img_fn = os.path.join(imgdir, img_dict['filename'])
+ print(img_fn)
# function updates images.json and returns whether the img needs to be downloaded or not
download = update_json(imgsjson_fn, img_dict, img_fn)
@@ -47,16 +49,18 @@ for img in site.allimages():
img.download(destination=img_file)
# resize image
- pilimg = Image.open(img_fn)
- pilimg_dim = list(pilimg._size)
- pilimg_dim_sort = sorted(pilimg_dim) # smallest dimension 1st
- img_ratio = pilimg_dim_sort[0] / pilimg_dim_sort[1]
- if pilimg_dim == pilimg_dim_sort:
- # if height was largest
- new_dim = [(thumbnail_size * img_ratio), thumbnail_size]
- else:
- # if with was largest
- new_dim = [thumbnail_size,(thumbnail_size * img_ratio)]
- pilimg.thumbnail(new_dim)
- pilimg.save(img_fn)
+ fn, ext = os.path.splitext(img_fn)
+ if ext.lower() in ['.jpg', '.jpeg', '.gif', '.png']: # only img format
+ pilimg = Image.open(img_fn)
+ pilimg_dim = list(pilimg._size)
+ pilimg_dim_sort = sorted(pilimg_dim) # smallest dimension 1st
+ img_ratio = pilimg_dim_sort[0] / pilimg_dim_sort[1]
+ if pilimg_dim == pilimg_dim_sort:
+ # if height was largest
+ new_dim = [(thumbnail_size * img_ratio), thumbnail_size]
+ else:
+ # if width was largest
+ new_dim = [thumbnail_size,(thumbnail_size * img_ratio)]
+ pilimg.thumbnail(new_dim)
+ pilimg.save(img_fn)
print('\n')
diff --git a/functions.py b/functions.py
index 4c6b438..ce6d4a2 100644
--- a/functions.py
+++ b/functions.py
@@ -3,6 +3,12 @@ import subprocess
from datetime import datetime
+def remove_nonwords(imgname):
+ filename, ext = os.path.splitext(imgname) # split into filename & extension
+ filename = re.sub(r'\W', '', filename) # remove non-word characters (regex \W) from filename only; ext is kept as-is
+ return f'{filename}{ext}' # rejoin cleaned filename & original extension
+
+
def pandoc(pwd, content, format_in, format_out):
# print('HTML content file:', wiki_content_f.name)
@@ -37,23 +43,24 @@ def unpack_response(response):
printouts = response['printouts']
page = response['fulltext']
fullurl = response['fullurl']
- d['page'] = page
- for prop in printouts:
- p_item = response['printouts'][prop]
- for prop_val in p_item:
- if isinstance(prop_val, dict) is False:
- d[prop] = prop_val
- else:
- # if len(prop_val) > 0:
- props = list(prop_val.keys())
- if 'fulltext' in props:
- val = prop_val.get('fulltext')
- elif 'timestamp' in props:
- val = datetime.fromtimestamp(int(prop_val.get('timestamp')))
- else:
- val = list(prop_val.values())[0]
- d[prop] = val
- return page, d, fullurl
+ # convert OrderedDict to a plain dict via a json.dumps / json.loads round trip
+ printouts_dumps = json.dumps(printouts)
+ printouts_loads = json.loads(printouts_dumps)
+ printouts_loads['page'] = page
+ simplified_printouts = {}
+ for k, v in printouts_loads.items():
+ if k == 'Date':
+ simplified_printouts[k] = datetime.fromtimestamp(
+ int(v[0]['timestamp']))
+ # elif k == 'Title':
+ # simplified_printouts[k] = v[0]['fulltext']
+ elif k in ['Part', 'Partof', 'page']: # only 1 value for each
+ simplified_printouts[k] = v
+ else: # Possibly more than 1 value for the rest of properties
+ simplified_printouts[k] = []
+ for listitem in v:
+ simplified_printouts[k].append(listitem['fulltext'])
+ return page, simplified_printouts, fullurl
def update_json(imgsjson_fn, img_dict, img_fn):
@@ -95,6 +102,7 @@ def clean_dir(dirfullpath):
os.remove(f)
def print_colormsg(msg, level):
+ color_cmd = ''
if level == 'fail':
color_cmd = Colors.FAIL
elif level == 'warning':
diff --git a/images2html.py b/images2html.py
deleted file mode 100644
index b6de07a..0000000
--- a/images2html.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import os, json
-from mwclient import Site
-from pprint import pprint
-from jinja2 import Template
-from functions import pandoc, page_props
-
-site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
-wd = os.path.dirname(os.path.abspath(__file__)) # working directory
-imgdir = os.path.join(wd, 'images')
-imgsjson_fn = os.path.join(wd, 'images.json')
-with open(imgsjson_fn, 'r') as imgsjson_file:
- images_info = json.load(imgsjson_file)
-
-static_html = os.path.join(wd, 'static_html')
-os.makedirs(static_html, exist_ok=True) # create images/ dir
-
-with open(os.path.join(wd, 'login.txt'), 'r') as login: # read login user & pwd
- loginlines = login.read()
- user, pwd = loginlines.split('\n')
- site.login(username=user, password=pwd) # login to wiki
-
-
-page_html_template = '''
-
-
-
-
-
- {{title}}
-
-
- {{ title }}
-
-
-
-
-
- {{ content }}
-
-
-
-
-'''
-page_template = Template(page_html_template)
-
-
-for img_info in images_info.values():
- print(img_info)
- page_name = img_info['name']
- page = site.pages[page_name]
- # print(page)
- # pprint(page.__dict__)
- # print(dir(page))
- pagetext = page.text()
- pageproperties = page_props(wikicontent=pagetext)
- print(pageproperties)
-
- if pageproperties.get('Title'):
- pagetext_html = pandoc(pwd=wd ,content=pagetext, format_in='mediawiki', format_out='html')
- # print('pagetext', pagetext)
- # print('pagetext_html', pagetext_html)
- page_html = page_template.render(title=pageproperties.get('Title'),
- date=pageproperties.get('Date'),
- imgsrc=os.path.join(imgdir, img_info.get('filename')),
- content=pagetext_html,
- part=pageproperties.get('Part'),
- partof=pageproperties.get('Partof'))
- htmlpage_fn = "{}_{}.html".format(
- pageproperties.get('Title').replace(" ", ""),
- pageproperties.get('Part').zfill(3)
- )
- print(htmlpage_fn)
- with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
- htmlfile.write(page_html)
diff --git a/query2html.py b/query2html.py
index 018eb9a..457f5f5 100644
--- a/query2html.py
+++ b/query2html.py
@@ -2,7 +2,7 @@ import os, json, sys, urllib
from mwclient import Site
from pprint import pprint
from jinja2 import Template
-from functions import pandoc, page_props, unpack_response, clean_dir
+from functions import pandoc, unpack_response, clean_dir, remove_nonwords
from functions import Colors
import argparse
@@ -15,7 +15,7 @@ p.add_argument("--conditions", "-c", metavar='',
default='[[File:+]][[Title::+]][[Part::+]][[Date::+]]',
help='The query conditions')
p.add_argument("--printouts", "-p", metavar='',
- default='?Title|?Date|?Part|?Partof|?Creator',
+ default='?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language',
help='Selection of properties to printout')
p.add_argument("--sort", "-s", metavar='',
default='Date,Title,Part',
@@ -23,6 +23,8 @@ p.add_argument("--sort", "-s", metavar='',
p.add_argument("--order", "-o", metavar='',
default='asc,asc,asc',
help='Order of sorting conditions. Should same amount as the --sort properties')
+p.add_argument('--limit', '-l', help='(optional) Limit the number of returned '
+ 'items')
p.add_argument('--dry', '-d', action='store_true',
help='dry-run: will only show the query but not run it')
@@ -33,8 +35,10 @@ if len(args.sort.split(',')) != len(args.order.split(',')):
Colors.WARNING, '--sort and --order do not have the same amount of elements', Colors.ENDC)
print('Script exiting now')
sys.exit()
-
query = f'{args.conditions}|{args.printouts}|sort={args.sort}|order={args.order}'
+if args.limit:
+ limit_str = f'|limit={args.limit}'
+ query += limit_str
print('query:', Colors.GREEN, query, Colors.ENDC)
query_unquoted = urllib.parse.quote(query)
query_url = f'https://{args.host}{args.path}api.php?action=ask&query={query_unquoted}&format=json'
@@ -75,7 +79,7 @@ with open(os.path.join(wd, 'templates/document_part.html')) as document_html:
all_document_parts = '' # to append all content
documentslist = []
for answer in site.ask(query):
- publication_title = ''
+ # publication_title = ''
# print(answer, answer.keys())
page, printout_dict, fullurl = unpack_response(answer)
print(page)
@@ -85,6 +89,7 @@ for answer in site.ask(query):
print(Colors.WARNING, f"{printout_dict['page']} is not is missing from the local downloaded images")
print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
sys.exit()
+
page = site.pages[[printout_dict['page']]] # request that page from wiki
pagetext = page.text()
pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
@@ -101,10 +106,13 @@ for answer in site.ask(query):
if printout_dict['Part'] == printout_dict['Partof']:
# RENDER DOCUMENT
# by passing all_document_parts html to document_template content
- document_html = document_template.render(title=printout_dict.get('Title'),
- date=printout_dict.get('Date'),
- content=all_document_parts) # render document template
- htmlpage_fn = "{}.html".format(printout_dict.get('Title').replace(" ", ""))
+ document_html = document_template.render(
+ title=printout_dict.get('Title'),
+ date=printout_dict.get('Date'),
+ content=all_document_parts) # render document template
+ htmlpage_fn = "{}.html".format(
+ remove_nonwords(printout_dict.get('Title')[0])
+ )
with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
htmlfile.write(document_html)
all_document_parts = '' # Reset all_document_parts
diff --git a/templates/document.html b/templates/document.html
index 4748c77..72687fd 100644
--- a/templates/document.html
+++ b/templates/document.html
@@ -3,10 +3,10 @@
- {{title}}
+ {{ title[0] }}
- {{ title }}
+ {{ title[0] }}
{{ content }}
diff --git a/templates/document_part.html b/templates/document_part.html
index c435c5a..18bcebc 100644
--- a/templates/document_part.html
+++ b/templates/document_part.html
@@ -11,19 +11,21 @@
-
diff --git a/templates/index.html b/templates/index.html
index 55628bc..efe90e2 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -8,9 +8,9 @@
Results from query:
{{query}}
{% for doc in documentslist %}
- - {{ doc['title'] }}
+
- {{ doc['title'][0] }}
{{ doc['date'].year }}.{{ doc['date'].month }}.{{ doc['date'].day }}
- {{doc['creator']}}
+ {{doc['creator'] | join(", ")}}
{% endfor %}