|
|
|
import os, json, re, shlex, sys
|
|
|
|
import subprocess
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
def pandoc(pwd, content, format_in, format_out):
    """Convert ``content`` from one markup format to another with pandoc.

    The text is written to ``<pwd>/.mediawiki_content``, the ``pandoc``
    executable is run on that file, and the result is read back from
    ``<pwd>/.html_content``. Both scratch files are left in place.

    Args:
        pwd: Directory in which the two scratch files are created.
        content: Source text to convert.
        format_in: Value for pandoc's ``-f`` option. It is split with shell
            rules, so a value carrying extra whitespace-separated options
            keeps working.
        format_out: Value for pandoc's ``-t`` option; split the same way.

    Returns:
        The text found in the output scratch file after pandoc ran. If
        pandoc wrote nothing (for example because it failed), this is an
        empty string.

    Raises:
        OSError: If a scratch file cannot be opened or the ``pandoc``
            executable cannot be started.
    """
    # tmp files
    mw_tmp_fn = os.path.join(pwd, '.mediawiki_content')
    html_tmp_fn = os.path.join(pwd, '.html_content')

    # Opening with 'w' creates the file when missing, so no os.mknod (which
    # is not available on every platform) is needed.
    with open(mw_tmp_fn, 'w') as mw_tmp_file:
        mw_tmp_file.write(content)

    # Empty the output file first: if pandoc fails, the caller must not get
    # the result of an earlier conversion back.
    with open(html_tmp_fn, 'w'):
        pass

    # Build the argument list directly so scratch paths containing spaces or
    # quotes reach pandoc intact; only the format values are shell-split.
    pandoc_cmd = [
        'pandoc', mw_tmp_fn,
        '-f', *shlex.split(str(format_in)),
        '-t', *shlex.split(str(format_out)),
        '-o', html_tmp_fn,
    ]
    subprocess.call(pandoc_cmd)

    with open(html_tmp_fn, 'r') as html_tmp_file:
        return html_tmp_file.read()
|
|
|
|
|
|
|
|
|
|
|
|
def page_props(wikicontent):
    """Collect ``|name=value`` pairs found in a piece of wiki text.

    A pair starts at a ``|``, followed by a run of word characters (possibly
    empty), an ``=``, and then everything up to the end of that line.

    Args:
        wikicontent: Text to scan.

    Returns:
        Dict mapping each name to its value. When a name occurs more than
        once, the last occurrence wins.
    """
    prop_pattern = re.compile(r'\|(\w*?)\=(.*)', flags=re.M)
    props = {}
    for hit in prop_pattern.finditer(wikicontent):
        key, value = hit.groups()
        props[key] = value
    return props
|
|
|
|
|
|
|
|
|
|
|
|
def unpack_response(response):
    """Flatten one query-result entry into plain Python values.

    Args:
        response: Mapping with the keys ``'printouts'``, ``'fulltext'`` and
            ``'fullurl'``. ``'printouts'`` is iterated for property names
            and indexed by each name to get an iterable of values.
            (Presumably one result of a Semantic MediaWiki ask query;
            verify against the caller.)

    Returns:
        Tuple ``(page, d, fullurl)``: ``page`` and ``fullurl`` are taken
        straight from ``response``; ``d`` holds ``'page'`` plus one entry
        per property that had a usable value. Each value is reduced as
        follows:

        * non-dict values are stored as they are;
        * dicts with a ``'fulltext'`` key yield that key's value;
        * otherwise dicts with a ``'timestamp'`` key yield a ``datetime``
          built with ``datetime.fromtimestamp``;
        * any other non-empty dict yields its first value;
        * empty dicts are skipped.

        When a property has several values, the last usable one is kept.
    """
    # printout is ordered dict
    printouts = response['printouts']
    page = response['fulltext']
    fullurl = response['fullurl']
    d = {'page': page}
    # Iterate and index instead of calling .items(): an empty list in place
    # of the mapping then simply yields no properties, as it did before.
    for prop in printouts:
        for prop_val in printouts[prop]:
            if not isinstance(prop_val, dict):
                d[prop] = prop_val
            elif not prop_val:
                # An empty dict has no first value to fall back on; taking
                # it used to raise IndexError.
                continue
            elif 'fulltext' in prop_val:
                d[prop] = prop_val.get('fulltext')
            elif 'timestamp' in prop_val:
                d[prop] = datetime.fromtimestamp(
                    int(prop_val.get('timestamp')))
            else:
                d[prop] = next(iter(prop_val.values()))
    return page, d, fullurl
|
|
|
|
|
|
|
|
|
|
|
|
def update_json(imgsjson_fn, img_dict, img_fn):
    """Record an image in the JSON index and say whether to download it.

    The index file is loaded if it exists, otherwise an empty index is used.
    The index is written back on every call.

    Args:
        imgsjson_fn: Path of the JSON index file.
        img_dict: Image description; its ``'name'`` is the index key and its
            ``'timestamp'`` is compared with the indexed entry.
        img_fn: Path where the image file is expected on disk.

    Returns:
        ``True`` when the image is not indexed yet, its file is missing on
        disk, or its timestamp differs from the indexed one (the entry is
        then stored or replaced); ``False`` otherwise.
    """
    registry = {}
    if os.path.isfile(imgsjson_fn):
        with open(imgsjson_fn, 'r') as registry_file:
            registry = json.load(registry_file)

    name = img_dict['name']
    if name not in registry:
        download = True
    else:
        # Both checks are always evaluated, file presence first.
        on_disk = os.path.isfile(img_fn)
        unchanged = registry[name]['timestamp'] == img_dict['timestamp']
        download = not (on_disk and unchanged)

    if download:
        # New or outdated entry: index the current description.
        registry[name] = img_dict

    with open(imgsjson_fn, 'w') as registry_file:
        json.dump(registry, registry_file, indent=4)

    return download
|
|
|
|
|
|
|
|
|
|
|
|
def clean_dir(dirfullpath):
    """Delete every regular file that sits directly in ``dirfullpath``.

    Entries that are not files (such as sub-directories) are left alone.

    Args:
        dirfullpath: Path of the directory to empty of files.
    """
    for entry_name in os.listdir(dirfullpath):
        entry_path = os.path.join(dirfullpath, entry_name)
        if not os.path.isfile(entry_path):
            continue
        os.remove(entry_path)
|
|
|
|
|
|
|
|
def print_colormsg(msg, level):
    """Print ``msg`` preceded by the colour code that matches ``level``.

    Each piece goes through its own ``print`` call: the colour code (only
    for a recognised level), then the message, then the reset code.

    Args:
        msg: Message to print.
        level: ``'fail'``, ``'warning'`` or ``'ok'``; any other value prints
            no colour code before the message.
    """
    level_codes = (
        ('fail', Colors.FAIL),
        ('warning', Colors.WARNING),
        ('ok', Colors.BLUE),
    )
    for known_level, code in level_codes:
        if level == known_level:
            print(code)
            break
    print(msg)
    print(Colors.ENDC)
|
|
|
|
|
|
|
|
|
|
|
|
class Colors:
    """ANSI escape sequences for styling terminal output."""

    # foreground colours
    HEADER = '\033[95m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # reset all attributes
    ENDC = '\033[0m'

    # text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
|
|
|
|
|
|
|
|
# image upload function
|
|
|
|
|
|
|
|
def listimgs(dir):
    """Return the sorted names of the image entries in ``dir``.

    An entry counts as an image when its extension, compared
    case-insensitively, is ``.jpg``, ``.jpeg`` or ``.png``.

    Args:
        dir: Directory to list.

    Returns:
        Sorted list of matching entry names (names only, not full paths).
    """
    image_exts = ['.jpg', '.jpeg', '.png']
    matches = []
    for entry in os.listdir(dir):
        suffix = os.path.splitext(entry)[-1]
        if suffix.lower() in image_exts:
            matches.append(entry)
    return sorted(matches)
|
|
|
|
|
|
|
|
|
|
|
|
def reorder_imgs(dir, dry):
    """Zero-pad single-digit image numbers in ``dir`` and list the images.

    Every image name returned by ``listimgs(dir)`` must have the form
    ``<anything><digits><extension>``. A name whose number is a single digit
    is renamed so the number is padded to three digits (``1.jpg`` becomes
    ``001.jpg``); names with longer numbers are left as they are.

    Args:
        dir: Directory holding the images.
        dry: When equal to ``False`` the renames are carried out; for any
            other value they are only printed.

    Returns:
        ``listimgs(dir)`` as it is after the renames.

    Raises:
        SystemExit: With status 1 if an image name does not follow the
            expected pattern.
    """
    for img in listimgs(dir):
        img_ext = os.path.splitext(img)[-1]
        # does file follow \d{1,}\.img_ext
        numb_exp = re.compile(
            r'(?P<name>.*?)(?P<num>\d+)(?P<ext>%s)' % re.escape(img_ext))
        # fullmatch: the digits must sit right before the real extension at
        # the end of the name. An unanchored search could stop at an earlier
        # "<digits><ext>" and the rename would drop the rest of the name.
        match = numb_exp.fullmatch(img)
        if not match:
            print(f'Image {img} Filename is not suitable for bulk upload. '
                  f'Filename pattern doesn\'t match 1.jpg 01.jpg '
                  f'something01.jpg. You have to DO IT MANUALLY')
            # Non-zero status so calling scripts can see the failure.
            sys.exit(1)

        # only change name of single digit numbers
        if len(match.group('num')) != 1:
            continue

        new_img = (match.group('name')
                   + match.group('num').zfill(3)  # pad with 0s
                   + match.group('ext'))
        src_img = os.path.join(dir, img)
        dst_img = os.path.join(dir, new_img)
        if os.path.exists(dst_img):
            # os.replace would silently overwrite the existing file.
            print(f'Skipping: {img} >>>>> {new_img} (target already exists)')
            continue
        print(f'Renaming: {img} >>>>> {new_img}')
        # Kept as an equality test on purpose: only False (or 0) enables the
        # rename, so a value such as None still behaves as a dry run.
        if dry == False:  # noqa: E712
            os.replace(src_img, dst_img)
    return listimgs(dir)  # update list w/ renamed imgs
|
|
|
|
|
|
|
|
# Wiki text for an ImageMetadata template call with one {placeholder} per
# property. Written as a raw string because "\{" and "\}" are not valid
# escape sequences in a normal string literal; the backslashes are part of
# the value.
# NOTE(review): the outer braces are backslash-escaped rather than doubled,
# so str.format() would not treat them as literal braces - confirm how the
# placeholders are filled in by the code that uses this template.
smw_propval_template = r'''
\{\{ImageMetadata
|Title={title}
|Date={date}
|Part={part}
|Partof={partof}
|Creator={creator}
|Organization={organization}
|Format={format}
|Event={event}
|Topic={topic}
\}\}
[[Template:ImageMetadata]]
'''
|