import json
import os
import re
import shlex
import subprocess
import sys
from datetime import datetime

# Matches one "|key=value" template parameter per line.
_PAGE_PROP_RE = re.compile(r'\|(\w*?)\=(.*)', flags=re.M)

# File extensions (lower-case) that listimgs() treats as images.
_IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def pandoc(pwd, content, format_in, format_out):
    """Convert *content* between markup formats by calling the pandoc CLI.

    Args:
        pwd: Directory in which the two scratch files are written.
        content: Text to convert.
        format_in: Value passed to pandoc's ``-f`` option.
        format_out: Value passed to pandoc's ``-t`` option.

    Returns:
        The text pandoc wrote to the output scratch file, or an empty
        string if pandoc produced no output.
    """
    mw_tmp_fn = os.path.join(pwd, '.mediawiki_content')
    html_tmp_fn = os.path.join(pwd, '.html_content')

    # pandoc reads and writes UTF-8, so do not depend on the locale default.
    with open(mw_tmp_fn, 'w', encoding='utf-8') as mw_tmp_file:
        mw_tmp_file.write(content)

    # Start from an empty output file so that a failed pandoc run cannot
    # hand back the result of a previous conversion.
    with open(html_tmp_fn, 'w', encoding='utf-8'):
        pass

    # Build an argument list so paths containing spaces or backslashes
    # survive intact. The format strings are still split shell-style,
    # exactly as they were when the whole command line was split.
    pandoc_cmd = [
        'pandoc', mw_tmp_fn,
        '-f', *shlex.split(format_in),
        '-t', *shlex.split(format_out),
        '-o', html_tmp_fn,
    ]
    subprocess.call(pandoc_cmd)

    with open(html_tmp_fn, 'r', encoding='utf-8') as html_tmp_file:
        return html_tmp_file.read()


def page_props(wikicontent):
    """Return the ``|key=value`` pairs found in *wikicontent* as a dict.

    Each value runs to the end of its line. When a key occurs more than
    once the last occurrence wins.
    """
    return {key: value for key, value in _PAGE_PROP_RE.findall(wikicontent)}


def unpack_response(response):
    """Flatten one query result into ``(page, properties, fullurl)``.

    Args:
        response: Mapping with the keys ``'printouts'``, ``'fulltext'``
            and ``'fullurl'``. ``'printouts'`` maps each property name to
            a list of values.

    Returns:
        A tuple ``(page, d, fullurl)`` where *d* holds ``'page'`` plus one
        entry per property that had at least one usable value. For a
        multi-valued property the last value wins.
    """
    page = response['fulltext']
    fullurl = response['fullurl']
    d = {'page': page}

    for prop, p_item in response['printouts'].items():
        for prop_val in p_item:
            if not isinstance(prop_val, dict):
                d[prop] = prop_val
                continue
            if not prop_val:
                # An empty dict carries no value; skipping it avoids the
                # IndexError the first-value fallback would raise.
                continue
            if 'fulltext' in prop_val:
                val = prop_val.get('fulltext')
            elif 'timestamp' in prop_val:
                val = datetime.fromtimestamp(int(prop_val.get('timestamp')))
            else:
                val = next(iter(prop_val.values()))
            d[prop] = val

    return page, d, fullurl


def update_json(imgsjson_fn, img_dict, img_fn):
    """Record *img_dict* in the JSON index and report if a download is due.

    Args:
        imgsjson_fn: Path of the JSON index file; created if missing.
        img_dict: Image metadata; must contain ``'name'`` and
            ``'timestamp'``.
        img_fn: Path where the image file is expected on disk.

    Returns:
        ``False`` when the index already lists the image with the same
        timestamp and *img_fn* exists; ``True`` otherwise, in which case
        the index entry is replaced by *img_dict*.
    """
    if os.path.isfile(imgsjson_fn):
        with open(imgsjson_fn, 'r') as imgsjson_file:
            imgsjson_dict = json.load(imgsjson_file)
    else:
        imgsjson_dict = {}

    name = img_dict['name']
    if name in imgsjson_dict:
        img_issaved = os.path.isfile(img_fn)
        img_samets = imgsjson_dict[name]['timestamp'] == img_dict['timestamp']
        download = not (img_issaved and img_samets)
    else:
        download = True

    if download:
        imgsjson_dict[name] = img_dict

    # The index is rewritten on every call, even when nothing changed.
    with open(imgsjson_fn, 'w') as imgsjson_file:
        json.dump(imgsjson_dict, imgsjson_file, indent=4)
    return download


def clean_dir(dirfullpath):
    """Delete every regular file directly inside *dirfullpath*.

    Sub-directories and their contents are left untouched.
    """
    for entry in os.listdir(dirfullpath):
        path = os.path.join(dirfullpath, entry)
        if os.path.isfile(path):
            os.remove(path)


def print_colormsg(msg, level):
    """Print *msg* preceded by the colour code for *level*.

    Known levels are ``'fail'``, ``'warning'`` and ``'ok'``; any other
    value prints no colour code. The colour code, the message and the
    reset code are each printed on a line of their own.
    """
    level_colors = {
        'fail': Colors.FAIL,
        'warning': Colors.WARNING,
        'ok': Colors.BLUE,
    }
    color = level_colors.get(level)
    if color is not None:
        print(color)
    print(msg)
    print(Colors.ENDC)


class Colors:
    """ANSI escape sequences for terminal text styling."""

    HEADER = '\033[95m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'  # resets all styling
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


# image upload function
def listimgs(dir):
    """Return the sorted names of the .jpg/.jpeg/.png entries in *dir*.

    The extension is compared case-insensitively.
    """
    return sorted(
        _file for _file in os.listdir(dir)
        if os.path.splitext(_file)[-1].lower() in _IMG_EXTENSIONS
    )


def reorder_imgs(dir, dry):
    """Zero-pad single-digit image numbers and return the image list.

    Every image name must contain a number directly before its extension
    (``1.jpg``, ``01.jpg``, ``something01.jpg``). Names whose number is a
    single digit are renamed with the number padded to three digits.

    Args:
        dir: Directory holding the images.
        dry: When ``False`` the files are renamed; otherwise the planned
            renames are only printed.

    Returns:
        The sorted image names in *dir* after any renaming.

    Raises:
        SystemExit: If an image name does not follow the pattern. All
            names are checked before the first rename, so nothing has
            been renamed when this is raised.
    """
    renames = []
    for img in listimgs(dir):
        img_ext = os.path.splitext(img)[1]
        # <name><num><ext>: the digits must sit right before the extension.
        numb_exp = re.compile(
            r'(?P<name>.*?)(?P<num>\d+)(?P<ext>%s)' % re.escape(img_ext))
        match = numb_exp.search(img)
        if not match:
            print(f'Image {img} Filename is not suitable for bulk upload. '
                  f'Filename pattern doesn\'t match 1.jpg 01.jpg '
                  f'something01.jpg. You have to DO IT MANUALLY')
            sys.exit(1)
        # only change name of single digit numbers
        if len(match.group('num')) == 1:
            new_img = (match.group('name')
                       + match.group('num').zfill(3)  # pad with 0s
                       + match.group('ext'))
            renames.append((img, new_img))

    for img, new_img in renames:
        print(f'Renaming: {img} >>>>> {new_img}')
        # Compared with == on purpose: any value that does not equal
        # False (None included) keeps this a dry run, as it always has.
        if dry == False:  # noqa: E712
            os.replace(os.path.join(dir, img), os.path.join(dir, new_img))

    return listimgs(dir)  # update list w/ renamed imgs


# The backslashes are literal characters of the template text; they are
# written as explicit escapes because "\{" is not a valid escape sequence.
smw_propval_template = '''
\\{\\{ImageMetadata
|Title={title}
|Date={date}
|Part={part}
|Partof={partof}
|Creator={creator}
|Organization={organization}
|Format={format}
|Event={event}
|Topic={topic}
\\}\\}
[[Template:ImageMetadata]]
'''