import json
import os
import re
import subprocess
from datetime import datetime


def pandoc(content, format_in, format_out):
    """Convert *content* from one markup format to another using pandoc.

    The text is passed to the ``pandoc`` executable on standard input and
    the converted text is read from its standard output. No shell is
    involved, so quotes, backslashes or shell metacharacters inside
    *content* cannot break or hijack the command (the previous
    ``echo '...' | pandoc`` pipeline failed on any single quote).

    Args:
        content: Text to convert (``str``).
        format_in: Value handed to pandoc's ``-f`` option.
        format_out: Value handed to pandoc's ``-t`` option.

    Returns:
        The converted text, decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: pandoc exited with a non-zero
            status, or the pandoc executable could not be found.
    """
    cmd = ['pandoc', '-f', format_in, '-t', format_out]
    try:
        # NOTE: content is sent as-is; the old `echo` pipeline appended a
        # trailing newline to it.
        output = subprocess.check_output(cmd, input=content.encode('utf8'))
    except FileNotFoundError as err:
        # The shell pipeline reported a missing binary as exit status 127
        # through CalledProcessError; keep that contract for callers.
        raise subprocess.CalledProcessError(127, cmd) from err
    return output.decode('utf8')


def page_props(wikicontent):
    """Collect ``|key=value`` pairs found in *wikicontent*.

    Every occurrence of ``|<word chars>=<rest of line>`` is captured. When
    a key occurs more than once, the last occurrence wins.

    Args:
        wikicontent: Text to scan.

    Returns:
        A dict mapping each key to the text following ``=`` up to the end
        of that line.
    """
    exp = re.compile(r'\|(\w*?)\=(.*)', flags=re.M)
    # findall yields (key, value) tuples, which dict() consumes directly.
    return dict(exp.findall(wikicontent))


def _printout_value(prop_val):
    """Reduce a single printout value to a plain Python value."""
    if not isinstance(prop_val, dict):
        return prop_val
    if 'fulltext' in prop_val:
        return prop_val['fulltext']
    if 'timestamp' in prop_val:
        # fromtimestamp() yields a naive datetime in the local timezone.
        return datetime.fromtimestamp(int(prop_val['timestamp']))
    # Fall back to the first value of the dict; an empty dict raises
    # IndexError, as it always has.
    return list(prop_val.values())[0]


def unpack_response(response):
    """Flatten one query result into a simple ``{property: value}`` dict.

    *response* must provide a ``'fulltext'`` entry (stored under the key
    ``'page'``) and a ``'printouts'`` mapping of property name to a list
    of values. Presumably this is one result of a Semantic MediaWiki
    query -- verify against the caller.

    For each property only the last value of its list is kept; properties
    with an empty list are omitted. Dict values are reduced to their
    ``'fulltext'`` entry, else to a ``datetime`` built from their
    ``'timestamp'`` entry, else to their first value.

    Args:
        response: Mapping with ``'fulltext'`` and ``'printouts'`` keys.

    Returns:
        A dict with ``'page'`` plus one entry per non-empty property.
    """
    printouts = response['printouts']
    d = {'page': response['fulltext']}
    for prop, p_item in printouts.items():
        for prop_val in p_item:
            d[prop] = _printout_value(prop_val)
    return d


def update_json(imgsjson_fn, img_dict, img_fn):
    """Record *img_dict* in the JSON index and report if a download is due.

    The index file maps image names to their metadata dicts. An image has
    to be downloaded when it is not in the index yet, when its local file
    *img_fn* is missing, or when the indexed timestamp differs from the
    one in *img_dict*. In those cases the index entry is replaced by
    *img_dict*. The index file is (re)written on every call.

    Args:
        imgsjson_fn: Path of the JSON index file; created if absent.
        img_dict: Image metadata; must contain ``'name'`` and
            ``'timestamp'``.
        img_fn: Path where the image file is expected locally.

    Returns:
        True if the image should be downloaded, False otherwise.
    """
    if os.path.isfile(imgsjson_fn):
        with open(imgsjson_fn, 'r') as imgsjson_file:
            imgsjson_dict = json.load(imgsjson_file)
    else:
        # No index yet: start from an empty one.
        imgsjson_dict = {}

    img_name = img_dict['name']
    if img_name in imgsjson_dict:
        # Up to date only if the file is stored locally AND the indexed
        # timestamp equals the current one.
        img_issaved = os.path.isfile(img_fn)
        img_samets = (
            imgsjson_dict[img_name]['timestamp'] == img_dict['timestamp']
        )
        download = not (img_issaved and img_samets)
    else:
        download = True

    if download:
        # Store the metadata under the image name.
        imgsjson_dict[img_name] = img_dict

    with open(imgsjson_fn, 'w') as imgsjson_file:
        json.dump(imgsjson_dict, imgsjson_file, indent=4)
    return download