import json
import os
import re
import subprocess

# Matches wiki-template style properties of the form ``|key=value``.
# With re.M absent from ``.`` semantics, ``(.*)`` stops at the end of the line.
_PAGE_PROP_RE = re.compile(r'\|(\w*?)\=(.*)', flags=re.M)


def pandoc(content, format_in, format_out):
    """Convert *content* from one markup format to another using pandoc.

    Args:
        content: Text to convert (coerced to ``str``).
        format_in: Pandoc reader name, passed to ``-f``.
        format_out: Pandoc writer name, passed to ``-t``.

    Returns:
        The converted text decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If pandoc exits with a non-zero
            status or the ``pandoc`` executable cannot be found.
    """
    pandoc_cmd = ["pandoc", "-f", str(format_in), "-t", str(format_out)]
    # Feed the text through stdin instead of interpolating it into a shell
    # command: quotes or shell metacharacters in the content can no longer
    # break (or inject into) the command line. The trailing newline mirrors
    # what the previous ``echo``-based pipeline sent to pandoc.
    payload = "{}\n".format(content).encode('utf8')
    try:
        output = subprocess.check_output(pandoc_cmd, input=payload)
    except FileNotFoundError as err:
        # The shell-based version reported a missing binary as exit code 127;
        # keep raising the same exception type for existing callers.
        raise subprocess.CalledProcessError(127, pandoc_cmd) from err
    return output.decode('utf8')


def page_props(wikicontent):
    """Extract ``|key=value`` properties from wiki page text.

    Args:
        wikicontent: Raw wiki text to scan.

    Returns:
        A dict mapping each property name to the rest of its line. When a
        name occurs more than once, the last occurrence wins.
    """
    return dict(_PAGE_PROP_RE.findall(wikicontent))


def update_json(imgsjson_fn, img_dict, img_fn):
    """Record *img_dict* in the JSON index and report whether to download.

    The JSON file at *imgsjson_fn* maps image names to their metadata
    dicts. It is created if missing and rewritten on every call.

    Args:
        imgsjson_fn: Path of the JSON index file.
        img_dict: Image metadata; must contain ``'name'`` and ``'timestamp'``.
        img_fn: Path where the image file is expected to be stored locally.

    Returns:
        ``True`` when the image must be (re)downloaded: it is not in the
        index yet, the local file is missing, or the indexed timestamp
        differs from ``img_dict['timestamp']``. ``False`` otherwise.
    """
    # Load the existing index, or start from an empty one.
    if os.path.isfile(imgsjson_fn):
        with open(imgsjson_fn, 'r', encoding='utf-8') as imgsjson_file:
            imgsjson_dict = json.load(imgsjson_file)
    else:
        imgsjson_dict = {}

    name = img_dict['name']
    if name in imgsjson_dict:
        # Up to date only if the file is on disk AND the timestamp matches.
        img_issaved = os.path.isfile(img_fn)
        img_samets = imgsjson_dict[name]['timestamp'] == img_dict['timestamp']
        download = not (img_issaved and img_samets)
    else:
        download = True

    if download:
        # Store (or refresh) the metadata under the image's name.
        imgsjson_dict[name] = img_dict

    with open(imgsjson_fn, 'w', encoding='utf-8') as imgsjson_file:
        json.dump(imgsjson_dict, imgsjson_file, indent=4)
    return download