|
|
|
import os, json, re
|
|
|
|
import subprocess
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
def pandoc(content, format_in, format_out):
    """Convert ``content`` from one markup format to another using pandoc.

    The text is handed to pandoc on standard input and the program is
    started from an argument list rather than a shell command line, so
    quotes, backslashes, ``$`` and backticks inside ``content`` reach pandoc
    unchanged instead of being interpreted by a shell.

    Args:
        content: Text to convert.
        format_in: Value passed to pandoc's ``-f`` option.
        format_out: Value passed to pandoc's ``-t`` option.

    Returns:
        pandoc's standard output decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: pandoc exited with a non-zero status.
        FileNotFoundError: no ``pandoc`` executable was found on ``PATH``.
    """
    # str() keeps accepting non-string format values, which the previous
    # str.format()-built command line stringified implicitly.
    pandoc_cmd = ['pandoc', '-f', str(format_in), '-t', str(format_out)]
    output = subprocess.check_output(pandoc_cmd, input=content.encode('utf8'))
    return output.decode('utf8')
|
|
|
|
|
|
|
|
|
|
|
|
def page_props(wikicontent):
    """Extract ``|key=value`` pairs from ``wikicontent`` into a dict.

    Every occurrence of ``|`` followed by a run of word characters (possibly
    empty), an ``=`` sign and the remainder of that line yields one entry:
    the word characters become the key and the rest of the line the value.
    When a key occurs more than once, the last occurrence wins.

    Args:
        wikicontent: Text to scan.

    Returns:
        Dict mapping each key found to its value string.
    """
    pattern = re.compile(r'\|(\w*?)\=(.*)', flags=re.M)
    return {
        match.group(1): match.group(2)
        for match in pattern.finditer(wikicontent)
    }
|
|
|
|
|
|
|
|
|
|
|
|
def unpack_response(response):
    """Flatten one query result into a plain ``{name: value}`` dict.

    ``response['fulltext']`` is stored under the key ``'page'``. Each entry
    of ``response['printouts']`` is then reduced to a single value:

    * a value that is not a dict is stored unchanged;
    * a dict with a ``'fulltext'`` key contributes that entry;
    * otherwise a dict with a ``'timestamp'`` key contributes
      ``datetime.fromtimestamp(int(timestamp))``;
    * any other non-empty dict contributes its first value;
    * an empty dict is skipped (it used to raise ``IndexError``).

    When a property lists several values, the last usable one is kept. A
    property with no usable value does not appear in the result.

    Args:
        response: Mapping with the keys ``'printouts'`` and ``'fulltext'``.

    Returns:
        Dict with ``'page'`` plus one key per property that had a value.

    Raises:
        KeyError: ``response`` lacks ``'printouts'`` or ``'fulltext'``.
    """
    printouts = response['printouts']
    d = {'page': response['fulltext']}
    # Look values up by key instead of calling .items(), so an empty list of
    # printouts is accepted just like an empty dict.
    for prop in printouts:
        for prop_val in printouts[prop]:
            if not isinstance(prop_val, dict):
                d[prop] = prop_val
            elif 'fulltext' in prop_val:
                d[prop] = prop_val['fulltext']
            elif 'timestamp' in prop_val:
                d[prop] = datetime.fromtimestamp(int(prop_val['timestamp']))
            elif prop_val:
                d[prop] = next(iter(prop_val.values()))
            # An empty dict carries no value; leave d[prop] as it is.
    return d
|
|
|
|
|
|
|
|
|
|
|
|
def update_json(imgsjson_fn, img_dict, img_fn):
    """Record ``img_dict`` in a JSON index and report whether to download.

    The index at ``imgsjson_fn`` is read if the file exists; otherwise an
    empty index is used. The image must be downloaded when its name is not
    in the index yet, when ``img_fn`` is not a file on disk, or when the
    indexed ``'timestamp'`` differs from ``img_dict['timestamp']``. In those
    cases ``img_dict`` is stored in the index under ``img_dict['name']``.
    The index is written back to ``imgsjson_fn`` on every call.

    Args:
        imgsjson_fn: Path of the JSON index file.
        img_dict: Image record; ``'name'`` is always read, ``'timestamp'``
            is read when the name is already indexed.
        img_fn: Path where the image file is expected locally.

    Returns:
        True when the image should be downloaded, False otherwise.
    """
    # Load the existing index, or start from an empty one.
    if os.path.isfile(imgsjson_fn):
        with open(imgsjson_fn, 'r') as index_file:
            index = json.load(index_file)
    else:
        index = {}

    name = img_dict['name']
    if name in index:
        # Evaluate both checks before combining them, so the timestamp
        # lookup happens even when the local file is missing.
        on_disk = os.path.isfile(img_fn)
        same_timestamp = index[name]['timestamp'] == img_dict['timestamp']
        download = not (on_disk and same_timestamp)
    else:
        download = True

    if download:
        index[name] = img_dict

    with open(imgsjson_fn, 'w') as index_file:
        json.dump(index, index_file, indent=4)

    return download
|