You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

45 lines
1.1 KiB
Python

'''
Development Notes
The program temporarily works with word.json to test the viability of
working with a large corpus organized as JSON.
'''
import json
from pprint import pprint
def parse_json(filename):
    '''
    Load a JSON file and return the decoded data.

    filename: path of the JSON file to read.

    Returns whatever json.load produces for the file's top-level value.
    Raises OSError if the file cannot be opened and json.JSONDecodeError
    if its content is not valid JSON.
    '''
    # Decode explicitly as UTF-8 (the encoding save_json writes) rather than
    # the platform default, and let the with-block close the file handle.
    with open(filename, 'r', encoding='utf-8') as r_file:
        return json.load(r_file)
def save_json(data, filename='corpus/corpus_cn.json'):
    '''
    Write data to a JSON file, UTF-8 encoded and indented by 4 spaces.

    data: a JSON-serializable object.
    filename: destination path. Defaults to 'corpus/corpus_cn.json', the
        path this function always wrote to before it became a parameter.

    Raises OSError if the file cannot be opened for writing and TypeError
    if data contains something json.dump cannot serialize.
    '''
    with open(filename, 'w', encoding='utf-8') as w_file:
        # ensure_ascii=False keeps non-ASCII characters readable in the file
        # instead of writing them as \uXXXX escapes.
        json.dump(data, w_file, indent=4, ensure_ascii=False)
def intify(data):
    '''
    Convert the "strokes" value of every entry in data to int, in place.

    data: an iterable of mappings, each holding a "strokes" key whose value
        int() accepts.

    Prints a progress line and the resulting type for each entry, then
    returns the same data object that was passed in.
    '''
    for entry in data:
        print("converting to int")
        stroke_count = int(entry["strokes"])
        entry["strokes"] = stroke_count
        print(type(stroke_count))
    return data
# Data-science-style methods are not all that interesting; cautiously stay away.
# Think about what people actually do with a dictionary:
# look for a radical; return results with that radical; like Bok.
# Some general uses of a dictionary:
# def remix_lookup():
# def radical_lookup():
# def stroke_lookup():
# Save the corpus as a separate file to work with.
# Browse methods for parsing a large corpus.
if __name__ == "__main__":
    # Load the corpus file, convert each entry's "strokes" value to int,
    # then hand the converted data to save_json to be written out.
    corpus = parse_json(filename="corpus/corpus_cn.json")
    save_json(data=intify(corpus))