"""Corpus JSON utilities.

Development notes: the program temporarily works with word.json to test
the viability of working with a large corpus organized as JSON.
"""

import json
from pprint import pprint

# Path used by the script entry point and as the default save target.
DEFAULT_CORPUS_PATH = "corpus/corpus_cn.json"


def parse_json(filename):
    """Load and return the JSON document stored in *filename*.

    The file is read as UTF-8 so that it round-trips with ``save_json``,
    which writes non-ASCII characters verbatim, and it is closed as soon
    as parsing finishes.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(filename, "r", encoding="utf-8") as r_file:
        return json.load(r_file)


def save_json(data, filename=DEFAULT_CORPUS_PATH):
    """Write *data* as indented, human-readable UTF-8 JSON.

    Args:
        data: Any JSON-serializable value.
        filename: Destination path; defaults to the corpus file so that
            existing ``save_json(data)`` calls keep their behavior.
    """
    with open(filename, "w", encoding="utf-8") as w_file:
        # ensure_ascii=False keeps non-ASCII text readable in the file
        # instead of emitting \uXXXX escapes.
        json.dump(data, w_file, indent=4, ensure_ascii=False)


def intify(data, *, verbose=True):
    """Convert the ``"strokes"`` value of every entry to ``int`` in place.

    Args:
        data: Iterable of dicts, each with a ``"strokes"`` key whose value
            is accepted by ``int()``.
        verbose: When true (the default, matching the original behavior),
            print a progress line and the resulting type for each entry.
            Pass ``False`` to keep stdout quiet on a large corpus.

    Returns:
        The same *data* object, after its entries have been mutated.

    Raises:
        KeyError: If an entry has no ``"strokes"`` key.
        ValueError: If a ``"strokes"`` value cannot be parsed as an int.
    """
    for entry in data:
        if verbose:
            print("converting to int")
        entry["strokes"] = int(entry["strokes"])
        if verbose:
            print(type(entry["strokes"]))
    return data


# Data-science-style methods are not all that interesting; cautiously stay
# away from them.
# Think about what people actually do with a dictionary:
#   - look for a radical; return results with that radical; like Bok.
# Some general usages of a dictionary (not implemented yet):
# def remix_lookup():
# def radical_lookup():
# def stroke_lookup():
# Save the corpus as a separate file to work with.
# Browse methods to parse a large quantity of corpus.


if __name__ == "__main__":
    parsed_json = parse_json(filename=DEFAULT_CORPUS_PATH)
    intified_data = intify(parsed_json)
    save_json(data=intified_data)