Converted the corpus stroke / no entries into integers, to do interesting remix operations with integers.
parent
4779ee09c0
commit
0ef587164a
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,26 @@
|
||||
[
|
||||
{
|
||||
"glyph": "hong",
|
||||
"dept-no": "silk-1",
|
||||
"radical": "silk",
|
||||
"no": 3
|
||||
},
|
||||
{
|
||||
"glyph": "jiao",
|
||||
"dept-no": "silk-2",
|
||||
"radical": "silk",
|
||||
"no": 4
|
||||
},
|
||||
{
|
||||
"glyph": "zhu",
|
||||
"dept-no": "silk-3",
|
||||
"radical": "silk",
|
||||
"no": 5
|
||||
},
|
||||
{
|
||||
"glyph": "du",
|
||||
"dept-no": "earth-1",
|
||||
"radical": "earth",
|
||||
"no": 6
|
||||
}
|
||||
]
|
@ -1,6 +1,44 @@
|
||||
'''
|
||||
Development Notes
|
||||
The program temporarily works with word.json to test the viability of
working with a large corpus organized as JSON.
|
||||
|
||||
'''
|
||||
# choose a corpus to work with
|
||||
# look at corpus structure
|
||||
# browse methods to parse large quantity of corpus
|
||||
import json
|
||||
from pprint import pprint
|
||||
|
||||
def parse_json(filename):
    """Load and return the JSON document stored in ``filename``.

    The file is opened explicitly as UTF-8 (the encoding ``save_json``
    writes) so non-ASCII corpus text decodes the same way on every
    platform, and inside a ``with`` block so the handle is closed as soon
    as parsing finishes instead of being left to the garbage collector.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(filename, 'r', encoding='utf-8') as r_file:
        return json.load(r_file)
|
||||
|
||||
def save_json(data, filename='corpus/corpus_cn.json'):
    """Write ``data`` to ``filename`` as pretty-printed UTF-8 JSON.

    Args:
        data: Any JSON-serializable value.
        filename: Destination path. Defaults to the corpus file this
            script has always written to, so existing ``save_json(data)``
            calls behave exactly as before; pass another path to avoid
            overwriting the corpus.

    Raises:
        OSError: If the destination cannot be opened for writing.
        TypeError: If ``data`` contains a value ``json`` cannot serialize.
    """
    with open(filename, 'w', encoding='utf-8') as w_file:
        # ensure_ascii=False keeps non-ASCII characters readable in the
        # file instead of escaping them as \uXXXX sequences.
        json.dump(data, w_file, indent=4, ensure_ascii=False)
|
||||
|
||||
def intify(data):
    """Convert the ``"strokes"`` value of every entry in ``data`` to ``int``.

    Entries are modified in place, and the same ``data`` object is
    returned. Two progress lines are printed per entry: a fixed notice
    before the conversion and the resulting type after it.

    Args:
        data: Iterable of mappings that each have a ``"strokes"`` key
            whose value ``int()`` accepts.

    Returns:
        ``data`` itself, with each ``"strokes"`` value replaced by an int.
    """
    for record in data:
        print("converting to int")
        stroke_count = int(record["strokes"])
        record["strokes"] = stroke_count
        print(type(record["strokes"]))
    return data
|
||||
|
||||
# data sciency methods are not all that interesting, cautiously stay away.
|
||||
# think about what people do with a dictionary
|
||||
# look for a radical; return results with that radical; like Bok.
|
||||
# some general uses of a dictionary
|
||||
|
||||
# def remix_lookup():
|
||||
|
||||
# def radical_lookup():
|
||||
|
||||
# def stroke_lookup():
|
||||
|
||||
# save corpus as separate file to work with
|
||||
|
||||
|
||||
# browse methods to parse large quantity of corpus
|
||||
|
||||
if __name__ == "__main__":
    # Load the corpus, convert its stroke counts to integers, and write
    # the result back out through save_json.
    corpus = parse_json(filename="corpus/corpus_cn.json")
    save_json(data=intify(corpus))
|
||||
|
Loading…
Reference in New Issue