added main

2 years ago · 0bf552f31c
parent d83167c953
commit 0bf552f31c
2 changed files with 71 additions and 43 deletions
--- a/dict_test.py
+++ b/dict_test.py
@ -3,25 +3,25 @@ Development Notes
 JSON is a common format used to represent structured text.   

 The essence of the program is to introduce noise to disrupt the 
-mapping relation of the dictionary. 
+mapping relation present in the dictionary. 

-Learning rules is also essential to machine learning. 
+Rules are also present in machine learning. 

-When the rule is disrupted, when I query the dictionary again, 
+When the mapping rule is disrupted and I query the dictionary again, 
 the message is disrupted. 

 The rule is disrupted by a linear arithmetic operation; 
-are there more disruptive and complex rules? 
+are there more non-linear and nuanced rules? 

 Prototype to translate a system to text into dictionary
-as a type of structured text
+as a type of structured text. 

 str int conversion is important to debug the program

-why is it that noise is perceived as adverse?  
+I am still hostile to the concept and etymology of noise; rename
+the concept to something else. 

 '''
-
 import numpy as np
 import json

@ -34,49 +34,77 @@ import json

 # disrupted message identified by the "no" field

-data = json.load(open('seed.json', 'r'))
-
-print("before disrupting, the message is: ")
-
-for i in data:
-    print(i)
-    if i["no"] == "2":
-        print(i["glyph"] + " " + i["radical"] )
-    if i["no"] == "3":
-        print(i["glyph"] + " " + i["radical"] )
-    
-# write as a function, input are codes, output are a {} of glyphs
-
-noise = np.random.randint(1,3)
-
-for i in data:
-    i["no"] = int(i["no"]) 
-    i["no"] += noise

-with open('noised.json','w') as w_file:
-    json.dump(data,w_file, indent=4)
-
-print("after disrupting, the message is: ")
-
-# use noised json to decrypt 
-
-noise_data = json.load(open('noised.json','r')) 
-for i in noise_data:
-    print(i)
-    print(type(i["no"]))
-    # comparing integers, the noise_data no fields are previously
-    # dumped as integers
-    if i["no"] == 2:
-        print(i["glyph"] + " " + i["radical"] )
-    if i["no"] == 3:
-        print(i["glyph"] + " " + i["radical"] )
-
-# at this point the interferences are not so apparent 
-
-# test with a large corpus 
+def parse_json(filename):
+    data = json.load(open(filename,'r'))
+    return data
+
+def in_msg(data):
+    message_i = []
+    message_full_i = []
+
+    for i in data:
+        if i["no"] == "2":
+            message_i.append(i["glyph"])
+            message_full_i.append(i)
+        if i["no"] == "3":
+            message_i.append(i["glyph"])
+            message_full_i.append(i)
+
+    print("message prior to disruption contains: ")
+    for s in message_i:
+        print(s)
+    
+def disrupt(data):
+    noise = np.random.randint(1,3)
+    for i in data:
+        i["no"] = int(i["no"]) 
+        i["no"] += noise
+    return data
+
+def save_json(data):
+    with open('noised.json','w', encoding='utf-8') as w_file:
+        json.dump(data,w_file, indent=4, ensure_ascii = False)
+
+def out_msg(noise_data):
+    message_o = []
+    message_full_o = []
+
+    for i in noise_data:
+        # compare as integers: the "no" fields in noise_data were
+        # previously dumped as integers
+        if i["no"] == 2:
+            message_o.append(i["glyph"])
+            message_full_o.append(i)
+        if i["no"] == 3:
+            message_o.append(i["glyph"])
+            message_full_o.append(i)
+    
+    print("message after disruption contains: ")
+
+    for s in message_o:
+        print(s)
+
+# at this point the interferences are somewhat apparent 
+# how can I present the interference so that it is more apparent? 
+    # todo 
+# input Chinese blocks in here
+
+
+if __name__ == "__main__":
+    parsed_json = parse_json(filename = "seed.json")
+    in_msg(data = parsed_json)
+    disrupted_data = disrupt(data = parsed_json)
+    save_json(data = disrupted_data)
+    parsed_noise_json = parse_json(filename = "noised.json")
+    out_msg(noise_data = parsed_noise_json)
+
+# test with a large corpus in a separate program

 # try with corpuses of different languages

 # try a Chinese dictionary and a Latin dictionary

 # and any other types of dictionary structures, remix!
+
+# main section looks really ugly
--- a/noised.json
+++ b/noised.json
@ -3,24 +3,24 @@
        "glyph": "hong",
        "dept-no": "silk-1",
        "radical": "silk",
-        "no": 2
+        "no": 3
    },
    {
        "glyph": "jiao",
        "dept-no": "silk-2",
        "radical": "silk",
-        "no": 3
+        "no": 4
    },
    {
        "glyph": "zhu",
        "dept-no": "silk-3",
        "radical": "silk",
-        "no": 4
+        "no": 5
    },
    {
        "glyph": "du",
        "dept-no": "earth-1",
        "radical": "earth",
-        "no": 5
+        "no": 6
    }
 ]