"""
Development Notes

JSON is a common format used to represent structured text.

The essence of the program is to introduce noise to disrupt the mapping
relation present in the dictionary.

Rules are also present in machine learning. When the mapping rule is
disrupted and I query the dictionary again, the message is disrupted.

The rule is disrupted by a linear arithmetic operation; are there more
non-linear and nuanced rules?

Prototype to translate a system to text into a dictionary as a type of
structured text.

str/int conversion is important to debug the program.

I am still hostile to the concept and etymology of noise; rename the
concept into something else.
"""

import json

import numpy as np

# 1 <= key <= 3, silk; 4 <= key <= 6, earth; 7 <= key <= 9, water
# perform message decryption process via this mini corpus
# "no" field is similar to ascii/morse code/unicode coding protocols

# original message identified by "no" field, no 2 & 3
# first word
# disrupted message identified by "no" field

# "no" values whose glyphs make up the message that gets printed.
_MESSAGE_NUMBERS = (2, 3)


def _select_glyphs(entries, numbers=_MESSAGE_NUMBERS):
    """Return the "glyph" of every entry whose "no" is in *numbers*, in order."""
    return [entry["glyph"] for entry in entries if entry["no"] in numbers]


def parse_json(filename):
    """Load and return the JSON document stored in *filename*.

    The file is read as UTF-8 (the encoding ``save_json`` writes) and is
    closed as soon as parsing finishes.
    """
    with open(filename, 'r', encoding='utf-8') as r_file:
        return json.load(r_file)


def intify(data):
    """Convert the "no" field of every entry in *data* to ``int``.

    The entries are modified in place; the same *data* object is returned
    so calls can be chained.
    """
    for entry in data:
        entry["no"] = int(entry["no"])
    return data


def in_msg(data):
    """Print the glyphs of the entries numbered 2 and 3 before disruption."""
    print("message prior to disruption contains: ")
    for glyph in _select_glyphs(data):
        print(glyph)


def disrupt(data):
    """Shift the "no" field of every entry by one random offset.

    A single offset (1 or 2; the upper bound of ``randint`` is exclusive)
    is drawn once and added to every entry, so the whole mapping moves by
    the same amount. The entries are modified in place and *data* is
    returned.
    """
    # int(...) keeps the stored numbers plain Python ints so json.dump can
    # serialise them regardless of the scalar type numpy hands back.
    noise = int(np.random.randint(1, 3))
    for entry in data:
        entry["no"] += noise
    return data


def save_json(data, filename='corpus_cn.json'):
    """Write *data* to *filename* as indented, non-ASCII-escaped UTF-8 JSON."""
    with open(filename, 'w', encoding='utf-8') as w_file:
        json.dump(data, w_file, indent=4, ensure_ascii=False)


def out_msg(noise_data):
    """Print the glyphs of the entries numbered 2 and 3 after disruption."""
    # comparing integers, the noise_data no fields are previously
    # dumped as integers
    print("message after disruption contains: ")
    for glyph in _select_glyphs(noise_data):
        print(glyph)


# at this point the interferences are somewhat apparent
# how can i present the interference to be more apparent?
# do i need to work with a really large corpus to make it apparent?

# todo
# input chinese blocks in here


def main():
    """Run the demo: load the seed corpus, disrupt it, save it, re-read it."""
    intified_data = intify(data=parse_json(filename="seed.json"))
    in_msg(data=intified_data)

    disrupted_data = disrupt(data=intified_data)
    save_json(data=disrupted_data)

    # NOTE(review): save_json writes 'corpus_cn.json' by default, but the
    # disrupted corpus is read back from 'noised.json' -- confirm which file
    # name is intended; as written, 'noised.json' must already exist.
    parsed_noise_json = parse_json(filename="noised.json")
    out_msg(noise_data=parsed_noise_json)


if __name__ == "__main__":
    main()

# test with a large corpus in separate program
# try with corpuses of different language
# try a chinese dictionary and a latin dictionary
# and any other types of dictionary structures, remix!
# main section looks really ugly