You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

116 lines
3.1 KiB
Python

'''
Development Notes
JSON is a common format used to represent structured text.
The essence of the program is to introduce noise to disrupt the
mapping relation present in the dictionary.
Rules are also present in machine learning.
Once the mapping rule is disrupted, querying the dictionary again
yields a disrupted message.
The rule is disrupted by a linear arithmetic operation;
are there more non-linear and nuanced rules?
Prototype to translate a system to text into a dictionary,
as a type of structured text.
str/int conversion is important for debugging the program.
I am still hostile to the concept and etymology of noise; rename
the concept into something else.
'''
import numpy as np
import json
# 1 <= key <= 3, silk; 4 <= key <= 6, earth; 7 <= key <= 9, water
# perform the message decryption process via this mini corpus
# the "no" field is similar to ASCII/Morse code/Unicode coding protocols
# original message identified by the "no" field, no 2 & 3
# first word
# disrupted message identified by the "no" field
def parse_json(filename):
    """Load and return the JSON document stored in *filename*.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value, exactly as produced by ``json.load``.
    """
    # The context manager closes the handle even if decoding fails; UTF-8 is
    # given explicitly so reading matches how save_json writes the corpus.
    with open(filename, 'r', encoding='utf-8') as r_file:
        return json.load(r_file)
def intify(data):
    """Convert every record's "no" field to ``int``, in place.

    Args:
        data: Iterable of dict records, each holding a "no" entry that
            ``int()`` accepts.

    Returns:
        The same *data* object that was passed in, now carrying integer
        "no" values.
    """
    for record in data:
        record["no"] = int(record["no"])
    return data
def in_msg(data):
    """Print the glyphs of the message before disruption.

    The message consists of the records whose "no" field equals 2 or 3,
    taken in the order in which they appear in *data*.

    Args:
        data: Iterable of dict records with a "no" entry and, for the
            selected records, a "glyph" entry.

    Returns:
        None. A header line and then one glyph per line are written to
        standard output.
    """
    # A single membership test replaces the two duplicated branches.
    message_i = [i["glyph"] for i in data if i["no"] in (2, 3)]
    print("message prior to disruption contains: ")
    for s in message_i:
        print(s)
def disrupt(data):
    """Shift every record's "no" field by one shared random offset, in place.

    Args:
        data: Iterable of dict records with a numeric "no" entry.

    Returns:
        The same *data* object, after the offset has been added to each
        record.
    """
    # randint's upper bound is exclusive, so the offset is either 1 or 2;
    # it is drawn once so that all records move by the same amount.
    noise = np.random.randint(1, 3)
    for record in data:
        record["no"] += noise
    return data
def save_json(data, filename='corpus_cn.json'):
    """Write *data* to a JSON file as indented UTF-8 text.

    Args:
        data: JSON-serialisable value to store.
        filename: Destination path. Defaults to 'corpus_cn.json', the path
            that was previously hard-coded.

    Returns:
        None.
    """
    with open(filename, 'w', encoding='utf-8') as w_file:
        # ensure_ascii=False keeps non-ASCII glyphs readable in the file
        # instead of escaping them.
        json.dump(data, w_file, indent=4, ensure_ascii=False)
def out_msg(noise_data):
    """Print the glyphs selected by the message numbers after disruption.

    The records whose "no" field equals 2 or 3 are taken in the order in
    which they appear in *noise_data*.

    Args:
        noise_data: Iterable of dict records with a "no" entry and, for the
            selected records, a "glyph" entry.

    Returns:
        None. A header line and then one glyph per line are written to
        standard output.
    """
    # "no" is compared against integers because the disrupted corpus was
    # dumped with integer "no" fields.
    message_o = [i["glyph"] for i in noise_data if i["no"] in (2, 3)]
    print("message after disruption contains: ")
    for s in message_o:
        print(s)
# At this point the interference is somewhat apparent.
# How can I present the interference so that it is more apparent?
# Do I need to work with a really large corpus to make it apparent?
# TODO: input Chinese blocks in here.
if __name__ == "__main__":
    # Load the seed corpus and turn its "no" fields into integers.
    parsed_json = parse_json(filename="seed.json")
    intified_data = intify(data=parsed_json)
    # Show the message before any disruption is applied.
    in_msg(data=intified_data)
    # Shift the "no" fields, then persist the disrupted corpus.
    disrupted_data = disrupt(data=intified_data)
    save_json(data=disrupted_data)
    # NOTE(review): save_json writes 'corpus_cn.json', yet the corpus is
    # read back from "noised.json" -- confirm which file is intended.
    parsed_noise_json = parse_json(filename="noised.json")
    out_msg(noise_data=parsed_noise_json)
# Test with a large corpus in a separate program.
# Try corpora of different languages:
# try a Chinese dictionary and a Latin dictionary,
# and any other types of dictionary structures. Remix!
# The main section looks really ugly.