diff --git a/sample_dict_json/dictionary.json b/corpus/dictionary.json
similarity index 100%
rename from sample_dict_json/dictionary.json
rename to corpus/dictionary.json
diff --git a/sample_dict_json/word.json b/corpus/word.json
similarity index 100%
rename from sample_dict_json/word.json
rename to corpus/word.json
diff --git a/sample_dict_json/xinhua.csv b/corpus/xinhua.csv
similarity index 100%
rename from sample_dict_json/xinhua.csv
rename to corpus/xinhua.csv
diff --git a/large_corpus.py b/large_corpus.py
new file mode 100644
index 0000000..9ea2de4
--- /dev/null
+++ b/large_corpus.py
@@ -0,0 +1,6 @@
+'''
+Planning notes for working with a large corpus; no code yet.
+'''
+# choose a corpus to work with
+# look at the corpus structure
+# browse methods for parsing a large corpus
\ No newline at end of file
diff --git a/dict_test.py b/mini_corpus.py
similarity index 96%
rename from dict_test.py
rename to mini_corpus.py
index 1d60774..5e4a1e3 100644
--- a/dict_test.py
+++ b/mini_corpus.py
@@ -87,6 +87,7 @@ def out_msg(noise_data):
 
 # at this point the interferences are somewhat apparent 
 # how can i present the interference to be more apparent? 
+# do I need a much larger corpus to make the interference apparent?
     # todo 
 # input chinese blocks in here
 
diff --git a/run.sh b/run.sh
index d53017c..d17f4a0 100755
--- a/run.sh
+++ b/run.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
 # run python script
-python3 dict_test.py
+python3 mini_corpus.py
 # cat result
 cat new.json
\ No newline at end of file