From 4779ee09c0cbfe34af9b037f66e6da7567fb8f55 Mon Sep 17 00:00:00 2001 From: onebigear Date: Wed, 15 Jun 2022 09:21:55 -0600 Subject: [PATCH] renamed files and move towards working with a large corpus --- {sample_dict_json => corpus}/dictionary.json | 0 {sample_dict_json => corpus}/word.json | 0 {sample_dict_json => corpus}/xinhua.csv | 0 large_corpus.py | 6 ++++++ dict_test.py => mini_corpus.py | 1 + run.sh | 2 +- 6 files changed, 8 insertions(+), 1 deletion(-) rename {sample_dict_json => corpus}/dictionary.json (100%) rename {sample_dict_json => corpus}/word.json (100%) rename {sample_dict_json => corpus}/xinhua.csv (100%) create mode 100644 large_corpus.py rename dict_test.py => mini_corpus.py (96%) diff --git a/sample_dict_json/dictionary.json b/corpus/dictionary.json similarity index 100% rename from sample_dict_json/dictionary.json rename to corpus/dictionary.json diff --git a/sample_dict_json/word.json b/corpus/word.json similarity index 100% rename from sample_dict_json/word.json rename to corpus/word.json diff --git a/sample_dict_json/xinhua.csv b/corpus/xinhua.csv similarity index 100% rename from sample_dict_json/xinhua.csv rename to corpus/xinhua.csv diff --git a/large_corpus.py b/large_corpus.py new file mode 100644 index 0000000..9ea2de4 --- /dev/null +++ b/large_corpus.py @@ -0,0 +1,6 @@ +''' + +''' +# choose a corpus to work with +# look at corpus structure +# browse methods to parse large quantity of corpus \ No newline at end of file diff --git a/dict_test.py b/mini_corpus.py similarity index 96% rename from dict_test.py rename to mini_corpus.py index 1d60774..5e4a1e3 100644 --- a/dict_test.py +++ b/mini_corpus.py @@ -87,6 +87,7 @@ def out_msg(noise_data): # at this point the interferences are somewhat apparent # how can i present the interference to be more apparent? +# do i need to work with a really large corpus to make it apparent? 
# todo # input chinese blocks in here diff --git a/run.sh b/run.sh index d53017c..d17f4a0 100755 --- a/run.sh +++ b/run.sh @@ -1,5 +1,5 @@ #!/bin/bash # run python script -python3 dict_test.py +python3 mini_corpus.py # cat result cat new.json \ No newline at end of file