diff --git a/Makefile b/Makefile index 9dc3d55..6b1809f 100644 --- a/Makefile +++ b/Makefile @@ -37,6 +37,7 @@ myscript: tesseract cat $(outputocr) | python3 src/myscript.py > output/a-new-file.txt wordtagger: tesseract + cat $(outputocr) | python3 src/wordtagger.py > output/tagged-words.txt talktochatbot: tesseract diff --git a/README b/README index 44f7302..1a6bfd5 100644 --- a/README +++ b/README @@ -6,4 +6,6 @@ Makefile: * `make tessaract` **dependency: tesseract** * `make myscript` * `make visualization`: **dependency: mplayer** creates visualization of images/ dir, by cating the images content into mplayer. See more option ins [shiftop](https://git.bleu255.com/shiftop/file/shiftop.html) -* `make talktochatbot`: **dependency: ChatterBot** talkes with the ocred file \ No newline at end of file +* `make wordtagger`: **dependency: tesseract** Uses the scanned pages as input, tags each word with its word type (noun, verb, etc.) and saves the result in a text file. +* `make talktochatbot`: **dependency: ChatterBot** talks with the OCR'd file + diff --git a/src/91K nouns.txt b/src/91K_nouns.txt similarity index 100% rename from src/91K nouns.txt rename to src/91K_nouns.txt diff --git a/src/n_7.py b/src/n_7.py index 0f29e88..1ac7688 100644 --- a/src/n_7.py +++ b/src/n_7.py @@ -3,7 +3,7 @@ from sys import stdin, stdout def seven(text): - fpath = open('91K nouns.txt') + fpath = open('src/91K_nouns.txt') nouns = fpath.readlines() separated = text.split() #use nltk tokenize instead #print(separated)