Merge branch 'master' of git.xpub.nl:/var/www/git.xpub.nl/repos/OuNoPo-make

7 years ago · 8dacfd1ccf
parent d86c273ddb 980830c613
commit 8dacfd1ccf
5 changed files with 91045 additions and 0 deletions
--- a/9
+++ b/9
@ -36,6 +36,15 @@ myscript: tesseract
 wordtagger: tesseract
 	cat output/plain.txt | python3 src/wordtagger.py > output/tagged-words.txt
 	
+.PHONY: talktochatbot n+7
+# Feed the OCRed text to the chatbot script; it writes output/whatdoesthechatbotsay.txt.
+talktochatbot: tesseract
+	cat output/plain.txt | python3 src/textbotconversation.py
+
+# N+7: swap every noun found in src/91K_nouns.txt for the noun seven entries after it.
+n+7: tesseract
+	cat output/plain.txt | python3 src/n_7.py > output/blah.txt
+
 visualization: $(images) $(tmpfile) #requires mplayer
 	@echo $(tmpfile)
 	for i in $(images); do \
--- a/2
+++ b/2
@ -7,3 +7,5 @@ Makefile:
 * `make myscript`
 * `make visualization`: **dependency: mplayer**  creates visualization of images/ dir, by cating the images content into mplayer. See more option ins [shiftop](https://git.bleu255.com/shiftop/file/shiftop.html)
 * `make wordtagger`: **dependency: tesseract** Uses scanned pages as an input, tags each word for their wordtype (noun, verb etc) and saves it in a text file.
+* `make talktochatbot`: **dependency: ChatterBot** Feeds each sentence of the OCRed text to a chatbot and saves the conversation in a text file.
+
--- a/src/91K_nouns.txt
+++ b/src/91K_nouns.txt
--- a/src/n_7.py
+++ b/src/n_7.py
@ -0,0 +1,37 @@
+import sys
+from sys import stdin, stdout
+
+
+def seven(text, offset=7, nouns_path='src/91K_nouns.txt'):
+    """Apply the N+7 substitution to ``text``.
+
+    The text is split on whitespace and lower-cased word by word. Every word
+    that appears in the noun list is replaced by the noun ``offset`` entries
+    further down the list; all other words are kept (lower-cased).
+
+    Args:
+        text: Input text.
+        offset: How many entries to move forward in the noun list.
+        nouns_path: Path of the noun list, one noun per line. The default is
+            relative to the current working directory.
+
+    Returns:
+        The transformed words joined by single spaces.
+
+    Raises:
+        OSError: If the noun list cannot be opened.
+    """
+    # Close the noun list as soon as it has been read.
+    with open(nouns_path) as fpath:
+        nouns = [line.rstrip('\n') for line in fpath]
+    # Map each noun to its first position so that a lookup does not have to
+    # scan the whole list for every word of the text.
+    positions = {}
+    for index, noun in enumerate(nouns):
+        positions.setdefault(noun, index)
+    new_separated = []
+    for word in text.split():    #use nltk tokenize instead
+        word = word.lower()
+        position = positions.get(word)
+        if position is None:
+            new_separated.append(word)
+        else:
+            # Wrap around at the end of the list instead of raising
+            # IndexError for the last few nouns.
+            new_word = nouns[(position + offset) % len(nouns)]
+            new_separated.append(new_word.strip())
+    return ' '.join(new_separated)
+
+if __name__ == '__main__':
+    # Filter mode: read the text from standard input and print the N+7
+    # version to standard output. The guard keeps an import of this module
+    # (for example from a test) from blocking on stdin.
+    text = stdin.read()
+    output = seven(text)
+    print(output)
+
+#sentence = input('What is your sentence? ')
+#print(seven(sentence))
+
+
+# pytest requires that you name your tests with test_<your-name>
+# run with the 'pytest' command in your terminal
+#def test_seven():
+#    assert seven('Baboons') == 'babushkas'
+#    assert seven('Baboons,') == 'babushkas'
--- a/src/textbotconversation.py
+++ b/src/textbotconversation.py
@ -0,0 +1,34 @@
+
+from chatterbot import ChatBot
+from sys import stdin, stderr, stdout
+import nltk.data
+
+
+# Read the whole input text from standard input.
+text = stdin.read()
+
+# Split the text into sentences with NLTK's English Punkt tokenizer.
+sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
+sentences = sent_detector.tokenize(text.strip())
+
+# Transcript lines: each input sentence followed by the chatbot's reply.
+ns = []
+
+chatbot = ChatBot(
+    'Ron Obvious',
+    trainer='chatterbot.trainers.ChatterBotCorpusTrainer',
+    output_format="text"
+)
+
+# Train based on the english corpus
+chatbot.train("chatterbot.corpus.english")
+
+for sen in sentences:
+    # Get a response to an input statement
+    response = chatbot.get_response(sen)
+    ns.append(sen)
+    ns.append(response.text)
+
+# Write the transcript, one line per entry; the context manager closes the
+# file even if the write fails.
+with open('output/whatdoesthechatbotsay.txt', 'w') as transcript:
+    transcript.write("\n".join(ns))