diff --git a/Makefile b/Makefile
index 76d79d2..6bb516c 100644
--- a/Makefile
+++ b/Makefile
@@ -35,6 +35,9 @@ myscript: tesseract
 
 wordtagger: tesseract
 	cat output/plain.txt | python3 src/wordtagger.py > output/tagged-words.txt
+
+talktochatbot: tesseract
+	cat output/plain.txt | python3 src/textbotconversation.py
 visualization: $(images) $(tmpfile)
 	#requires mplayer
 	@echo $(tmpfile)
diff --git a/src/textbotconversation.py b/src/textbotconversation.py
new file mode 100644
index 0000000..02d402c
--- /dev/null
+++ b/src/textbotconversation.py
@@ -0,0 +1,38 @@
+"""Feed text to a ChatterBot instance one sentence at a time.
+
+Reads plain text from stdin, splits it into sentences with NLTK's Punkt
+tokenizer, asks the chatbot for a reply to each sentence, and writes the
+interleaved sentence/reply transcript to OUTPUT_PATH.
+"""
+from chatterbot import ChatBot
+from sys import stdin, stderr, stdout
+import nltk.data
+
+OUTPUT_PATH = 'output/whatdoesthechatbotsay.txt'
+
+text = stdin.read()
+
+# Split the input into sentences using the pickled English Punkt model.
+sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
+sentences = sent_detector.tokenize(text.strip())
+
+chatbot = ChatBot(
+    'Ron Obvious',
+    trainer='chatterbot.trainers.ChatterBotCorpusTrainer',
+    output_format="text"
+)
+
+# Train based on the english corpus
+chatbot.train("chatterbot.corpus.english")
+
+# Transcript alternates: input sentence, then the chatbot's reply to it.
+ns = []
+for sen in sentences:
+    # Get a response to an input statement
+    response = chatbot.get_response(sen)
+    ns.append(sen)
+    ns.append(response.text)
+
+# The context manager closes the file even if the write raises.
+with open(OUTPUT_PATH, 'w') as file:
+    file.write("\n".join(ns))