added chatbook and resources

master
Your Name 7 years ago
parent bf80ff5c16
commit 08ff1142f9

@@ -49,6 +49,7 @@ ocr/output.txt: ## ocr with tesseract
	echo $(listimgs) > $(@D)/list.txt
	@echo $(basename $@ .txt)
	tesseract $(@D)/list.txt $(basename $@ .txt)
	python3 src/build_database.py $(@)
@@ -92,3 +93,6 @@ tts: output/chatbot.txt ocr/output.txt ## text to speech. Dependencies: espeak
ttssr-human-only: ocr/output.txt ## Loop: text to speech-speech recognition. Dependencies: espeak, pocketsphinx
	bash src/ttssr-loop-human-only.sh ocr/output.txt

chatbook:
	python3 src/chatbook.py

File diff suppressed because one or more lines are too long

@@ -0,0 +1,38 @@
import json
import argparse
import sys
from nltk.tokenize import sent_tokenize, word_tokenize
from rake_nltk import Rake

r = Rake()

ap = argparse.ArgumentParser("JSON Dumper")
ap.add_argument("text", nargs="+", help="text sources")
args = ap.parse_args()

# start from the existing index if there is one, otherwise start from scratch
try:
    with open('src/index.json') as f:
        index = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
    index = {}

# build the index of sentences organized by keywords
alltext = ""
for n in args.text:
    text = open(n).read()
    text = text.replace("\n", " ")
    sentences = sent_tokenize(text)
    for sentence in sentences:
        # the RAKE phrases extracted from each sentence become the index keys
        r.extract_keywords_from_text(sentence)
        keys = r.get_ranked_phrases()
        for key in keys:
            if key not in index:
                index[key] = []
            index[key].append({'filename': n, 'sentence': sentence, 'key': key})
    alltext += text
#print(index)
with open('index.json', 'w') as outfile:
    json.dump(index, outfile)
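
For orientation, the database this script writes is keyed on RAKE phrases, and each phrase maps to a list of occurrence records with 'filename', 'sentence' and 'key'. A minimal sketch of reading it back, assuming index.json has already been written by the script above (the lookup phrase is an invented example):

import json

# load the database written by src/build_database.py and print the records for one phrase
with open('index.json') as f:
    index = json.load(f)

# 'tesseract' is only an example phrase; any key present in the index works
for record in index.get('tesseract', []):
    print(record['filename'], '->', record['sentence'])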

@@ -0,0 +1,73 @@
import irc.bot
from rake_nltk import Rake
import random
from nltk.tokenize import sent_tokenize, word_tokenize
import json
#from thread import start_new_thread

r = Rake()

def chunks(l, n):
    # split l into pieces of at most n items (used to keep IRC messages short)
    for i in range(0, len(l), n):
        yield l[i:i+n]

class HelloBot(irc.bot.SingleServerIRCBot):
    def __init__(self, channel, nickname, server, port=6667, index=None):
        irc.bot.SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
        self.channel = channel
        self.index = index

    def on_welcome(self, c, e):
        c.join(self.channel)

    def on_privmsg(self, c, e):
        pass

    def on_pubmsg(self, c, e):
        print(e.arguments, e.source)
        msg = e.arguments[0]
        r.extract_keywords_from_text(msg)
        #r.get_ranked_phrases_with_scores()
        listOfKeys = r.get_ranked_phrases()
        msg = "I don't know anything about that"
        msg_where = ""
        for keyWord in listOfKeys:
            if keyWord in self.index:
                # reply with the first indexed sentence for the highest ranked phrase
                msg = self.index[keyWord][0]['sentence']
                msg_where = "I found this in {}".format(self.index[keyWord][0]['filename'])
                break
        # IRC messages have a length limit, so send the reply in chunks
        for chunk in chunks(msg, 400):
            c.privmsg(self.channel, chunk)
        if msg_where:
            c.privmsg(self.channel, msg_where)

if __name__ == "__main__":
    import argparse
    import sys

    ap = argparse.ArgumentParser("IRC Bot")
    ap.add_argument("--server", default="irc.freenode.net")
    ap.add_argument("--port", type=int, default=6667)
    ap.add_argument("--channel", default="#pzi")
    ap.add_argument("--nickname", default="scanbot")
    ap.add_argument("--text", help="database to use", default="index.json")
    args = ap.parse_args()

    # load the index of sentences organized by keywords
    try:
        with open(args.text) as f:
            index = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        index = {}
    #print(index)
    bot = HelloBot(args.channel, args.nickname, args.server, args.port, index)
    bot.start()
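
As a usage sketch, the lookup that on_pubmsg performs can be exercised without an IRC connection; this assumes only that index.json exists, and the sample message is invented:

import json
from rake_nltk import Rake

r = Rake()
with open('index.json') as f:
    index = json.load(f)

msg = "what does tesseract do"  # stand-in for an incoming channel message
r.extract_keywords_from_text(msg)
for phrase in r.get_ranked_phrases():
    if phrase in index:
        hit = index[phrase][0]
        print(hit['sentence'], '-- I found this in', hit['filename'])
        break
else:
    print("I don't know anything about that")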

File diff suppressed because one or more lines are too long