added chatbook and resources

master
Your Name 7 years ago
parent bf80ff5c16
commit 08ff1142f9

@@ -49,6 +49,7 @@ ocr/output.txt: ## ocr with tesseract
	echo $(listimgs) > $(@D)/list.txt
	@echo $(basename $@ .txt)
	tesseract $(@D)/list.txt $(basename $@ .txt)
	python3 src/build_database.py $(@)
@@ -92,3 +93,6 @@ tts: output/chatbot.txt ocr/output.txt ## text to speech. Dependencies: espeak
ttssr-human-only: ocr/output.txt ## Loop: text to speech-speech recognition. Dependencies: espeak, pocketsphinx
	bash src/ttssr-loop-human-only.sh ocr/output.txt

chatbook:
	python3 src/chatbook.py

File diff suppressed because one or more lines are too long

@@ -0,0 +1,38 @@
import json
import argparse
import sys
from nltk.tokenize import sent_tokenize, word_tokenize
from rake_nltk import Rake

r = Rake()

ap = argparse.ArgumentParser("JSON Dumper")
ap.add_argument("text", nargs="+", help="text sources")
args = ap.parse_args()

# start from the existing index if there is one, otherwise start from scratch
try:
    with open('src/index.json') as f:
        index = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
    index = {}

# build the index of sentences organized by keywords
alltext = ""
for n in args.text:
    text = open(n).read()
    text = text.replace("\n", " ")
    sentences = sent_tokenize(text)
    for sentence in sentences:
        # the RAKE phrases extracted from each sentence become the index keys
        r.extract_keywords_from_text(sentence)
        keys = r.get_ranked_phrases()
        for key in keys:
            if key not in index:
                index[key] = []
            index[key].append({'filename': n, 'sentence': sentence, 'key': key})
    alltext += text
#print(index)
with open('index.json', 'w') as outfile:
    json.dump(index, outfile)
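
For orientation, the database this script writes is keyed on RAKE phrases, and each phrase maps to a list of occurrence records with 'filename', 'sentence' and 'key'. A minimal sketch of reading it back, assuming index.json has already been written by the script above (the lookup phrase is an invented example):

import json

# load the database written by src/build_database.py and print the records for one phrase
with open('index.json') as f:
    index = json.load(f)

# 'tesseract' is only an example phrase; any key present in the index works
for record in index.get('tesseract', []):
    print(record['filename'], '->', record['sentence'])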

@@ -0,0 +1,73 @@
import irc.bot
from rake_nltk import Rake
import random
from nltk.tokenize import sent_tokenize, word_tokenize
import json
#from thread import start_new_thread

r = Rake()

def chunks(l, n):
    # split l into pieces of at most n items (used to keep IRC messages short)
    for i in range(0, len(l), n):
        yield l[i:i+n]

class HelloBot(irc.bot.SingleServerIRCBot):
    def __init__(self, channel, nickname, server, port=6667, index=None):
        irc.bot.SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
        self.channel = channel
        self.index = index

    def on_welcome(self, c, e):
        c.join(self.channel)

    def on_privmsg(self, c, e):
        pass

    def on_pubmsg(self, c, e):
        print(e.arguments, e.source)
        msg = e.arguments[0]
        r.extract_keywords_from_text(msg)
        #r.get_ranked_phrases_with_scores()
        listOfKeys = r.get_ranked_phrases()
        msg = "I don't know anything about that"
        msg_where = ""
        for keyWord in listOfKeys:
            if keyWord in self.index:
                # reply with the first indexed sentence for the highest ranked phrase
                msg = self.index[keyWord][0]['sentence']
                msg_where = "I found this in {}".format(self.index[keyWord][0]['filename'])
                break
        # IRC messages have a length limit, so send the reply in chunks
        for chunk in chunks(msg, 400):
            c.privmsg(self.channel, chunk)
        if msg_where:
            c.privmsg(self.channel, msg_where)

if __name__ == "__main__":
    import argparse
    import sys

    ap = argparse.ArgumentParser("IRC Bot")
    ap.add_argument("--server", default="irc.freenode.net")
    ap.add_argument("--port", type=int, default=6667)
    ap.add_argument("--channel", default="#pzi")
    ap.add_argument("--nickname", default="scanbot")
    ap.add_argument("--text", help="database to use", default="index.json")
    args = ap.parse_args()

    # load the index of sentences organized by keywords
    try:
        with open(args.text) as f:
            index = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        index = {}
    #print(index)
    bot = HelloBot(args.channel, args.nickname, args.server, args.port, index)
    bot.start()
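
As a usage sketch, the lookup that on_pubmsg performs can be exercised without an IRC connection; this assumes only that index.json exists, and the sample message is invented:

import json
from rake_nltk import Rake

r = Rake()
with open('index.json') as f:
    index = json.load(f)

msg = "what does tesseract do"  # stand-in for an incoming channel message
r.extract_keywords_from_text(msg)
for phrase in r.get_ranked_phrases():
    if phrase in index:
        hit = index[phrase][0]
        print(hit['sentence'], '-- I found this in', hit['filename'])
        break
else:
    print("I don't know anything about that")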

File diff suppressed because one or more lines are too long