added chatbook and resources
parent bf80ff5c16
commit 08ff1142f9
File diff suppressed because one or more lines are too long
@@ -0,0 +1,38 @@
import json
import argparse
import sys

from nltk.tokenize import sent_tokenize, word_tokenize
from rake_nltk import Rake

r = Rake()

ap = argparse.ArgumentParser("JSON Dumper")
ap.add_argument("text", nargs="+", help="text sources")
args = ap.parse_args()

# load an existing index if one is present, otherwise start from scratch
try:
    with open('src/index.json') as f:
        index = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
    index = {}

# build the index of sentences organized by keywords
alltext = ""

for n in args.text:
    text = open(n).read()
    text = text.replace("\n", " ")
    sentences = sent_tokenize(text)
    for sentence in sentences:
        # extract RAKE keyphrases from the sentence and file the sentence
        # under every phrase it yields
        r.extract_keywords_from_text(sentence)
        keys = r.get_ranked_phrases()
        for key in keys:
            if key not in index:
                index[key] = []
            index[key].append({'filename': n, 'sentence': sentence, 'key': key})
    alltext += text

#print(index)

with open('index.json', 'w') as outfile:
    json.dump(index, outfile)
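For reference, the indexer above stores one list of records per RAKE keyphrase, so the resulting index.json can be read back and queried directly. A minimal sketch (only the record structure is taken from the script; the sample lookup is illustrative):

    import json

    with open('index.json') as f:
        index = json.load(f)

    # every keyphrase maps to a list of {'filename', 'sentence', 'key'} records
    if index:
        key = next(iter(index))
        print(key, '->', index[key][0]['sentence'])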
@@ -0,0 +1,73 @@
import irc.bot
from rake_nltk import Rake
import random
from nltk.tokenize import sent_tokenize, word_tokenize
import json
#from thread import start_new_thread

r = Rake()


def chunks(l, n):
    # yield successive slices of l that are at most n items long
    for i in range(0, len(l), n):
        yield l[i:i+n]


class HelloBot(irc.bot.SingleServerIRCBot):
    def __init__(self, channel, nickname, server, port=6667, index=None):
        irc.bot.SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
        self.channel = channel
        self.index = index

    def on_welcome(self, c, e):
        c.join(self.channel)

    def on_privmsg(self, c, e):
        pass

    def on_pubmsg(self, c, e):
        print(e.arguments, e.source)
        msg = e.arguments[0]
        # extract RAKE keyphrases from the incoming channel message
        r.extract_keywords_from_text(msg)
        #r.get_ranked_phrases_with_scores()
        listOfKeys = r.get_ranked_phrases()

        # answer once per extracted keyphrase
        for keyWord in listOfKeys:
            if keyWord in self.index:
                msg = self.index.get(keyWord)[0].get('sentence')
                msg_where = "I found this in {}".format(self.index.get(keyWord)[0].get('filename'))
            else:
                msg = "I don't know anything about that"
                msg_where = ""

            # IRC lines are length-limited, so send the reply in chunks
            for chunk in chunks(msg, 400):
                c.privmsg(self.channel, chunk)

            if msg_where:
                c.privmsg(self.channel, msg_where)


if __name__ == "__main__":
    import argparse
    import sys

    ap = argparse.ArgumentParser("IRC Bot")
    ap.add_argument("--server", default="irc.freenode.net")
    ap.add_argument("--port", type=int, default=6667)
    ap.add_argument("--channel", default="#pzi")
    ap.add_argument("--nickname", default="scanbot")
    ap.add_argument("--text", default="index.json", help="database to use")
    args = ap.parse_args()

    # load the index of sentences organized by keywords
    try:
        with open(args.text) as f:
            index = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        index = {}

    #print(index)

    bot = HelloBot(args.channel, args.nickname, args.server, args.port, index)
    bot.start()
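With index.json in place, the bot can be started against it. A sketch of a typical invocation, assuming the script above is saved as chatbot.py (the actual file name is not shown in this view):

    python3 chatbot.py --server irc.freenode.net --port 6667 --channel "#pzi" --nickname scanbot --text index.json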
File diff suppressed because one or more lines are too long