@@ -27,6 +27,7 @@ from textblob import TextBlob
 from textblob_aptagger import PerceptronTagger
 import string
 import re
+import json
 import time
 import sys; from PIL import Image; import numpy as np
 chars = np.asarray(list(' .,:;irsXA253hMHGS#9B&@'))
@@ -158,7 +159,7 @@ class HelloBot(irc.bot.SingleServerIRCBot):
         self.poem = []
         self.keys = list(map(lambda x:re.compile(x[0], re.IGNORECASE),basicDialog))
         self.values = list(map(lambda x:x[1],basicDialog))
-        self.POSSIBLE_STRUCTURES=[["adjective", "noun", "adverb", "verb", "adjective", "noun"], ["pronoun", "verb", "noun"]]
+        self.POSSIBLE_STRUCTURES=[["adjective", "noun", "adverb", "verb", "adjective", "noun"], ["pronoun", "verb", "adverb"]]
         self.structure = random.choice(self.POSSIBLE_STRUCTURES)
 
     def translate(self,str,dict):
@@ -455,10 +456,15 @@ class HelloBot(irc.bot.SingleServerIRCBot):
         else:
             self.sentence.append(incoming_msg)
             self.lastPOS +=1
+            print(self.waitingfor + " " + incoming_msg)
+            self.index[self.waitingfor + "s"].append(incoming_msg)
+            with open('src/database.json', 'w') as outfile:
+                json.dump(self.index, outfile)
             self.generateSentence(c,e)
 
 
+
 
 def cleanedText(text):
     word_tokens = word_tokenize(text)
     word_tokens = [word.lower() for word in word_tokens]
@ -482,77 +488,13 @@ if __name__ == "__main__":
|
|
|
|
ap.add_argument("--text", nargs="+", help="text sources")
|
|
|
|
ap.add_argument("--text", nargs="+", help="text sources")
|
|
|
|
args=ap.parse_args()
|
|
|
|
args=ap.parse_args()
|
|
|
|
|
|
|
|
|
|
|
|
# build the index
|
|
|
|
with open("src/database.json") as f:
|
|
|
|
index = {}
|
|
|
|
try:
|
|
|
|
|
|
|
|
index = json.load(f)
|
|
|
|
nouns = []
|
|
|
|
except:
|
|
|
|
verbs = []
|
|
|
|
print("I can't work with no knowledge")
|
|
|
|
adverbs = []
|
|
|
|
sys.exit()
|
|
|
|
pronouns = ["I", "you", "we", "they"]
|
|
|
|
|
|
|
|
adjectives = []
|
|
|
|
|
|
|
|
keywords = []
|
|
|
|
|
|
|
|
keywordspersentence = {}
|
|
|
|
|
|
|
|
alltext = ""
|
|
|
|
|
|
|
|
allcharacters = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
n = "ocr/output.txt"
|
|
|
|
|
|
|
|
text = open(n).read()
|
|
|
|
|
|
|
|
rawtext = text.replace("\n", " ")
|
|
|
|
|
|
|
|
#get pos
|
|
|
|
|
|
|
|
for letter in list(rawtext):
|
|
|
|
|
|
|
|
if letter not in allcharacters and not letter.isalpha():
|
|
|
|
|
|
|
|
allcharacters.append(letter)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
alltext += rawtext
|
|
|
|
|
|
|
|
text = cleanedText(text)
|
|
|
|
|
|
|
|
thistext = TextBlob(text)
|
|
|
|
|
|
|
|
words = word_tokenize(text)
|
|
|
|
|
|
|
|
listWithPos = nltk.pos_tag(words)
|
|
|
|
|
|
|
|
print(thistext.tags)
|
|
|
|
|
|
|
|
for key, tag in thistext.tags:
|
|
|
|
|
|
|
|
if(tag == "VBP"):
|
|
|
|
|
|
|
|
verbs.append(key)
|
|
|
|
|
|
|
|
if(tag == "NN" or tag == "NNS"):
|
|
|
|
|
|
|
|
nouns.append(key)
|
|
|
|
|
|
|
|
if(tag == "RB"):
|
|
|
|
|
|
|
|
adverbs.append(key)
|
|
|
|
|
|
|
|
if(tag == "JJ"):
|
|
|
|
|
|
|
|
adjectives.append(key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("Verbs: {}".format(verbs))
|
|
|
|
|
|
|
|
print("Nouns: {}".format(nouns))
|
|
|
|
|
|
|
|
print("Adverbs: {}".format(adverbs))
|
|
|
|
|
|
|
|
print("Adjectives: {}".format(adjectives))
|
|
|
|
|
|
|
|
#keywords
|
|
|
|
|
|
|
|
r.extract_keywords_from_text(rawtext)
|
|
|
|
|
|
|
|
phrases = r.get_ranked_phrases_with_scores()
|
|
|
|
|
|
|
|
for key, phrase in phrases:
|
|
|
|
|
|
|
|
if key > 2:
|
|
|
|
|
|
|
|
keywords.append(phrase)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("Keywords: {}".format(keywords))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# keywordsofsentences
|
|
|
|
|
|
|
|
sentences = sent_tokenize(text)
|
|
|
|
|
|
|
|
for sentence in sentences:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
r.extract_keywords_from_text(sentence)
|
|
|
|
|
|
|
|
keys = r.get_ranked_phrases()
|
|
|
|
|
|
|
|
for key in keys:
|
|
|
|
|
|
|
|
if key not in keywordspersentence:
|
|
|
|
|
|
|
|
keywordspersentence[key] = []
|
|
|
|
|
|
|
|
keywordspersentence[key].append({'filename': n, 'sentence': sentence, 'key': key})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print(allcharacters)
|
|
|
|
|
|
|
|
index["nouns"]=nouns
|
|
|
|
|
|
|
|
index["verbs"]=verbs
|
|
|
|
|
|
|
|
index["pronouns"]=pronouns
|
|
|
|
|
|
|
|
index["adverbs"]=adverbs
|
|
|
|
|
|
|
|
index["adjectives"]=adjectives
|
|
|
|
|
|
|
|
index["keywords"]=keywords
|
|
|
|
|
|
|
|
index["keywordspersentence"]=keywordspersentence
|
|
|
|
|
|
|
|
index["rawtext"]=alltext
|
|
|
|
|
|
|
|
chars = np.asarray(allcharacters)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
chars = np.asarray(index["chars"])
|
|
|
|
bot = HelloBot(args.channel, args.nickname, args.server, args.port, index)
|
|
|
|
bot = HelloBot(args.channel, args.nickname, args.server, args.port, index)
|
|
|
|
bot.start()
|
|
|
|
bot.start()
|
|
|
|
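
For reference, a minimal sketch of the index structure that src/database.json is assumed to hold, assembled only from the keys this diff reads and writes (index["nouns"] through index["rawtext"], plus index["chars"], which the new startup code reads but no line shown here writes). All sample values are hypothetical placeholders:

    # Sketch only: the shape src/database.json is assumed to have.
    # Sample values are illustrative, not taken from any real corpus.
    index = {
        "nouns": ["poem"],                       # filled from NN/NNS tags
        "verbs": ["write"],                      # filled from VBP tags
        "pronouns": ["I", "you", "we", "they"],
        "adverbs": ["slowly"],                   # filled from RB tags
        "adjectives": ["strange"],               # filled from JJ tags
        "keywords": ["ranked phrase"],           # phrases with score > 2
        "keywordspersentence": {
            "ranked phrase": [{"filename": "ocr/output.txt",
                               "sentence": "A sample sentence.",
                               "key": "ranked phrase"}]
        },
        "rawtext": "A sample sentence.",
        "chars": [" ", ".", ",", ":"],           # consumed by np.asarray(index["chars"])
    }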