Merge branch 'master' of git.xpub.nl:/var/www/git.xpub.nl/repos/OuNoPo-make

master
ange 7 years ago
commit 660f89544a

BIN
.DS_Store vendored

Binary file not shown.

@ -61,7 +61,7 @@ output/tagged-words.txt: ocr/output.txt ## Analyzes OCR'ed text using a Part of
# >>> import nltk # >>> import nltk
# >>> nltk.download('averaged_perceptron_tagger') # >>> nltk.download('averaged_perceptron_tagger')
output/chatbot.txt: ocr/output.txt ## DESCRIBE WHAT IT DOES. Dependencies: python3's chatterbot output/chatbot.txt: ocr/output.txt ## Comments a text with a simple chatbot. Dependencies: python3's chatterbot
cat $< | python3 src/textbotconversation.py $(@) cat $< | python3 src/textbotconversation.py $(@)

@ -1,71 +1,82 @@
import nltk import nltk
from sys import stdin, stdout from sys import stdin, stdout
# Step 1: define input and set up a list # Define input
# Read everything available on standard input in one go.
# Note: this rebinds the builtin name `input` at module level.
# NOTE(review): the statements below look like two columns of a side-by-side
# diff fused onto single lines — confirm against the real file before running.
input = stdin.read() input = stdin.read()
taggedwordlist = []
string = input # FILTER FUNCTIONS
words = nltk.word_tokenize(string) # This function cuts a string into words. Then runs a POS tagger for each word. Returns a list with tags
def postagger(string):
    """Return the part-of-speech tag of every token in *string*.

    The text is split into tokens with ``nltk.word_tokenize`` and the tokens
    are tagged with ``nltk.pos_tag``. Only the tags are returned, as a list
    in token order; the tokens themselves are dropped.
    """
    words = nltk.word_tokenize(string)

    # Tag the token list exactly once. The previous version called
    # nltk.pos_tag(words) again on every loop iteration and threw the
    # result away, which repeated the same work once per token.
    taggedwordlist = nltk.pos_tag(words)

    # Uncomment to print the analysis step:
    # for word, pos in taggedwordlist:
    #     print('{0} is a {1}'.format(word, pos))

    return [pos for _word, pos in taggedwordlist]
taglist = [ pos for word,pos in taggedwordlist ] # This function changes the tags to readable equivalents (NNP to noun for example)
def postagger_readable(list):
    """Map part-of-speech tags to readable names (e.g. ``NNP`` -> ``noun``).

    Returns a new list with one entry per input tag, in the same order.
    A tag with no readable equivalent is passed through unchanged.

    The parameter name shadows the builtin ``list``; it is kept as-is so
    existing callers are unaffected.
    """
    # Several tags share one readable name, so groups are expanded into a
    # single flat tag -> name lookup table.
    grouped_names = (
        (('NNP', 'NNS', 'NN', 'NNPS'), 'noun'),
        (('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'), 'verb'),
        (('RB', 'RBR', 'RBS', 'WRB'), 'adverb'),
        (('PRP', 'PRP$'), 'pronoun'),
        (('JJ', 'JJR', 'JJS'), 'adjective'),
        (('IN',), 'preposition'),
        (('WDT',), 'determiner'),
        (('WP', 'WP$'), 'pronoun'),
        (('UH',), 'interjection'),
        (('POS',), 'possesive ending'),
        (('SYM',), 'symbol'),
        (('EX',), 'existential there'),
        (('DT',), 'determiner'),
        (('MD',), 'modal'),
        (('LS',), 'list item marker'),
        (('FW',), 'foreign word'),
        (('CC',), 'coordinating conjunction '),
        (('CD',), 'cardinal number'),
        (('TO',), 'to'),
        (('.',), 'line ending'),
        ((',',), 'comma'),
    )
    name_for_tag = {}
    for tags, name in grouped_names:
        for known_tag in tags:
            name_for_tag[known_tag] = name

    # Unknown tags fall back to the raw tag itself.
    return [name_for_tag.get(tag, tag) for tag in list]
# Flat (non-function) loop that maps each tag in the module-level `taglist`
# to a readable name and appends it to the module-level `readabletaglist`.
# Tags that match none of the branches are appended unchanged.
# NOTE(review): this repeats the same mapping as postagger_readable() and
# appears to be the removed side of the diff — confirm it is not live code.
for tag in taglist:
if tag in {"NNP","NNS","NN","NNPS"}:
readabletag = 'noun'
elif tag in {'VB','VBD','VBG','VBN','VBP','VBZ'}:
readabletag = 'verb'
elif tag in {'RB','RBR','RBS','WRB'}:
readabletag = 'adverb'
elif tag in {'PRP','PRP$'}:
readabletag = 'pronoun'
elif tag in {'JJ','JJR','JJS'}:
readabletag = 'adjective'
elif tag == 'IN':
readabletag = 'preposition'
elif tag == 'WDT':
readabletag = 'determiner'
elif tag in {'WP','WP$'}:
readabletag = 'pronoun'
elif tag == 'UH':
readabletag = 'interjection'
elif tag == 'POS':
readabletag = 'possesive ending'
elif tag == 'SYM':
readabletag = 'symbol'
elif tag == 'EX':
readabletag = 'existential there'
elif tag == 'DT':
readabletag = 'determiner'
elif tag == 'MD':
readabletag = 'modal'
elif tag == 'LS':
readabletag = 'list item marker'
elif tag == 'FW':
readabletag = 'foreign word'
elif tag == 'CC':
readabletag = 'coordinating conjunction '
elif tag == 'CD':
readabletag = 'cardinal number'
elif tag == 'TO':
readabletag = 'to'
elif tag == '.':
readabletag = 'line ending'
elif tag == ',':
readabletag = 'comma'
else:
readabletag = tag
readabletaglist.append(readabletag) # This function creates the output
def main():
    """Tag the text held in the module-level ``input`` and print the result.

    Runs ``postagger`` on the text, converts the tags with
    ``postagger_readable`` and writes them to stdout as a single
    space-separated line terminated by a newline.
    """
    tags = postagger(input)
    readable_tags = postagger_readable(tags)
    stdout.write(' '.join(readable_tags) + '\n')


# The script is meant to be used in a pipe, so it runs as soon as it is loaded.
main()

Loading…
Cancel
Save