# Provenance: created as src/wordtagger.py by commit 75302e0
# ("Forgot to add my actual script", jvdhorst, 2018-02-23).
"""Print a readable part-of-speech label for every token read from stdin.

The script reads all of standard input, tokenizes it with
``nltk.word_tokenize``, tags the tokens with ``nltk.pos_tag`` and writes
the labels to standard output as one space-separated line (no trailing
newline).  Tags without an entry in the lookup table are written
unchanged.

NOTE(review): the NLTK tokenizer/tagger data presumably has to be
installed for ``tag_words`` to work -- confirm for the target environment.
"""

from sys import stdin, stdout

# Tag emitted by nltk.pos_tag -> label written to stdout.
_READABLE_TAGS = {
    # nouns
    'NN': 'noun',
    'NNS': 'noun',
    'NNP': 'noun',
    'NNPS': 'noun',
    # verbs
    'VB': 'verb',
    'VBD': 'verb',
    'VBG': 'verb',
    'VBN': 'verb',
    'VBP': 'verb',
    'VBZ': 'verb',
    # adverbs
    'RB': 'adverb',
    'RBR': 'adverb',
    'RBS': 'adverb',
    'WRB': 'adverb',
    # pronouns
    'PRP': 'pronoun',
    'PRP$': 'pronoun',
    'WP': 'pronoun',
    'WP$': 'pronoun',
    # adjectives
    'JJ': 'adjective',
    'JJR': 'adjective',
    'JJS': 'adjective',
    # determiners
    'DT': 'determiner',
    'WDT': 'determiner',
    # everything else
    'IN': 'preposition',
    'UH': 'interjection',
    'POS': 'possessive ending',
    'SYM': 'symbol',
    'EX': 'existential there',
    'MD': 'modal',
    'LS': 'list item marker',
    'FW': 'foreign word',
    # No trailing space: labels are joined with ' ', so one would show up
    # as a double space in the output.
    'CC': 'coordinating conjunction',
    'CD': 'cardinal number',
    'TO': 'to',
    '.': 'line ending',
    ',': 'comma',
}


def readable_tag(tag):
    """Return the readable label for *tag*, or *tag* itself if unmapped."""
    return _READABLE_TAGS.get(tag, tag)


def readable_tags(tagged_words):
    """Return the readable labels for an iterable of ``(word, tag)`` pairs.

    The result has one label per pair, in input order.
    """
    return [readable_tag(pos) for _word, pos in tagged_words]


def tag_words(text):
    """Tokenize *text* and return the ``(word, tag)`` pairs from NLTK.

    The tagger runs exactly once over the whole token list.
    """
    # Imported here rather than at module level so that the pure mapping
    # helpers above stay importable when nltk is not installed.
    import nltk

    return nltk.pos_tag(nltk.word_tokenize(text))


def main():
    """Tag everything on stdin and write the space-joined labels to stdout."""
    stdout.write(' '.join(readable_tags(tag_words(stdin.read()))))


if __name__ == '__main__':
    main()