|
|
|
@ -1,26 +1,28 @@
|
|
|
|
|
import nltk
|
|
|
|
|
from sys import stdin, stdout
|
|
|
|
|
|
|
|
|
|
# Step 1: define input and set up a list
|
|
|
|
|
# Define input
|
|
|
|
|
# Read everything available on standard input into a single string.
# NOTE(review): the name `input` shadows Python's built-in input() for the rest of this module.
input = stdin.read()
|
|
|
|
|
taggedwordlist = []
|
|
|
|
|
|
|
|
|
|
string = input
|
|
|
|
|
words = nltk.word_tokenize(string)
|
|
|
|
|
taggedwordlist = nltk.pos_tag(words)
|
|
|
|
|
|
|
|
|
|
for word, pos in nltk.pos_tag(words):
|
|
|
|
|
# FILTER FUNCTIONS
|
|
|
|
|
# This function cuts a string into words, then runs a POS tagger over those words. Returns a list with the tags
def postagger(string):
    """Return the part-of-speech tag of every token in *string*.

    The text is split into tokens with ``nltk.word_tokenize`` and the tokens
    are tagged with ``nltk.pos_tag``. Only the tags are returned, in token
    order; the tokens themselves are discarded.
    """
    words = nltk.word_tokenize(string)
    # Tag the whole token list exactly once. Re-running the tagger inside a
    # loop over its own output yields the same pairs and only costs time.
    taggedwordlist = nltk.pos_tag(words)
    return [pos for _word, pos in taggedwordlist]
|
|
|
|
|
|
|
|
|
|
readabletaglist = []
|
|
|
|
|
# This function changes the tags to readable equivalents (NNP to noun for example)
|
|
|
|
|
def postagger_readable(list):
|
|
|
|
|
readabletaglist = []
|
|
|
|
|
|
|
|
|
|
for tag in taglist:
|
|
|
|
|
for tag in list:
|
|
|
|
|
if tag in {"NNP","NNS","NN","NNPS"}:
|
|
|
|
|
readabletag = 'noun'
|
|
|
|
|
elif tag in {'VB','VBD','VBG','VBN','VBP','VBZ'}:
|
|
|
|
@ -67,5 +69,14 @@ for tag in taglist:
|
|
|
|
|
readabletag = tag
|
|
|
|
|
|
|
|
|
|
readabletaglist.append(readabletag)
|
|
|
|
|
return readabletaglist;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# This function creates the output
def main():
    """Tag the text held in the module-level ``input`` and write the result.

    The readable tags are written to standard output as one space-separated
    line, followed by a newline.
    """
    readable_tags = postagger_readable(postagger(input))
    output_line = ' '.join(readable_tags)
    stdout.write(output_line)
    stdout.write('\n')
|
|
|
|
|
|
|
|
|
|
stdout.write(' '.join(readabletaglist))
|
|
|
|
|
main()
|
|
|
|
|