Rewrote script to make use of functions.
parent
3cd30d75a5
commit
89e8436cd0
@ -1,71 +1,82 @@
|
||||
import nltk
|
||||
from sys import stdin, stdout
|
||||
|
||||
# Step 1: define input and set up a list
|
||||
# Read all of standard input up front; main() tags this text (note: the name shadows the builtin input())
|
||||
input = stdin.read()
|
||||
taggedwordlist = []
|
||||
|
||||
string = input
|
||||
words = nltk.word_tokenize(string)
|
||||
taggedwordlist = nltk.pos_tag(words)
|
||||
|
||||
for word, pos in nltk.pos_tag(words):
|
||||
# FILTER FUNCTIONS
|
||||
# This function splits a string into words, runs the POS tagger over them, and returns a list with one tag per word
|
||||
def postagger(string):
    """Tokenize *string* and return the part-of-speech tag of every token.

    The text is split into tokens with ``nltk.word_tokenize`` and the tokens
    are tagged with ``nltk.pos_tag``. Only the tags are kept, in token order.

    Args:
        string: The text to analyse.

    Returns:
        A list with one tag per token, in the order the tokens occur.
    """
    words = nltk.word_tokenize(string)
    # pos_tag yields (word, tag) pairs; tag once and keep only the tags.
    return [pos for _word, pos in nltk.pos_tag(words)]
|
||||
|
||||
taglist = [ pos for word,pos in taggedwordlist ]
|
||||
# This function changes the tags to readable equivalents (NNP to noun for example)
|
||||
# Lookup table from POS tag to its human-readable name.
# Tags that are not listed here are passed through unchanged.
_READABLE_TAGS = {
    'NN': 'noun',
    'NNS': 'noun',
    'NNP': 'noun',
    'NNPS': 'noun',
    'VB': 'verb',
    'VBD': 'verb',
    'VBG': 'verb',
    'VBN': 'verb',
    'VBP': 'verb',
    'VBZ': 'verb',
    'RB': 'adverb',
    'RBR': 'adverb',
    'RBS': 'adverb',
    'WRB': 'adverb',
    'PRP': 'pronoun',
    'PRP$': 'pronoun',
    'WP': 'pronoun',
    'WP$': 'pronoun',
    'JJ': 'adjective',
    'JJR': 'adjective',
    'JJS': 'adjective',
    'IN': 'preposition',
    'DT': 'determiner',
    'WDT': 'determiner',
    'UH': 'interjection',
    'POS': 'possessive ending',
    'SYM': 'symbol',
    'EX': 'existential there',
    'MD': 'modal',
    'LS': 'list item marker',
    'FW': 'foreign word',
    # No trailing space: the caller joins the names with ' ' itself.
    'CC': 'coordinating conjunction',
    'CD': 'cardinal number',
    'TO': 'to',
    '.': 'line ending',
    ',': 'comma',
}


def postagger_readable(list):
    """Translate POS tags into readable equivalents (e.g. ``NNP`` -> ``noun``).

    Args:
        list: An iterable of tag strings. The parameter name shadows the
            builtin ``list``; it is kept so existing callers keep working.

    Returns:
        A new list with one readable name per input tag, in the same order.
        A tag without a known readable name is returned unchanged.
    """
    return [_READABLE_TAGS.get(tag, tag) for tag in list]
|
||||
|
||||
for tag in taglist:
|
||||
if tag in {"NNP","NNS","NN","NNPS"}:
|
||||
readabletag = 'noun'
|
||||
elif tag in {'VB','VBD','VBG','VBN','VBP','VBZ'}:
|
||||
readabletag = 'verb'
|
||||
elif tag in {'RB','RBR','RBS','WRB'}:
|
||||
readabletag = 'adverb'
|
||||
elif tag in {'PRP','PRP$'}:
|
||||
readabletag = 'pronoun'
|
||||
elif tag in {'JJ','JJR','JJS'}:
|
||||
readabletag = 'adjective'
|
||||
elif tag == 'IN':
|
||||
readabletag = 'preposition'
|
||||
elif tag == 'WDT':
|
||||
readabletag = 'determiner'
|
||||
elif tag in {'WP','WP$'}:
|
||||
readabletag = 'pronoun'
|
||||
elif tag == 'UH':
|
||||
readabletag = 'interjection'
|
||||
elif tag == 'POS':
|
||||
readabletag = 'possesive ending'
|
||||
elif tag == 'SYM':
|
||||
readabletag = 'symbol'
|
||||
elif tag == 'EX':
|
||||
readabletag = 'existential there'
|
||||
elif tag == 'DT':
|
||||
readabletag = 'determiner'
|
||||
elif tag == 'MD':
|
||||
readabletag = 'modal'
|
||||
elif tag == 'LS':
|
||||
readabletag = 'list item marker'
|
||||
elif tag == 'FW':
|
||||
readabletag = 'foreign word'
|
||||
elif tag == 'CC':
|
||||
readabletag = 'coordinating conjunction '
|
||||
elif tag == 'CD':
|
||||
readabletag = 'cardinal number'
|
||||
elif tag == 'TO':
|
||||
readabletag = 'to'
|
||||
elif tag == '.':
|
||||
readabletag = 'line ending'
|
||||
elif tag == ',':
|
||||
readabletag = 'comma'
|
||||
else:
|
||||
readabletag = tag
|
||||
|
||||
readabletaglist.append(readabletag)
|
||||
# This function creates the output
|
||||
def main():
    """Tag the text held in the module-level ``input`` and print the result.

    The text is passed through ``postagger`` and ``postagger_readable``; the
    readable tag names are written to standard output on one line, separated
    by single spaces and followed by a newline.
    """
    # `input` is the module-level variable filled from stdin, not the builtin.
    readable_names = postagger_readable(postagger(input))
    stdout.write(' '.join(readable_names) + '\n')
|
||||
|
||||
stdout.write(' '.join(readabletaglist))
|
||||
main()
|
||||
|
Loading…
Reference in New Issue