Rewrote script to make use of functions.

master
jvdhorst 7 years ago
parent 3cd30d75a5
commit 89e8436cd0

BIN
.DS_Store vendored

Binary file not shown.

@ -1,26 +1,28 @@
import nltk import nltk
from sys import stdin, stdout from sys import stdin, stdout
# Step 1: define input and set up a list # Define input
input = stdin.read() input = stdin.read()
taggedwordlist = []
string = input # FILTER FUNCTIONS
words = nltk.word_tokenize(string) # This function cuts a string into words. Then runs a POS tagger for each word. Returns a list with tags
taggedwordlist = nltk.pos_tag(words) def postagger(string):
words = nltk.word_tokenize(string)
for word, pos in nltk.pos_tag(words):
taggedwordlist = nltk.pos_tag(words) taggedwordlist = nltk.pos_tag(words)
# print('{0} is a {1}'.format(word,pos)) # Command out to print the analysis step
taglist = [ pos for word,pos in taggedwordlist ] for word, pos in nltk.pos_tag(words):
taggedwordlist = nltk.pos_tag(words)
#print('{0} is a {1}'.format(word,pos)) # Uncomment to print the analysis step
#print(taglist) taglist = [ pos for word,pos in taggedwordlist ]
#print(taglist)
return taglist;
readabletaglist = [] # This function changes the tags to readable equivalents (NNP to noun for example)
def postagger_readable(list):
readabletaglist = []
for tag in taglist: for tag in list:
if tag in {"NNP","NNS","NN","NNPS"}: if tag in {"NNP","NNS","NN","NNPS"}:
readabletag = 'noun' readabletag = 'noun'
elif tag in {'VB','VBD','VBG','VBN','VBP','VBZ'}: elif tag in {'VB','VBD','VBG','VBN','VBP','VBZ'}:
@ -67,5 +69,14 @@ for tag in taglist:
readabletag = tag readabletag = tag
readabletaglist.append(readabletag) readabletaglist.append(readabletag)
return readabletaglist;
# This function creates the output
def main():
    """Write the readable part-of-speech tags for the input text to stdout.

    Reads the module-level ``input`` (the text read from stdin when the
    script starts), passes it through ``postagger`` and then
    ``postagger_readable``, and writes the resulting tags to stdout as a
    single space-separated line terminated by a newline.
    """
    raw_tags = postagger(input)
    readable_tags = postagger_readable(raw_tags)
    # One line of output: tags joined by single spaces, then a newline.
    stdout.write(' '.join(readable_tags) + '\n')
stdout.write(' '.join(readabletaglist)) main()

Loading…
Cancel
Save