Rewrote script to make use of functions.

7 years ago · 89e8436cd0
parent 3cd30d75a5
commit 89e8436cd0
2 changed files with 69 additions and 58 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/src/wordtagger.py
+++ b/src/wordtagger.py
@@ -1,71 +1,82 @@
 import nltk
 from sys import stdin, stdout
-# Step 1: define input and set up a list
+# Define input
 # Read everything from standard input once, at module load time; main()
 # passes this text to postagger().
 # NOTE(review): this name shadows the builtin input() for the whole module.
 input = stdin.read()
 # NOTE(review): none of the functions shown here read this module-level
 # list, so it looks like a leftover - confirm before removing.
 taggedwordlist = []
-string = input
+# FILTER FUNCTIONS
-words = nltk.word_tokenize(string)
+# This function splits a string into words, runs the POS tagger on them, and returns a list with the tags
-taggedwordlist = nltk.pos_tag(words)
def postagger(string):
    """Tokenize *string* and return the part-of-speech tag of every token.

    The text is split with ``nltk.word_tokenize`` and the resulting tokens
    are tagged with one ``nltk.pos_tag`` call. Only the tags are returned;
    the tokens themselves are dropped.

    Args:
        string: The text to analyse.

    Returns:
        A list with one tag string per token, in token order. The list is
        empty when the text contains no tokens.
    """
    words = nltk.word_tokenize(string)
    if not words:
        # Nothing to tag. The earlier loop-based version left its result
        # variable unbound in this case and raised UnboundLocalError.
        return []
    # Tag the whole token list exactly once. The earlier version re-ran
    # pos_tag() on the full list inside a loop over its own result, which
    # repeated identical work for every token.
    return [pos for _word, pos in nltk.pos_tag(words)]
-taglist = [ pos for word,pos in taggedwordlist ]
+# This function changes the tags to readable equivalents (NNP to noun for example)
 def postagger_readable(list):
     """Translate POS tags into human-readable names.

     Tags that have a readable name (for example ``NNP`` -> ``noun``) are
     replaced by that name; every other tag is passed through unchanged.

     Args:
         list: Iterable of tag strings. The parameter name shadows the
             builtin, but it is kept so existing callers keep working.

     Returns:
         A new list with one entry per input tag, in the same order.
     """
     # A single lookup table replaces the long if/elif chain. The strings
     # are kept exactly as they were emitted before - including the spelling
     # of 'possesive ending' and the trailing space in 'coordinating
     # conjunction ' - because they are part of the program's output.
     readable_names = {
         'NNP': 'noun', 'NNS': 'noun', 'NN': 'noun', 'NNPS': 'noun',
         'VB': 'verb', 'VBD': 'verb', 'VBG': 'verb',
         'VBN': 'verb', 'VBP': 'verb', 'VBZ': 'verb',
         'RB': 'adverb', 'RBR': 'adverb', 'RBS': 'adverb', 'WRB': 'adverb',
         'PRP': 'pronoun', 'PRP$': 'pronoun',
         'WP': 'pronoun', 'WP$': 'pronoun',
         'JJ': 'adjective', 'JJR': 'adjective', 'JJS': 'adjective',
         'IN': 'preposition',
         'WDT': 'determiner', 'DT': 'determiner',
         'UH': 'interjection',
         'POS': 'possesive ending',
         'SYM': 'symbol',
         'EX': 'existential there',
         'MD': 'modal',
         'LS': 'list item marker',
         'FW': 'foreign word',
         'CC': 'coordinating conjunction ',
         'CD': 'cardinal number',
         'TO': 'to',
         '.': 'line ending',
         ',': 'comma',
     }
     # Unknown tags fall back to themselves, matching the old final `else`.
     return [readable_names.get(tag, tag) for tag in list]
 # Module-level loop: for every tag in `taglist`, pick a readable name and
 # bind it to `readabletag`; tags without a readable name are kept as-is.
 # NOTE(review): this looks like the pre-refactor copy of the mapping that
 # postagger_readable() also implements - confirm whether it should still
 # be here. As shown, nothing inside this loop stores `readabletag`.
 for tag in taglist:
    if tag in {"NNP","NNS","NN","NNPS"}:
        readabletag = 'noun'
    elif tag in {'VB','VBD','VBG','VBN','VBP','VBZ'}:
        readabletag = 'verb'
    elif tag in {'RB','RBR','RBS','WRB'}:
        readabletag = 'adverb'
    elif tag in {'PRP','PRP$'}:
        readabletag = 'pronoun'
    elif tag in {'JJ','JJR','JJS'}:
        readabletag = 'adjective'
    elif tag == 'IN':
        readabletag = 'preposition'
    elif tag == 'WDT':
        readabletag = 'determiner'
    elif tag in {'WP','WP$'}:
        readabletag = 'pronoun'
    elif tag == 'UH':
        readabletag = 'interjection'
    elif tag == 'POS':
        readabletag = 'possesive ending'
    elif tag == 'SYM':
        readabletag = 'symbol'
    elif tag == 'EX':
        readabletag = 'existential there'
    elif tag == 'DT':
        readabletag = 'determiner'
    elif tag == 'MD':
        readabletag = 'modal'
    elif tag == 'LS':
        readabletag = 'list item marker'
    elif tag == 'FW':
        readabletag = 'foreign word'
    elif tag == 'CC':
        # NOTE: this literal ends with a space.
        readabletag = 'coordinating conjunction '
    elif tag == 'CD':
        readabletag = 'cardinal number'
    elif tag == 'TO':
        readabletag = 'to'
    elif tag == '.':
        readabletag = 'line ending'
    elif tag == ',':
        readabletag = 'comma'
    else:
        # No readable name for this tag: keep the tag itself.
        readabletag = tag
-    readabletaglist.append(readabletag)
+# This function creates the output
 def main():
     """Tag the text read from standard input and print the readable tags.

     The readable tag names are written to standard output on a single
     line, separated by spaces and followed by a newline.
     """
     raw_tags = postagger(input)
     output_line = ' '.join(postagger_readable(raw_tags))
     stdout.write(output_line)
     stdout.write('\n')
-stdout.write(' '.join(readabletaglist))
+main()