You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

31 lines
767 B
Python

import nltk
file=open('faceapp.txt','r')
raw=file.read()
tokens = nltk.word_tokenize(raw)
faceapp = nltk.Text(tokens)
# my stopwords are common words I don't want to count, like "a", "an", "the".
stopwords = set(line.strip() for line in open('stopwords.txt'))
# dictionary
wordcount = {}
# spliting words from punctuation so "book" and "book!" counts as the same word
for word in raw.lower().split():
word = word.replace(".","")
word = word.replace(",","")
word = word.replace(":","")
word = word.replace("\"","")
word = word.replace("!","")
word = word.replace("“","")
word = word.replace("‘","")
word = word.replace("*","")
word = word.replace("(","")
word = word.replace(")","")
faceapp.concordance('a')