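# NOTE(review): the two lines below appear to be web-page text captured along
# with the script; they are not part of the program.
# You cannot select more than 25 topics
# Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.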
import nltk

# Exploration of the text stored in 'faceapp.txt': tokenize it with NLTK,
# load a stopword list, normalise the words, and print a concordance.

# Read the whole corpus into memory; the context manager closes the handle
# as soon as the text has been read.
with open('faceapp.txt', 'r') as file:
    raw = file.read()

# Tokenize the raw text and wrap the tokens in an nltk.Text, which provides
# the concordance lookup used at the end of the script.
tokens = nltk.word_tokenize(raw)
faceapp = nltk.Text(tokens)

# Stopwords are common words that should not be counted, like "a", "an", "the".
# One entry per line of 'stopwords.txt', with surrounding whitespace stripped.
with open('stopwords.txt') as stopwords_file:
    stopwords = {line.strip() for line in stopwords_file}

# Dictionary presumably meant to map word -> count; nothing in the code
# visible here fills it.
wordcount = {}

# Split words from punctuation so "book" and "book!" count as the same word.
# A single translation table removes every listed character in one pass.
# NOTE(review): only the opening curly quotes are listed; the closing ones
# (” and ’) are left in place, exactly as in the original replace chain.
_strip_table = str.maketrans("", "", ".,:\"!“‘*()")

for word in raw.lower().split():
    word = word.translate(_strip_table)
    # NOTE(review): the cleaned word is not stored anywhere, and neither
    # `stopwords` nor `wordcount` is consulted here. Confirm whether the
    # counting step is missing or lives elsewhere in the file.

# Print every occurrence of the token 'a' together with its context.
faceapp.concordance('a')