You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3.3 KiB
3.3 KiB
NLTK - Frequency Distribution
In [ ]:
# Standard library: used to pick a random line from the corpus.
import random

# Third-party: provides the tokenizer and FreqDist used below.
import nltk
In [ ]:
# Read the corpus line by line; the context manager closes the file handle
# as soon as the lines are loaded instead of leaving it to the garbage
# collector.
with open('txt/language.txt') as corpus:
    lines = corpus.readlines()

# Pick one line at random to serve as the sample sentence.
sentence = random.choice(lines)
print(sentence)
Tokens
In [ ]:
# Tokenize the sample sentence with NLTK's word tokenizer.
tokens = nltk.word_tokenize(sentence)
print(tokens)
In [ ]:
Frequency Distribution
In [ ]:
# frequency of characters
# Iterating over a string yields single characters, so FreqDist counts how
# often each character occurs in the sentence.
fd = nltk.FreqDist(sentence)
print(fd)
In [ ]:
# Up to 50 most frequent samples in fd, as (sample, count) pairs ordered
# from most to least frequent.
most_frequent = fd.most_common(50)
print(most_frequent)
In [ ]:
In [ ]:
# frequency of words
# Counting the token list yields one entry per distinct token.
fd = nltk.FreqDist(tokens)
print(fd)
In [ ]:
# Up to 50 most frequent samples in fd, as (sample, count) pairs ordered
# from most to least frequent.
most_frequent = fd.most_common(50)
print(most_frequent)
In [ ]:
In [ ]:
# frequency of a text
# Load the whole corpus as one string; the context manager closes the file
# once the contents have been read.
with open('txt/language.txt') as corpus:
    txt = corpus.read()

# Tokenize the full text and count how often each token occurs.
tokens = nltk.word_tokenize(txt)
fd = nltk.FreqDist(tokens)
print(fd)
In [ ]:
# Up to 50 most frequent samples in fd, as (sample, count) pairs ordered
# from most to least frequent.
most_frequent = fd.most_common(50)
print(most_frequent)
In [ ]:
In [ ]:
# Requesting the frequency of a specific word
# Indexing a FreqDist returns that sample's count (0 if it never occurs).
print(fd['language'])
In [ ]: