You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3.3 KiB

NLTK - Frequency Distribution

In [ ]:
import nltk
import random
In [ ]:
# Read the corpus line by line. The `with` block closes the file handle as soon
# as the read finishes instead of leaving it open until garbage collection.
with open('txt/language.txt') as corpus_file:
    lines = corpus_file.readlines()
# readlines() keeps each trailing '\n' and also returns blank lines. Strip the
# surrounding whitespace and skip empty lines so the sampled sentence is never
# blank and the character counts computed from it do not include a newline.
# random.choice raises IndexError if the file has no non-blank line.
candidates = [line.strip() for line in lines if line.strip()]
sentence = random.choice(candidates)
print(sentence)

Tokens

In [ ]:
# Split the sampled sentence into tokens with NLTK's default tokenizer
# (nltk.word_tokenize is the same function, re-exported at package level).
tokens = nltk.tokenize.word_tokenize(sentence)
print(tokens)
In [ ]:
 

Frequency Distribution

In [ ]:
# Character-level frequencies: iterating over a string yields one character at
# a time, so every character of the sentence is counted as its own sample.
fd = nltk.FreqDist(char for char in sentence)
print(fd)
In [ ]:
# List the characters with the highest counts (at most 50), most frequent first.
top_characters = fd.most_common(50)
print(top_characters)
In [ ]:
 
In [ ]:
# Word-level frequencies for the sampled sentence. This rebinds `fd`, replacing
# the character-level distribution built in the earlier cell.
fd = nltk.FreqDist()
fd.update(tokens)
print(fd)
In [ ]:
# List the tokens with the highest counts (at most 50), most frequent first.
top_words = fd.most_common(50)
print(top_words)
In [ ]:
 
In [ ]:
# Word frequencies across the whole text file rather than a single sentence.
# The `with` block closes the file handle once the contents have been read,
# instead of leaving it open until garbage collection.
with open('txt/language.txt') as corpus_file:
    txt = corpus_file.read()
# `tokens` and `fd` are rebound here, replacing the sentence-level values.
tokens = nltk.word_tokenize(txt)
fd = nltk.FreqDist(tokens)
print(fd)
In [ ]:
# List the tokens with the highest counts in the current distribution
# (at most 50), most frequent first.
top_tokens = fd.most_common(50)
print(top_tokens)
In [ ]:
 
In [ ]:
# Look up the count of one specific token. The match is exact and
# case-sensitive: the tokens were not lower-cased, so 'Language' is counted
# under a different key than 'language'.
language_count = fd['language']
print(language_count)
In [ ]: