You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2.8 KiB

NLTK - Similar Words

In [ ]:
import nltk
In [ ]:
# Load the whole source text into memory as one string.
# The context manager closes the file handle as soon as the read is done,
# rather than leaving an open handle behind for the garbage collector.
with open('../txt/language.txt') as corpus_file:
    txt = corpus_file.read()

Tokens

In [ ]:
# Tokenize the raw text with NLTK's word tokenizer; `tokens` is reused by
# the following cells.
tokens = nltk.word_tokenize(txt)

# Show the token count and a short preview only: printing the complete list
# floods the cell output for anything longer than a paragraph.
print(len(tokens), "tokens; first 50:")
print(tokens[:50])

NLTK Text object

In [ ]:
# Wrap the token list in an nltk.Text object; the cells below call its
# concordance(), similar() and common_contexts() methods.
text = nltk.Text(tokens)
# Print the Text object itself to check that it was built from our tokens.
print(text)

concordance

In [ ]:
# This is what you did with Michael before the break ...
# Text.concordance() prints each occurrence of the word with its surrounding
# context and returns None, so there is nothing useful to store in a variable:
# call it for its printed output only.
text.concordance("language")

similarities

In [ ]:
# With a small next step ...
# Text.similar() prints the words that appear in the same contexts as the
# given word and returns None, so the result is not assigned to a variable.
text.similar("language")
In [ ]:
# And searching for contexts ...
# Text.common_contexts() takes a list of words, prints the contexts they
# share, and returns None, so the result is not assigned to a variable.
text.common_contexts(["language"])

Read on

In [ ]: