# Word2Vec demo: train CBOW and Skip Gram models on the text of
# "Alice's Adventures in Wonderland" (Project Gutenberg file 11-0.txt)
# and compare cosine similarities for a related pair ('alice'/'wonderland')
# vs. an unrelated pair ('alice'/'machines').
#
# One-time setup: nltk.download('punkt') is required for the tokenizers.
# import nltk
# nltk.download('punkt')
import warnings

from nltk.tokenize import sent_tokenize, word_tokenize

warnings.filterwarnings(action='ignore')

import gensim
from gensim.models import Word2Vec

# Read the corpus; Gutenberg plain-text files are UTF-8, so say so explicitly
# rather than depending on the platform default encoding.
with open('11-0.txt', 'r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

# Replace newlines with spaces so the sentence tokenizer sees one continuous
# stream of text.  (Bound to its own name -- the original reused `f`, shadowing
# the file handle above.)
cleaned_text = text.replace('\n', ' ')

# Build the training corpus: a list of sentences, each a list of lower-cased
# word tokens, which is the input format gensim's Word2Vec expects.
data = []
for sentence in sent_tokenize(cleaned_text):
    data.append([token.lower() for token in word_tokenize(sentence)])

# Train the CBOW model (sg=0 is the Word2Vec default).
model_cbow = gensim.models.Word2Vec(data, min_count=1, vector_size=100, window=5)

print(f"Cosine similarity between 'alice' and 'wonderland' - CBOW : {model_cbow.wv.similarity('alice', 'wonderland')}")
# Label fixed: the word actually compared is 'machines', not 'machine'.
print(f"Cosine similarity between 'alice' and 'machines' - CBOW : {model_cbow.wv.similarity('alice', 'machines')}")

# Train the Skip Gram model (sg=1).
model_sg = gensim.models.Word2Vec(data, min_count=1, vector_size=100, window=5, sg=1)

# Labels fixed: these results come from the Skip Gram model; the original
# script printed "CBOW" here, mislabeling the output.
print(f"Cosine similarity between 'alice' and 'wonderland' - Skip Gram : {model_sg.wv.similarity('alice', 'wonderland')}")
print(f"Cosine similarity between 'alice' and 'machines' - Skip Gram : {model_sg.wv.similarity('alice', 'machines')}")