Compare commits

...

2 Commits

Author     SHA1         Message               Date
Castro0o   52ca77ce4d   fixed conflict        4 years ago
Castro0o   3fb9249ca6   all tokenized words   4 years ago

File diff suppressed because one or more lines are too long

@@ -37,7 +37,10 @@ print('''<!DOCTYPE>
 <head>
 <script src="https://code.jquery.com/jquery-3.5.0.min.js"></script>
 <link rel="stylesheet" href="faceapp.css">
+<<<<<<< HEAD
+=======
+<link rel="stylesheet" href="legend.css">
+>>>>>>> 6ee205254e8d1a95ada02cc7d5f0ac0fc0d3d046
 <script src="highlight.js"></script>
 <meta charset="utf-8">
@@ -81,8 +84,10 @@ print('</div></div>')
 #ToS text
 print('<div class ="paragraph">')
+tokenized_all = []
 for paragraph in faceapp_text_list:
     tokenized = word_tokenize(paragraph)
+    tokenized_all += tokenized # add to the tokenized_all
     tagged = pos_tag(tokenized)
     print('<p>')
     for word, pos in tagged:
@@ -95,7 +100,7 @@ print('</div>')
 #tos top words list
 print('<div class="top_words"><div class="top_words_title" ><b>Frequent words</b></div>')
-tokens_without_stopwords = nltk.FreqDist(words.lower() for words in tokenized if words.lower() not in tos_all_stopwords)
+tokens_without_stopwords = nltk.FreqDist(words.lower() for words in tokenized_all if words.lower() not in tos_all_stopwords)
 frequency_word = FreqDist(tokens_without_stopwords)
 top_words = tokens_without_stopwords.most_common(30)
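
The first hunk leaves Git's conflict markers inside the triple-quoted print() call that writes the HTML head, so the generated page would contain them verbatim. A minimal sketch of what the resolved block would presumably look like, assuming the intended resolution keeps both stylesheets (an assumption, not something the commits above confirm):

# Hypothetical resolution of the first hunk's conflict:
# keep both stylesheet links, drop the <<<<<<< / ======= / >>>>>>> markers.
print('''<!DOCTYPE>
<head>
<script src="https://code.jquery.com/jquery-3.5.0.min.js"></script>
<link rel="stylesheet" href="faceapp.css">
<link rel="stylesheet" href="legend.css">
<script src="highlight.js"></script>
<meta charset="utf-8">
''')  # the rest of the head and body continues in the original script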
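
Taken together, the second and third hunks change the "Frequent words" count from using only the last paragraph's tokens (tokenized) to using the tokens of every paragraph (tokenized_all). A self-contained sketch of that logic, assuming faceapp_text_list holds the Terms-of-Service paragraphs as strings and tos_all_stopwords is a stopword set (here built from NLTK's English list; the repository may define it differently):

# Requires: nltk.download('punkt'), nltk.download('stopwords'),
#           nltk.download('averaged_perceptron_tagger')
from nltk import word_tokenize, pos_tag, FreqDist
from nltk.corpus import stopwords

# Placeholder paragraphs standing in for faceapp_text_list.
faceapp_text_list = [
    "You grant FaceApp a perpetual, irrevocable license to use your content.",
    "FaceApp may share your information with third-party service providers.",
]
tos_all_stopwords = set(stopwords.words('english'))  # assumed stopword set

# Accumulate every paragraph's tokens instead of keeping only the last one.
tokenized_all = []
for paragraph in faceapp_text_list:
    tokenized = word_tokenize(paragraph)
    tokenized_all += tokenized
    tagged = pos_tag(tokenized)  # the script uses these POS tags to mark up each word

# Count word frequencies over all paragraphs, skipping stopwords,
# then keep the 30 most common words for the "Frequent words" list.
tokens_without_stopwords = FreqDist(
    word.lower() for word in tokenized_all
    if word.lower() not in tos_all_stopwords
)
top_words = tokens_without_stopwords.most_common(30)
print(top_words)

The intermediate frequency_word = FreqDist(tokens_without_stopwords) line in the diff only copies the counts into a second FreqDist, so this sketch works directly from tokens_without_stopwords.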
