@@ -37,10 +37,7 @@ print('''<!DOCTYPE>
 <head>
 <script src="https://code.jquery.com/jquery-3.5.0.min.js"></script>
 <link rel="stylesheet" href="faceapp.css">
-<<<<<<< HEAD
-=======
 <link rel="stylesheet" href="legend.css">
->>>>>>> 6ee205254e8d1a95ada02cc7d5f0ac0fc0d3d046
 <script src="highlight.js"></script>
 <meta charset="utf-8">
@@ -84,10 +81,8 @@ print('</div></div>')
 #ToS text
 print('<div class ="paragraph">')
-tokenized_all = []
 for paragraph in faceapp_text_list:
     tokenized = word_tokenize(paragraph)
-    tokenized_all += tokenized # add to the tokenized_all
     tagged = pos_tag(tokenized)
     print('<p>')
     for word, pos in tagged:
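Note on the hunk above: tokenized is rebound on every pass of the loop, so with tokenized_all and its accumulation removed, nothing retains tokens from earlier paragraphs once the loop finishes. A minimal sketch of the loop's behaviour, assuming NLTK's standard word_tokenize/pos_tag API and using a placeholder faceapp_text_list in place of the ToS paragraphs loaded earlier in the script:

    import nltk
    from nltk import pos_tag
    from nltk.tokenize import word_tokenize
    # nltk.download('punkt'); nltk.download('averaged_perceptron_tagger')  # one-time setup

    # Assumed stand-in for the real ToS paragraphs built elsewhere in faceapp.py.
    faceapp_text_list = [
        "You grant FaceApp a perpetual license.",
        "We may share information with third parties.",
    ]

    tokenized_all = []                        # the accumulator this diff removes
    for paragraph in faceapp_text_list:
        tokenized = word_tokenize(paragraph)  # tokens of the current paragraph only
        tokenized_all += tokenized            # old behaviour: collect every paragraph
        tagged = pos_tag(tokenized)           # (word, tag) pairs, e.g. ('share', 'VB')

    # After the loop: tokenized holds only the last paragraph's tokens,
    # while tokenized_all (now removed) held the tokens of every paragraph.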
@@ -100,7 +95,7 @@ print('</div>')
 #tos top words list
 print('<div class="top_words"><div class="top_words_title" ><b>Frequent words</b></div>')
-tokens_without_stopwords = nltk.FreqDist(words.lower() for words in tokenized_all if words.lower() not in tos_all_stopwords)
+tokens_without_stopwords = nltk.FreqDist(words.lower() for words in tokenized if words.lower() not in tos_all_stopwords)
 frequency_word = FreqDist(tokens_without_stopwords)
 top_words = tokens_without_stopwords.most_common(30)
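Taken together with the previous hunk, this means the "Frequent words" list is now computed from tokenized, i.e. only the last paragraph processed by the loop, whereas tokenized_all covered the whole document. Also worth noting: nltk.FreqDist subclasses collections.Counter, so FreqDist(tokens_without_stopwords) merely copies an already-built distribution. A minimal sketch of the post-change behaviour, with tos_all_stopwords as an assumed placeholder for the stopword set defined elsewhere:

    import nltk
    from nltk import FreqDist
    from nltk.tokenize import word_tokenize

    tos_all_stopwords = {'the', 'a', 'to', 'and', 'of', '.'}  # assumed placeholder set

    tokenized = word_tokenize("You grant FaceApp a license to use the content.")

    # Counts tokens of a single paragraph only (the new behaviour).
    tokens_without_stopwords = nltk.FreqDist(
        w.lower() for w in tokenized if w.lower() not in tos_all_stopwords
    )
    frequency_word = FreqDist(tokens_without_stopwords)   # a copy; FreqDist is a Counter
    top_words = tokens_without_stopwords.most_common(30)  # e.g. [('faceapp', 1), ...]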