@@ -9,7 +9,6 @@ import codecs
 from nltk import sent_tokenize, word_tokenize, pos_tag
 from nltk.probability import FreqDist
 from nltk.corpus import stopwords
-from PIL import Image
 import base64
 
 nltk.download('stopwords')
@@ -22,6 +21,8 @@ nltk.download('stopwords')
 #open the txt file, read, and tokenize
 file = open('faceapp.txt','r')
 text = file.read()
+text_list = text.split("\n\n")
+
 #not sure if this works..
 x = 1
 
@@ -225,11 +226,13 @@ print('</div>')
 
 #ToS text
 print('<div class ="paragraph">')
-tokenized = word_tokenize(text)
-tagged = pos_tag(tokenized)
-
-for word, pos in tagged:
-    print('<span class="{}">{}</span>'.format(pos, word))
+for paragraph in text_list:
+    tokenized = word_tokenize(paragraph)
+    tagged = pos_tag(tokenized)
+    print('<p>')
+    for word, pos in tagged:
+        print('<span class="{}">{}</span>'.format(pos, word))
+    print('</p>')
 
 print('</div>')
 
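
For reference, a minimal standalone sketch of what the patched ToS section does after this change: split the text into paragraphs on blank lines, then wrap each paragraph in a <p> whose words are <span> elements classed by POS tag. It assumes faceapp.txt sits next to the script and that nltk's 'punkt' and 'averaged_perceptron_tagger' data are available; names mirror the diff, but the imports, downloads, and file handling here are simplifications and not part of the original commit.

import nltk
from nltk import word_tokenize, pos_tag

# one-time resource downloads (assumption: not already installed)
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

with open('faceapp.txt', 'r') as f:
    text = f.read()

# paragraphs are assumed to be separated by blank lines, as in the diff
text_list = text.split("\n\n")

print('<div class="paragraph">')
for paragraph in text_list:
    tagged = pos_tag(word_tokenize(paragraph))  # [(word, POS tag), ...]
    print('<p>')
    for word, pos in tagged:
        # each word becomes a span whose CSS class is its POS tag
        print('<span class="{}">{}</span>'.format(pos, word))
    print('</p>')
print('</div>')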