You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
21 lines
481 B
Python
21 lines
481 B
Python
import html

from nltk import word_tokenize, pos_tag
|
|
|
|
|
|
# Render the FaceApp terms-of-service text as HTML on stdout: one <p> per
# blank-line-separated paragraph and one <span> per token, with the token's
# part-of-speech tag used as the span's class attribute.
# NOTE(review): no encoding is passed to open(), so the platform default is
# used -- confirm the file's encoding and pass it explicitly if needed.
with open('tos_file/faceapp.txt', 'r') as faceapp_file:
    faceapp_text = faceapp_file.read()

# The file is fully read above, so it can be closed before the (slower)
# tokenizing and tagging work starts.
faceapp_text_list = faceapp_text.split("\n\n")

for paragraph in faceapp_text_list:
    tokenized = word_tokenize(paragraph)
    tagged = pos_tag(tokenized)
    print('<p>')
    for word, pos in tagged:
        # '.' in a tag is rewritten to 'DOT' for the class name (presumably
        # so the class stays usable in a plain CSS selector). The token is
        # HTML-escaped so '<', '>' and '&' in the source text cannot break
        # the generated markup.
        print('<span class="{}">{}</span>'.format(
            pos.replace('.', 'DOT'), html.escape(word, quote=False)))
    print('</p>')

# NOTE(review): no matching opening <div> is printed by this script;
# presumably the surrounding page supplies it -- confirm before removing.
print('</div>')
|
|
|