You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

21 lines
481 B
Python

import html

from nltk import word_tokenize, pos_tag
# Render the text in tos_file/faceapp.txt as HTML on stdout: one <p> element
# per blank-line-separated paragraph, and inside it one <span> per token whose
# CSS class is the token's part-of-speech tag.

# Read the whole file up front so the handle is closed before tagging starts.
with open('tos_file/faceapp.txt', 'r') as faceapp_file:
    faceapp_text = faceapp_file.read()

# Paragraphs are separated by an empty line in the input text.
faceapp_text_list = faceapp_text.split("\n\n")

for paragraph in faceapp_text_list:
    tokenized = word_tokenize(paragraph)
    tagged = pos_tag(tokenized)
    print('<p>')
    for word, pos in tagged:
        # A '.' in the tag is spelled out as 'DOT' before it is used as the
        # class name. The token itself is escaped so that '&', '<' and '>'
        # occurring in the text cannot corrupt the generated markup.
        print('<span class="{}">{}</span>'.format(
            pos.replace('.', 'DOT'), html.escape(word, quote=False)))
    print('</p>')

# NOTE(review): no matching opening <div> is printed anywhere in this script;
# presumably the consumer of this output supplies it — confirm.
print('</div>')