paragraphs
parent a8ea89ace4
commit f9aac615b7
@@ -0,0 +1,20 @@
from nltk import word_tokenize, pos_tag


# faceapp_file = open('faceapp.txt','r')
with open('tos_file/faceapp.txt', 'r') as faceapp_file:
    faceapp_text = faceapp_file.read()

faceapp_text_list = faceapp_text.split("\n\n")


for paragraph in faceapp_text_list:
    tokenized = word_tokenize(paragraph)
    tagged = pos_tag(tokenized)
    print('<p>')
    for word, pos in tagged:
        print('<span class="{}">{}</span>'.format(
            pos.replace('.', 'DOT'), word))
    print('</p>')
print('</div>')
File diff suppressed because one or more lines are too long
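The new script reads the FaceApp terms-of-service text, splits it on blank lines into paragraphs, and runs NLTK's word_tokenize and pos_tag over each paragraph; every token is printed as a <span> whose class is its Penn Treebank part-of-speech tag (with '.' rewritten to 'DOT' so it remains a usable CSS class name), grouped into <p> blocks and followed by a single closing </div>, which presumably matches a <div> opened in the surrounding HTML template. As a rough sketch of the markup this produces, the snippet below tags one hypothetical sample sentence; the tag_to_class helper is not part of the commit and simply extends the DOT substitution to the other Penn Treebank punctuation tags while HTML-escaping the token text.

# Minimal sketch, not part of the commit: produce the same kind of markup for a
# hypothetical sample sentence. Requires the NLTK data used by word_tokenize and
# pos_tag, e.g. import nltk; nltk.download('punkt');
# nltk.download('averaged_perceptron_tagger') (exact names vary by NLTK version).
import html

from nltk import word_tokenize, pos_tag


def tag_to_class(tag):
    # The commit only maps '.' to 'DOT'; this hypothetical helper also covers the
    # other Penn Treebank punctuation tags, which are not valid CSS class names.
    punctuation_tags = {'.': 'DOT', ',': 'COMMA', ':': 'COLON', '$': 'DOLLAR',
                        '#': 'HASH', "''": 'QUOTE', '``': 'QUOTE'}
    return punctuation_tags.get(tag, tag)


sample_paragraph = "FaceApp may modify these Terms at any time."  # hypothetical text
print('<p>')
for word, pos in pos_tag(word_tokenize(sample_paragraph)):
    # html.escape keeps tokens such as '&' or '<' from breaking the generated markup
    print('<span class="{}">{}</span>'.format(tag_to_class(pos), html.escape(word)))
print('</p>')

Each word then carries a class (NNP, MD, VB, and so on) that the page's CSS can style per part of speech.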