text split into paragraphs

master
Castro0o 4 years ago
parent 8b9ff87896
commit 8a5f12b778

File diff suppressed because one or more lines are too long

@ -9,7 +9,6 @@ import codecs
from nltk import sent_tokenize, word_tokenize, pos_tag
from nltk.probability import FreqDist
from nltk.corpus import stopwords
from PIL import Image
import base64
nltk.download('stopwords')
@ -22,6 +21,8 @@ nltk.download('stopwords')
# Open the txt file and read it in one go. The context manager closes the
# handle as soon as the contents are in memory instead of leaving it open
# for the rest of the script.
with open('faceapp.txt', 'r') as file:
    text = file.read()
# Split on blank lines so every paragraph can be processed on its own.
text_list = text.split("\n\n")
#not sure if this works..
x = 1
@ -225,11 +226,13 @@ print('</div>')
#ToS text
import html  # stdlib; imported here so this section is self-contained

print('<div class ="paragraph">')
# Tag every token of the full text with its part of speech and emit one
# <span> per token, using the POS tag as the CSS class.
tokenized = word_tokenize(text)
tagged = pos_tag(tokenized)
for word, pos in tagged:
    # Escape both values: tokens such as "&" or "<" (and punctuation tags)
    # would otherwise produce malformed HTML. Browsers decode the entities
    # back, so the class names seen by CSS are unchanged.
    print('<span class="{}">{}</span>'.format(html.escape(pos), html.escape(word)))
import html  # stdlib; imported here so this section is self-contained

# Render the text paragraph by paragraph: one <p> per paragraph, one <span>
# per token, with the token's POS tag as the CSS class.
for paragraph in text_list:
    # Splitting on blank lines can yield empty or whitespace-only chunks
    # (e.g. three or more consecutive newlines); skip them rather than
    # emitting empty <p></p> pairs.
    if not paragraph.strip():
        continue
    tokenized = word_tokenize(paragraph)
    tagged = pos_tag(tokenized)
    print('<p>')
    for word, pos in tagged:
        # Escape both values so tokens such as "&" or "<" cannot break the
        # markup. Browsers decode the entities back, so the class names
        # seen by CSS are unchanged.
        print('<span class="{}">{}</span>'.format(html.escape(pos), html.escape(word)))
    print('</p>')
print('</div>')

Loading…
Cancel
Save