text split into paragraphs

master
Castro0o 5 years ago
parent 8b9ff87896
commit 8a5f12b778

File diff suppressed because one or more lines are too long

@@ -9,7 +9,6 @@ import codecs
from nltk import sent_tokenize, word_tokenize, pos_tag from nltk import sent_tokenize, word_tokenize, pos_tag
from nltk.probability import FreqDist from nltk.probability import FreqDist
from nltk.corpus import stopwords from nltk.corpus import stopwords
from PIL import Image
import base64 import base64
nltk.download('stopwords') nltk.download('stopwords')
@@ -22,6 +21,8 @@ nltk.download('stopwords')
#open the txt file, read, and tokenize #open the txt file, read, and tokenize
file = open('faceapp.txt','r') file = open('faceapp.txt','r')
text = file.read() text = file.read()
text_list = text.split("\n\n")
#not sure if this works.. #not sure if this works..
x = 1 x = 1
@@ -225,11 +226,13 @@ print('</div>')
#ToS text #ToS text
print('<div class ="paragraph">') print('<div class ="paragraph">')
tokenized = word_tokenize(text) for paragraph in text_list:
tagged = pos_tag(tokenized) tokenized = word_tokenize(paragraph)
tagged = pos_tag(tokenized)
for word, pos in tagged: print('<p>')
for word, pos in tagged:
print('<span class="{}">{}</span>'.format(pos, word)) print('<span class="{}">{}</span>'.format(pos, word))
print('</p>')
print('</div>') print('</div>')

Loading…
Cancel
Save