You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3.1 KiB

In [2]:
import nltk

with open('../speech.txt','r') as result:
    r = result.read()
    
r = r.replace('<span class="interim"></span>','').replace('\n','. ')

l=nltk.word_tokenize(r)
pos = nltk.pos_tag(l)

html = ''
for x in pos:
    if x[0] == '.':
        html += "<span class='dot'>.<span><br> "
    else:
        html += "<span class='"+x[1]+"'> "+x[0]+"<span>"
        
In [3]:
html
Out[3]:
"<span class='CC'>and<span> <span class='RB'>again<span> <span class='VBP'>are<span> <span class='PRP'>we<span> <span class='VBP'>are<span> <span class='dot'>.<span><br> <span class='NN'>let<span> <span class='POS'>'s<span> <span class='VB'>see<span> <span class='IN'>if<span> <span class='PRP'>it<span> <span class='VBZ'>works<span> <span class='RB'>again<span> <span class='IN'>with<span> <span class='DT'>these<span> <span class='JJ'>fantastic<span> <span class='NN'>xt500<span> <span class='dot'>.<span><br> "
In [127]:
with open('index.html','w') as index:
    index.write(html)
In [137]:
 
In [138]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: