You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

132 lines
3.1 KiB
Plaintext

3 years ago
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "35367849-a425-404c-bbbd-d8d7138f1838",
"metadata": {},
"outputs": [],
"source": [
"from html import escape\n",
"\n",
"import nltk\n",
"\n",
"# Read the text from ../speech.txt; the context manager closes the file.\n",
"with open('../speech.txt', 'r') as speech_file:\n",
"    transcript = speech_file.read()\n",
"\n",
"# Remove the empty interim placeholder span and turn every line break into\n",
"# a sentence end so the tokenizer emits a '.' token at each line boundary.\n",
"transcript = transcript.replace('<span class=\"interim\"></span>', '').replace('\\n', '. ')\n",
"\n",
"tokens = nltk.word_tokenize(transcript)\n",
"pos = nltk.pos_tag(tokens)\n",
"\n",
"# Build one <span> per token, using its POS tag as the CSS class; a '.' token\n",
"# becomes a 'dot' span followed by a line break.\n",
"# Each span is closed with </span> so tokens stay siblings instead of nesting,\n",
"# and tag/word text is escaped so characters such as <, & or ' cannot break\n",
"# the markup or the single-quoted class attribute.\n",
"fragments = []\n",
"for word, tag in pos:\n",
"    if word == '.':\n",
"        fragments.append(\"<span class='dot'>.</span><br> \")\n",
"    else:\n",
"        fragments.append(\"<span class='\" + escape(tag) + \"'> \" + escape(word) + \"</span>\")\n",
"\n",
"# `html` is the name the following cells read.\n",
"html = ''.join(fragments)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0d1737d0-cda2-4208-9585-487ef809bdff",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"<span class='CC'>and<span> <span class='RB'>again<span> <span class='VBP'>are<span> <span class='PRP'>we<span> <span class='VBP'>are<span> <span class='dot'>.<span><br> <span class='NN'>let<span> <span class='POS'>'s<span> <span class='VB'>see<span> <span class='IN'>if<span> <span class='PRP'>it<span> <span class='VBZ'>works<span> <span class='RB'>again<span> <span class='IN'>with<span> <span class='DT'>these<span> <span class='JJ'>fantastic<span> <span class='NN'>xt500<span> <span class='dot'>.<span><br> \""
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"html"
]
},
{
"cell_type": "code",
"execution_count": 127,
"id": "cfe7aecd-bcff-4e3b-81c2-0622434cf62b",
"metadata": {},
"outputs": [],
"source": [
"# Save the generated markup to index.html in the current working directory.\n",
"# end='' stops print from appending a newline, so the file holds exactly `html`.\n",
"with open('index.html', mode='w') as page:\n",
"    print(html, end='', file=page)"
]
},
{
"cell_type": "code",
"execution_count": 137,
"id": "8d2195b3-dc7a-4cdc-b599-5570766df12d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 138,
"id": "02f79b41-8238-4fa1-9530-a84b5069bce0",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6e3cf8e-3493-463b-807a-89342e7c8731",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac322fc4-db27-42a3-aef0-ac0fbe3e26fb",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e02a152-1c3c-4e2c-b1aa-1a1f3172810c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "264c688c-aba4-4cf5-8f6d-b9b35c3a0969",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}