You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
132 lines
3.1 KiB
Plaintext
132 lines
3.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "35367849-a425-404c-bbbd-d8d7138f1838",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from html import escape\n",
"\n",
"import nltk\n",
"\n",
"# Load the text that will be annotated.\n",
"with open('../speech.txt','r') as result:\n",
"    r = result.read()\n",
"\n",
"# Drop the empty 'interim' placeholder spans and turn every line break into a sentence end.\n",
"r = r.replace('<span class=\"interim\"></span>','').replace('\\n','. ')\n",
"\n",
"l = nltk.word_tokenize(r)\n",
"pos = nltk.pos_tag(l)  # sequence of (token, tag) pairs\n",
"\n",
"# Emit one <span> per token, using the POS tag as its CSS class.\n",
"# Each span is closed with </span> (the previous '<span>' left every element open),\n",
"# and token/tag text is escaped so characters such as <, & or ' cannot break the markup.\n",
"parts = []\n",
"for token, tag in pos:\n",
"    if token == '.':\n",
"        # A full stop ends the sentence: close it and start a new line.\n",
"        parts.append(\"<span class='dot'>.</span><br> \")\n",
"    else:\n",
"        parts.append(\"<span class='\" + escape(tag) + \"'> \" + escape(token) + \"</span>\")\n",
"\n",
"# `html` is read by the following cells (display and export).\n",
"html = ''.join(parts)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "0d1737d0-cda2-4208-9585-487ef809bdff",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"\"<span class='CC'>and<span> <span class='RB'>again<span> <span class='VBP'>are<span> <span class='PRP'>we<span> <span class='VBP'>are<span> <span class='dot'>.<span><br> <span class='NN'>let<span> <span class='POS'>'s<span> <span class='VB'>see<span> <span class='IN'>if<span> <span class='PRP'>it<span> <span class='VBZ'>works<span> <span class='RB'>again<span> <span class='IN'>with<span> <span class='DT'>these<span> <span class='JJ'>fantastic<span> <span class='NN'>xt500<span> <span class='dot'>.<span><br> \""
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"html"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 127,
|
|
"id": "cfe7aecd-bcff-4e3b-81c2-0622434cf62b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Export the generated markup. The encoding is pinned to UTF-8 so the write does not\n",
"# depend on the platform's default encoding (which can reject non-ASCII tokens).\n",
"with open('index.html','w', encoding='utf-8') as index:\n",
"    index.write(html)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 137,
|
|
"id": "8d2195b3-dc7a-4cdc-b599-5570766df12d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 138,
|
|
"id": "02f79b41-8238-4fa1-9530-a84b5069bce0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c6e3cf8e-3493-463b-807a-89342e7c8731",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ac322fc4-db27-42a3-aef0-ac0fbe3e26fb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1e02a152-1c3c-4e2c-b1aa-1a1f3172810c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "264c688c-aba4-4cf5-8f6d-b9b35c3a0969",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|