updated NLTKing

master
Michael Murtaugh 4 years ago
parent 981258bace
commit 9185be341a

@ -198,7 +198,7 @@
"metadata": {},
"outputs": [],
"source": [
"words = text.split()"
"words = text.split"
]
},
{
@ -207,7 +207,7 @@
"metadata": {},
"outputs": [],
"source": [
"len(words)"
"words = text.split"
]
},
{
@ -216,7 +216,7 @@
"metadata": {},
"outputs": [],
"source": [
"from nltk import word_tokenize"
"words = text.split"
]
},
{
@ -225,7 +225,16 @@
"metadata": {},
"outputs": [],
"source": [
"tokens = word_tokenize(text)"
"words = text.split()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(words)"
]
},
{
@ -234,7 +243,7 @@
"metadata": {},
"outputs": [],
"source": [
"tokens = word_tokenize"
"from nltk import word_tokenize"
]
},
{
@ -243,7 +252,7 @@
"metadata": {},
"outputs": [],
"source": [
"tokens = word_tokenize"
"tokens = word_tokenize(text)"
]
},
{
@ -288,7 +297,7 @@
"metadata": {},
"outputs": [],
"source": [
"stengers"
"stengers.concordance(\"the\", width=82, lines=74)"
]
},
{
@ -297,7 +306,8 @@
"metadata": {},
"outputs": [],
"source": [
"stengers.concordance(\"the\", width=82, lines=74)"
"for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print)"
]
},
{
@ -306,7 +316,9 @@
"metadata": {},
"outputs": [],
"source": [
"stengers.concordance?"
"with open (\"patches/stengers_the.txt\", \"w\") as output:\n",
" for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print, file=output)"
]
},
{
@ -316,7 +328,7 @@
"outputs": [],
"source": [
"for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print)"
" print (line.query)"
]
},
{
@ -325,9 +337,7 @@
"metadata": {},
"outputs": [],
"source": [
"with open (\"patches/stengers_the.txt\", \"w\") as output:\n",
" for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print, file=output)"
"stengers.concordance(\"the\", width=3)\n"
]
},
{
@ -645,6 +655,115 @@
"figsize(20.0,20.0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stengers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stengers"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Nami asks: How to I get concordances of just words ending \"ity\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"t = stengers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ity = []\n",
"for w in stengers:\n",
" if w.endswith(\"ity\"):\n",
" # print (w)\n",
" ity.append(w.lower())\n",
"ity = set(ity)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for word in ity:\n",
" stengers.concordance(word)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"Objectivity\".lower"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"set(ity)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clara asks, what about lines that are shorter than the width you give?\n",
"\n",
"https://www.peterbe.com/plog/how-to-pad-fill-string-by-variable-python\n",
"\n",
"cwidth is how much \"padding\" is needed for each side\n",
"it's our page width - the length of the word divided by 2\n",
"in python means \"integer\" (whole number) division"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for line in stengers.concordance_list(\"resurgence\", width=82, lines=74):\n",
" cwidth = (82 - len(\"resurgence\")) // 2\n",
" # print (cwidth)\n",
" print ( line.left_print.rjust(cwidth), line.query, line.right_print.ljust(cwidth) )\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,

Loading…
Cancel
Save