updated NLTKing

master
Michael Murtaugh 4 years ago
parent 981258bace
commit 9185be341a

@ -198,7 +198,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"words = text.split()" "words = text.split"
] ]
}, },
{ {
@ -207,7 +207,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"len(words)" "words = text.split"
] ]
}, },
{ {
@ -216,7 +216,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from nltk import word_tokenize" "words = text.split"
] ]
}, },
{ {
@ -225,7 +225,16 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"tokens = word_tokenize(text)" "words = text.split()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(words)"
] ]
}, },
{ {
@ -234,7 +243,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"tokens = word_tokenize" "from nltk import word_tokenize"
] ]
}, },
{ {
@ -243,7 +252,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"tokens = word_tokenize" "tokens = word_tokenize(text)"
] ]
}, },
{ {
@ -288,7 +297,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"stengers" "stengers.concordance(\"the\", width=82, lines=74)"
] ]
}, },
{ {
@ -297,7 +306,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"stengers.concordance(\"the\", width=82, lines=74)" "for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print)"
] ]
}, },
{ {
@ -306,7 +316,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"stengers.concordance?" "with open (\"patches/stengers_the.txt\", \"w\") as output:\n",
" for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print, file=output)"
] ]
}, },
{ {
@ -316,7 +328,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"for line in stengers.concordance_list(\"the\", width=82, lines=74):\n", "for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print)" " print (line.query)"
] ]
}, },
{ {
@ -325,9 +337,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"with open (\"patches/stengers_the.txt\", \"w\") as output:\n", "stengers.concordance(\"the\", width=3)\n"
" for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print, file=output)"
] ]
}, },
{ {
@ -645,6 +655,115 @@
"figsize(20.0,20.0)" "figsize(20.0,20.0)"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stengers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stengers"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Nami asks: How do I get concordances of just the words ending in \"ity\"?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"t = stengers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ity = []\n",
"for w in stengers:\n",
" if w.endswith(\"ity\"):\n",
" # print (w)\n",
" ity.append(w.lower())\n",
"ity = set(ity)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for word in ity:\n",
" stengers.concordance(word)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"Objectivity\".lower"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"set(ity)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clara asks, what about lines that are shorter than the width you give?\n",
"\n",
"https://www.peterbe.com/plog/how-to-pad-fill-string-by-variable-python\n",
"\n",
"`cwidth` is how much \"padding\" is needed on each side:\n",
"it's (our page width - the length of the word) divided by 2.\n",
"`//` in Python means \"integer\" (whole number) division."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for line in stengers.concordance_list(\"resurgence\", width=82, lines=74):\n",
" cwidth = (82 - len(\"resurgence\")) // 2\n",
" # print (cwidth)\n",
" print ( line.left_print.rjust(cwidth), line.query, line.right_print.ljust(cwidth) )\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,

Loading…
Cancel
Save