updated NLTKing

master
Michael Murtaugh 4 years ago
parent 42fe36dbb3
commit 981258bace

@ -69,7 +69,26 @@
"metadata": {},
"outputs": [],
"source": [
"Text?"
"nltk.text.Text"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for line in text1.concordance_list(\"whale\"):\n",
" print (line.left_print, line.query, line.right_print)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text5.tokens"
]
},
{
@ -92,6 +111,15 @@
"url = \"https://git.xpub.nl/XPUB/S13-Words-for-the-Future-materials/raw/branch/master/txt-essays/RESURGENCE%20Isabelle%20Stengers.txt\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -101,6 +129,33 @@
"from urllib.request import urlopen"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"r = urlopen(url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rawtext = r.read()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = rawtext.decode()"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -110,6 +165,51 @@
"text = urlopen(url).read().decode()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"words = text.split?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"words = text.split"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"words = text.split()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(words)"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -134,7 +234,52 @@
"metadata": {},
"outputs": [],
"source": [
"stengers = nltk.text.Text(tokens)"
"tokens = word_tokenize"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tokens = word_tokenize"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(tokens)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(tokens)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tokens[-10:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stengers = Text(tokens)"
]
},
{
@ -152,7 +297,16 @@
"metadata": {},
"outputs": [],
"source": [
"stengers.concordance(\"power\")"
"stengers.concordance(\"the\", width=82, lines=74)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stengers.concordance?"
]
},
{
@ -161,7 +315,19 @@
"metadata": {},
"outputs": [],
"source": [
"stengers.similar(\"power\")"
"for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open (\"patches/stengers_the.txt\", \"w\") as output:\n",
" for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print, file=output)"
]
},
{
@ -179,7 +345,16 @@
"metadata": {},
"outputs": [],
"source": [
"stengers.dispersion_plot([\"power\", \"freedom\"])"
"stengers.dispersion_plot([\"power\", \"the\", \"victims\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from nltk.probability import FreqDist"
]
},
{
@ -197,7 +372,7 @@
"metadata": {},
"outputs": [],
"source": [
"freq"
"freq[\"WHALE\"]"
]
},
{
@ -467,8 +642,15 @@
"metadata": {},
"outputs": [],
"source": [
"figsize(20.0,4.8)"
"figsize(20.0,20.0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {

Loading…
Cancel
Save