updated NLTKing

master
Michael Murtaugh 4 years ago
parent 42fe36dbb3
commit 981258bace

@ -69,7 +69,26 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"Text?" "nltk.text.Text"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for line in text1.concordance_list(\"whale\"):\n",
" print (line.left_print, line.query, line.right_print)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text5.tokens"
] ]
}, },
{ {
@ -92,6 +111,15 @@
"url = \"https://git.xpub.nl/XPUB/S13-Words-for-the-Future-materials/raw/branch/master/txt-essays/RESURGENCE%20Isabelle%20Stengers.txt\"" "url = \"https://git.xpub.nl/XPUB/S13-Words-for-the-Future-materials/raw/branch/master/txt-essays/RESURGENCE%20Isabelle%20Stengers.txt\""
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@ -101,6 +129,33 @@
"from urllib.request import urlopen" "from urllib.request import urlopen"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"r = urlopen(url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rawtext = r.read()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = rawtext.decode()"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@ -110,6 +165,51 @@
"text = urlopen(url).read().decode()" "text = urlopen(url).read().decode()"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"words = text.split?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"words = text.split"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"words = text.split()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(words)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@ -134,7 +234,52 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"stengers = nltk.text.Text(tokens)" "tokens = word_tokenize"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tokens = word_tokenize"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(tokens)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(tokens)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tokens[-10:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stengers = Text(tokens)"
] ]
}, },
{ {
@ -152,7 +297,16 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"stengers.concordance(\"power\")" "stengers.concordance(\"the\", width=82, lines=74)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stengers.concordance?"
] ]
}, },
{ {
@ -161,7 +315,19 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"stengers.similar(\"power\")" "for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open (\"patches/stengers_the.txt\", \"w\") as output:\n",
" for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
" print (line.left_print, line.query, line.right_print, file=output)"
] ]
}, },
{ {
@ -179,7 +345,16 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"stengers.dispersion_plot([\"power\", \"freedom\"])" "stengers.dispersion_plot([\"power\", \"the\", \"victims\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from nltk.probability import FreqDist"
] ]
}, },
{ {
@ -197,7 +372,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"freq" "freq[\"WHALE\"]"
] ]
}, },
{ {
@ -467,8 +642,15 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"figsize(20.0,4.8)" "figsize(20.0,20.0)"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {

Loading…
Cancel
Save