From 9185be341aeaa42922f1d61a737c11a01ff175b7 Mon Sep 17 00:00:00 2001
From: Michael Murtaugh <mm@automatist.org>
Date: Wed, 14 Oct 2020 11:21:32 +0000
Subject: [PATCH] updated NLTKing

---
 NLTKing.ipynb | 145 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 132 insertions(+), 13 deletions(-)

diff --git a/NLTKing.ipynb b/NLTKing.ipynb
index 524a5ca..b934d87 100644
--- a/NLTKing.ipynb
+++ b/NLTKing.ipynb
@@ -198,7 +198,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "words = text.split()"
+    "words = text.split"
    ]
   },
   {
@@ -207,7 +207,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "len(words)"
+    "words = text.split"
    ]
   },
   {
@@ -216,7 +216,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from nltk import word_tokenize"
+    "words = text.split"
    ]
   },
   {
@@ -225,7 +225,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tokens = word_tokenize(text)"
+    "words = text.split()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(words)"
    ]
   },
   {
@@ -234,7 +243,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tokens = word_tokenize"
+    "from nltk import word_tokenize"
    ]
   },
   {
@@ -243,7 +252,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tokens = word_tokenize"
+    "tokens = word_tokenize(text)"
    ]
   },
   {
@@ -288,7 +297,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "stengers"
+    "stengers.concordance(\"the\", width=82, lines=74)"
    ]
   },
   {
@@ -297,7 +306,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "stengers.concordance(\"the\", width=82, lines=74)"
+    "for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
+    "    print (line.left_print, line.query, line.right_print)"
    ]
   },
   {
@@ -306,7 +316,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "stengers.concordance?"
+    "with open (\"patches/stengers_the.txt\", \"w\") as output:\n",
+    "    for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
+    "        print (line.left_print, line.query, line.right_print, file=output)"
    ]
   },
   {
@@ -316,7 +328,7 @@
    "outputs": [],
    "source": [
     "for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
-    "    print (line.left_print, line.query, line.right_print)"
+    "    print (line.query)"
    ]
   },
   {
@@ -325,9 +337,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "with open (\"patches/stengers_the.txt\", \"w\") as output:\n",
-    "    for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
-    "        print (line.left_print, line.query, line.right_print, file=output)"
+    "stengers.concordance(\"the\", width=3)\n"
    ]
   },
   {
@@ -645,6 +655,115 @@
     "figsize(20.0,20.0)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stengers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stengers"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Nami asks: How do I get concordances of just words ending \"ity\"?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t = stengers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ity = []\n",
+    "for w in stengers:\n",
+    "    if w.endswith(\"ity\"):\n",
+    "        # print (w)\n",
+    "        ity.append(w.lower())\n",
+    "ity = set(ity)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for word in ity:\n",
+    "    stengers.concordance(word)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"Objectivity\".lower"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "set(ity)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Clara asks, what about lines that are shorter than the width you give?\n",
+    "\n",
+    "https://www.peterbe.com/plog/how-to-pad-fill-string-by-variable-python\n",
+    "\n",
+    "`cwidth` is how much \"padding\" is needed on each side:\n",
+    "it's our page width minus the length of the word, divided by 2.\n",
+    "`//` in Python means \"integer\" (whole number) division."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for line in stengers.concordance_list(\"resurgence\", width=82, lines=74):\n",
+    "    cwidth = (82 - len(\"resurgence\")) // 2\n",
+    "    # print (cwidth)\n",
+    "    print ( line.left_print.rjust(cwidth), line.query, line.right_print.ljust(cwidth) )\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": null,