updated NLTKing

4 years ago · 9185be341a
parent 981258bace
commit 9185be341a
1 changed files with 132 additions and 13 deletions
--- a/NLTKing.ipynb
+++ b/NLTKing.ipynb
@ -198,7 +198,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "words = text.split()"
+    "words = text.split"
   ]
  },
  {
@ -207,7 +207,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "len(words)"
+    "words = text.split"
   ]
  },
  {
@ -216,7 +216,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "from nltk import word_tokenize"
+    "words = text.split"
   ]
  },
  {
@ -225,7 +225,16 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "tokens = word_tokenize(text)"
+    "words = text.split()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(words)"
   ]
  },
  {
@ -234,7 +243,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "tokens = word_tokenize"
+    "from nltk import word_tokenize"
   ]
  },
  {
@ -243,7 +252,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "tokens = word_tokenize"
+    "tokens = word_tokenize(text)"
   ]
  },
  {
@ -288,7 +297,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "stengers"
+    "stengers.concordance(\"the\", width=82, lines=74)"
   ]
  },
  {
@ -297,7 +306,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "stengers.concordance(\"the\", width=82, lines=74)"
+    "for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
    "    print (line.left_print, line.query, line.right_print)"
   ]
  },
  {
@ -306,7 +316,9 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "stengers.concordance?"
+    "with open (\"patches/stengers_the.txt\", \"w\") as output:\n",
    "    for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
    "        print (line.left_print, line.query, line.right_print, file=output)"
   ]
  },
  {
@ -316,7 +328,7 @@
   "outputs": [],
   "source": [
    "for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
-    "    print (line.left_print, line.query, line.right_print)"
+    "    print (line.query)"
   ]
  },
  {
@ -325,9 +337,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "with open (\"patches/stengers_the.txt\", \"w\") as output:\n",
+    "stengers.concordance(\"the\", width=3)\n"
    "    for line in stengers.concordance_list(\"the\", width=82, lines=74):\n",
    "        print (line.left_print, line.query, line.right_print, file=output)"
   ]
  },
  {
@ -645,6 +655,115 @@
    "figsize(20.0,20.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stengers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stengers"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Nami asks: How do I get concordances of just words ending in \"ity\"?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "t = stengers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the distinct, lowercased words that end in \"ity\".\n",
    "# Lowercase each token *before* testing the suffix so that all-caps\n",
    "# tokens such as \"REALITY\" are matched as well.\n",
    "ity = {w for w in map(str.lower, stengers) if w.endswith(\"ity\")}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Iterate in sorted order: the iteration order of a set of strings can\n",
    "# change between kernel sessions, which would shuffle the output on re-run.\n",
    "for word in sorted(ity):\n",
    "    stengers.concordance(word)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"Objectivity\".lower"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "set(ity)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Clara asks: what about lines that are shorter than the width you give?\n",
    "\n",
    "https://www.peterbe.com/plog/how-to-pad-fill-string-by-variable-python\n",
    "\n",
    "`cwidth` is how much \"padding\" is needed on each side:\n",
    "it's our page width minus the length of the word, divided by 2.\n",
    "`//` in Python means \"integer\" (whole number) division."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "target = \"resurgence\"\n",
    "page_width = 82\n",
    "# cwidth is the padding available on each side of the target word.\n",
    "# It is the same for every line, so compute it once before the loop.\n",
    "cwidth = (page_width - len(target)) // 2\n",
    "for line in stengers.concordance_list(target, width=page_width, lines=74):\n",
    "    # Right-justify the left context and left-justify the right context so\n",
    "    # that short lines are padded out to the same width on both sides.\n",
    "    print(line.left_print.rjust(cwidth), line.query, line.right_print.ljust(cwidth))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,