{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NLTK - Part of Speech"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nltk\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[' In his fifth memo, he subsequently focuses on  multiplicity as a way for literature to comprehend the complex nature of the world that for the author is a whole of wholes, where the acts of watching and knowing also intervene in the observed reality and alter it. Calvino is particularly fascinated by literary works that are built upon a combinatory logic or that are readable as different narratives. The lecture revolves around some novels that contain multiple worlds and make space for the readers’ imaginations. Therefore, let’s think visibility and multiplicity together, as: a multiplication of visibilities. They are traits specific to artistic production and define a context for the undecidable, or rather for undecidability, as the quality of being undecidable.']\n"
     ]
    }
   ],
   "source": [
    "# Read the source text; the context manager closes the file handle afterwards.\n",
    "with open('1.txt') as source_file:\n",
    "    texts = source_file.readlines()\n",
    "\n",
    "# random.shuffle() reorders the list in place and returns None, so its\n",
    "# result is not assigned (the old `sentence` variable was always None).\n",
    "random.shuffle(texts)\n",
    "print(texts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[' In his fifth memo, he subsequently focuses on  multiplicity as a way for literature to comprehend the complex nature of the world that for the author is a whole of wholes, where the acts of watching and knowing also intervene in the observed reality and alter it. Calvino is particularly fascinated by literary works that are built upon a combinatory logic or that are readable as different narratives. The lecture revolves around some novels that contain multiple worlds and make space for the readers’ imaginations. Therefore, let’s think visibility and multiplicity together, as: a multiplication of visibilities. They are traits specific to artistic production and define a context for the undecidable, or rather for undecidability, as the quality of being undecidable.']\n"
     ]
    }
   ],
   "source": [
    "# Read the source text; the context manager closes the file handle afterwards.\n",
    "with open('1.txt') as source_file:\n",
    "    lines = source_file.readlines()\n",
    "\n",
    "# random.shuffle() reorders the list in place and returns None, so its\n",
    "# result is not assigned (the old `sentence` variable was always None).\n",
    "random.shuffle(lines)\n",
    "print(lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " In his fifth memo, he subsequently focuses on  multiplicity as a way for literature to comprehend the complex nature of the world that for the author is a whole of wholes, where the acts of watching and knowing also intervene in the observed reality and alter it. Calvino is particularly fascinated by literary works that are built upon a combinatory logic or that are readable as different narratives. The lecture revolves around some novels that contain multiple worlds and make space for the readers’ imaginations. Therefore, let’s think visibility and multiplicity together, as: a multiplication of visibilities. They are traits specific to artistic production and define a context for the undecidable, or rather for undecidability, as the quality of being undecidable.\n"
     ]
    }
   ],
   "source": [
    "# Merge every line into one space-separated string (generator expression).\n",
    "text1 = ' '.join(str(line) for line in lines)\n",
    "\n",
    "print(text1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['In', 'his', 'fifth', 'memo', ',', 'he', 'subsequently', 'focuses', 'on', 'multiplicity', 'as', 'a', 'way', 'for', 'literature', 'to', 'comprehend', 'the', 'complex', 'nature', 'of', 'the', 'world', 'that', 'for', 'the', 'author', 'is', 'a', 'whole', 'of', 'wholes', ',', 'where', 'the', 'acts', 'of', 'watching', 'and', 'knowing', 'also', 'intervene', 'in', 'the', 'observed', 'reality', 'and', 'alter', 'it', '.', 'Calvino', 'is', 'particularly', 'fascinated', 'by', 'literary', 'works', 'that', 'are', 'built', 'upon', 'a', 'combinatory', 'logic', 'or', 'that', 'are', 'readable', 'as', 'different', 'narratives', '.', 'The', 'lecture', 'revolves', 'around', 'some', 'novels', 'that', 'contain', 'multiple', 'worlds', 'and', 'make', 'space', 'for', 'the', 'readers', '’', 'imaginations', '.', 'Therefore', ',', 'let', '’', 's', 'think', 'visibility', 'and', 'multiplicity', 'together', ',', 'as', ':', 'a', 'multiplication', 'of', 'visibilities', '.', 'They', 'are', 'traits', 'specific', 'to', 'artistic', 'production', 'and', 'define', 'a', 'context', 'for', 'the', 'undecidable', ',', 'or', 'rather', 'for', 'undecidability', ',', 'as', 'the', 'quality', 'of', 'being', 'undecidable', '.']\n"
     ]
    }
   ],
   "source": [
    "# Split the joined text into word and punctuation tokens.\n",
    "tokens = nltk.word_tokenize(text1)\n",
    "print(tokens)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('In', 'IN'), ('his', 'PRP$'), ('fifth', 'JJ'), ('memo', 'NN'), (',', ','), ('he', 'PRP'), ('subsequently', 'RB'), ('focuses', 'VBZ'), ('on', 'IN'), ('multiplicity', 'NN'), ('as', 'IN'), ('a', 'DT'), ('way', 'NN'), ('for', 'IN'), ('literature', 'NN'), ('to', 'TO'), ('comprehend', 'VB'), ('the', 'DT'), ('complex', 'JJ'), ('nature', 'NN'), ('of', 'IN'), ('the', 'DT'), ('world', 'NN'), ('that', 'WDT'), ('for', 'IN'), ('the', 'DT'), ('author', 'NN'), ('is', 'VBZ'), ('a', 'DT'), ('whole', 'NN'), ('of', 'IN'), ('wholes', 'NNS'), (',', ','), ('where', 'WRB'), ('the', 'DT'), ('acts', 'NNS'), ('of', 'IN'), ('watching', 'VBG'), ('and', 'CC'), ('knowing', 'VBG'), ('also', 'RB'), ('intervene', 'NN'), ('in', 'IN'), ('the', 'DT'), ('observed', 'JJ'), ('reality', 'NN'), ('and', 'CC'), ('alter', 'NN'), ('it', 'PRP'), ('.', '.'), ('Calvino', 'NNP'), ('is', 'VBZ'), ('particularly', 'RB'), ('fascinated', 'VBN'), ('by', 'IN'), ('literary', 'JJ'), ('works', 'NNS'), ('that', 'WDT'), ('are', 'VBP'), ('built', 'VBN'), ('upon', 'IN'), ('a', 'DT'), ('combinatory', 'NN'), ('logic', 'NN'), ('or', 'CC'), ('that', 'WDT'), ('are', 'VBP'), ('readable', 'JJ'), ('as', 'IN'), ('different', 'JJ'), ('narratives', 'NNS'), ('.', '.'), ('The', 'DT'), ('lecture', 'NN'), ('revolves', 'VBZ'), ('around', 'IN'), ('some', 'DT'), ('novels', 'NNS'), ('that', 'WDT'), ('contain', 'VBP'), ('multiple', 'JJ'), ('worlds', 'NNS'), ('and', 'CC'), ('make', 'VB'), ('space', 'NN'), ('for', 'IN'), ('the', 'DT'), ('readers', 'NNS'), ('’', 'VBP'), ('imaginations', 'NNS'), ('.', '.'), ('Therefore', 'RB'), (',', ','), ('let', 'VB'), ('’', 'NNP'), ('s', 'VB'), ('think', 'VBP'), ('visibility', 'NN'), ('and', 'CC'), ('multiplicity', 'NN'), ('together', 'RB'), (',', ','), ('as', 'IN'), (':', ':'), ('a', 'DT'), ('multiplication', 'NN'), ('of', 'IN'), ('visibilities', 'NNS'), ('.', '.'), ('They', 'PRP'), ('are', 'VBP'), ('traits', 'NNS'), ('specific', 'JJ'), ('to', 'TO'), ('artistic', 'JJ'), ('production', 'NN'), ('and', 
'CC'), ('define', 'VB'), ('a', 'DT'), ('context', 'NN'), ('for', 'IN'), ('the', 'DT'), ('undecidable', 'JJ'), (',', ','), ('or', 'CC'), ('rather', 'RB'), ('for', 'IN'), ('undecidability', 'NN'), (',', ','), ('as', 'IN'), ('the', 'DT'), ('quality', 'NN'), ('of', 'IN'), ('being', 'VBG'), ('undecidable', 'JJ'), ('.', '.')]\n"
     ]
    }
   ],
   "source": [
    "# Attach a part-of-speech tag to each token, giving (word, tag) pairs.\n",
    "tagged1 = nltk.pos_tag(tokens)\n",
    "print(tagged1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['memo', 'multiplicity', 'way', 'literature', 'nature', 'world', 'author', 'whole', 'wholes', 'acts', 'intervene', 'reality', 'alter', 'Calvino', 'works', 'combinatory', 'logic', 'narratives', 'lecture', 'novels', 'worlds', 'space', 'readers', 'imaginations', '’', 'visibility', 'multiplicity', 'multiplication', 'visibilities', 'traits', 'production', 'context', 'undecidability', 'quality']\n"
     ]
    }
   ],
   "source": [
    "# Collect every word whose part-of-speech tag contains 'NN'.\n",
    "selection = [word for word, tag in tagged1 if 'NN' in tag]\n",
    "\n",
    "print(selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'space', 'undecidability', 'world', 'worlds', 'narratives', 'way', 'literature', 'quality', 'works', 'novels', 'visibilities', 'acts', 'readers', 'nature', 'context', 'reality', 'whole', 'wholes', 'Calvino', 'multiplication', '’', 'visibility', 'combinatory', 'lecture', 'alter', 'imaginations', 'memo', 'multiplicity', 'production', 'author', 'logic', 'traits', 'intervene'}\n"
     ]
    }
   ],
   "source": [
    "# Rebuild the list of 'NN'-tagged words, then print it as a set so that\n",
    "# duplicate words are dropped from the display. Only the printed view is\n",
    "# de-duplicated: `selection` itself still contains repeated words.\n",
    "selection = [word for word, tag in tagged1 if 'NN' in tag]\n",
    "\n",
    "print(set(selection))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['imaginations', 'alter', 'works', 'imaginations', 'lecture', 'imaginations']\n"
     ]
    }
   ],
   "source": [
    "# Draw six words at random; random.choices samples with replacement,\n",
    "# so the same word can be picked more than once.\n",
    "nntagged1 = random.choices(selection, k=6)\n",
    "print(nntagged1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "imaginations and alter and works and imaginations and lecture and imaginations\n"
     ]
    }
   ],
   "source": [
    "# Join the sampled words into a single phrase, separated by ' and '.\n",
    "nnand = \" and \".join(nntagged1)\n",
    "\n",
    "print(nnand)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['novels', 'narratives', 'traits', 'lecture', 'narratives', 'traits']\n"
     ]
    }
   ],
   "source": [
    "# Second draw of six words (sampled with replacement, so repeats are possible).\n",
    "nntagged2 = random.choices(selection, k=6)\n",
    "print(nntagged2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "novels and narratives and traits and lecture and narratives and traits\n"
     ]
    }
   ],
   "source": [
    "# Join the second sample into a phrase, separated by ' and '.\n",
    "nnand2 = \" and \".join(nntagged2)\n",
    "\n",
    "print(nnand2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['multiplicity', 'works', 'author', 'reality', 'multiplicity', 'wholes']\n"
     ]
    }
   ],
   "source": [
    "# Third draw of six words (sampled with replacement, so repeats are possible).\n",
    "nntagged3 = random.choices(selection, k=6)\n",
    "print(nntagged3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "multiplicity and works and author and reality and multiplicity and wholes\n"
     ]
    }
   ],
   "source": [
    "# Join the third sample into a phrase, separated by ' and '.\n",
    "nnand3 = \" and \".join(nntagged3)\n",
    "\n",
    "print(nnand3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the second source text; the context manager closes the file handle afterwards.\n",
    "with open('relevant.txt') as source_file:\n",
    "    lines = source_file.readlines()\n",
    "\n",
    "# random.shuffle() reorders the list in place and returns None, so its\n",
    "# result is not assigned (the old `sentence` variable was always None).\n",
    "random.shuffle(lines)\n",
    "print(lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check: show the type of the object returned by readlines().\n",
    "type(lines)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tokens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['In', 'fact', ',', 'if', 'something', 'is', 'possible', 'when', 'it', 'contains', 'and', 'under', 'certain', 'terms', 'performs', 'the', 'possibility', 'of', 'its', 'actualisation', ',', 'a', 'world', 'is', 'potential', 'when', 'it', 'can', 'maintain', 'its', 'potentiality', 'and', 'never', 'actualize', 'itself', 'into', 'one', 'actual', 'form', '.', 'The', 'kind', 'of', 'collective', 'body', 'that', 'undecidability', 'produces', 'could', 'of', 'course', 'be', 'seen', 'as', 'an', 'image', 'of', 'a', 'possible', 'or', 'future', 'societal', 'structure', ',', 'but', 'it', 'is', 'rather', 'an', 'enigmatic', 'subject', ':', 'it', 'is', 'not', 'there', 'to', 'actualize', 'itself', 'but', 'to', 'keep', 'being', 'a', 'sheer', ',', 'glimmering', 'potentiality', '.', 'If', 'the', 'coexistence', 'of', 'different', 'media', 'already', 'implies', 'different', 'angles', ',', 'durations', ',', 'discourses', ',', 'and', 'forms', 'of', 'spectatorship', ',', 'the', 'performance', 'itself', 'keeps', 'an', 'undecidable', 'bound', 'between', 'its', 'real', 'and', 'fictional', 'ontologies', '.', 'Which', 'is', 'to', 'say', ',', 'if', 'it', 'doesn', '’', 't', 'give', 'up', 'on', 'involving', 'radically', 'different', 'realities', 'into', 'its', 'operation', 'modes', 'and', 'doesn', '’', 't', 'fade', 'out', 'from', 'the', 'scene', 'of', 'the', '‘', 'real', '’', 'world', '.', 'In', 'particular', ',', 'the', 'potentiality', 'generated', 'by', 'undecidable', 'artworks', 'is', 'grounded', 'in', 'a', 'logic', 'of', 'addition', 'and', 'contradiction', 'that', 'is', 'specific', 'of', 'art', '.', 'If', 'the', 'coexistence', 'of', 'different', 'media', 'already', 'implies', 'different', 'angles', ',', 'durations', ',', 'discourses', ',', 'and', 'forms', 'of', 'spectatorship', ',', 'the', 'performance', 'itself', 'keeps', 'an', 'undecidable', 'bound', 'between', 'its', 'real', 'and', 'fictional', 'ontologies', '.', 'In', 'fact', ',', 'undecidability', 'is', 'a', 'specific', 'force', 'at', 
'work', 'that', 'consciously', 'articulates', ',', 'redefines', ',', 'or', 'alters', 'the', 'complex', 'system', 'of', 'links', ',', 'bounds', ',', 'and', 'resonances', 'between', 'different', 'potential', 'and', 'actual', 'worlds', '.', 'What', 'is', 'peculiar', 'to', 'this', 'kind', 'of', 'artworks', 'then', ',', 'and', 'what', 'within', 'them', 'can', 'produce', 'an', 'understanding', 'of', 'the', 'place', 'of', 'art', 'and', 'of', 'its', 'politics', 'today', ',', 'is', 'that', 'they', 'generate', 'a', 'multiplicity', 'of', 'gazes', 'and', 'of', 'forms', 'of', 'spectatorship', 'that', 'also', 'coexist', 'one', 'next', 'to', 'the', 'other', 'without', 'mediating', 'between', 'their', 'own', 'positions', 'and', 'points', 'of', 'view', '.', 'We', 'might', 'stretch', 'this', 'line', 'of', 'thought', 'a', 'bit', 'further', 'and', 'propose', 'that', 'art', '’', 's', 'potentiality', 'is', 'that', 'of', 'multiplying', 'the', 'visible', 'as', 'an', 'actual', 'counterstrategy', 'to', 'the', 'proliferation', 'of', 'images', 'that', 'surrounds', 'us', '.', 'Undecidability', 'could', 'then', 'be', 'detached', 'from', 'art', 'and', 'applied', 'to', 'curation', ',', 'instituting', 'processes', 'or', 'even', 'to', 'politics', 'at', 'large', ':', 'the', 'unfolding', 'of', 'its', 'resonances', 'and', 'consequences', 'already', 'opens', 'this', 'possibility', 'and', 'even', 'beckons', 'it', '.', '(', 'Nevertheless', ',', 'acknowledging', 'it', 'as', 'specific', 'to', 'art', ',', 'and', 'thus', 'as', 'a', 'means', 'without', 'ends', ',', 'seems', 'to', 'better', 'protect', 'the', 'inner', 'nature', 'and', 'the', 'intact', 'potentiality', 'of', 'a', 'quality', 'that', 'does', 'not', 'make', 'itself', 'available', 'for', 'any', 'use', 'and', 'does', 'not', 'serve', 'any', 'agenda', ',', 'but', 'stays', 'autonomous', 'and', 'operates', 'by', 'creating', 'its', 'own', 'conditions', 'all', 'over', 'again', '.', 'Here', ',', 'spectators', 'are', 'invited', 'to', 'enter', 'the', 'work', 
'’', 's', 'fictional', 'world', 'carrying', 'with', 'themselves', 'the', 'so-called', 'real', 'world', 'and', 'all', 'their', 'other', 'fictional', 'worlds', ';', 'a', 'space', 'is', 'created', 'where', 'all', 'these', 'worlds', 'are', 'equally', 'welcomed', '.', 'Undecidability', 'could', 'then', 'be', 'detached', 'from', 'art', 'and', 'applied', 'to', 'curation', ',', 'instituting', 'processes', 'or', 'even', 'to', 'politics', 'at', 'large', ':', 'the', 'unfolding', 'of', 'its', 'resonances', 'and', 'consequences', 'already', 'opens', 'this', 'possibility', 'and', 'even', 'beckons', 'it', '.', '-Nevertheless', ',', 'acknowledging', 'it', 'as', 'specific', 'to', 'art', ',', 'and', 'thus', 'as', 'a', 'means', 'without', 'ends', ',', 'seems', 'to', 'better', 'protect', 'the', 'inner', 'nature', 'and', 'the', 'intact', 'potentiality', 'of', 'a', 'quality', 'that', 'does', 'not', 'make', 'itself', 'available', 'for', 'any', 'use', 'and', 'does', 'not', 'serve', 'any', 'agenda', ',', 'but', 'stays', 'autonomous', 'and', 'operates', 'by', 'creating', 'its', 'own', 'conditions', 'all', 'over', 'again', '.']\n"
     ]
    }
   ],
   "source": [
    "# `full` was not defined in any cell of this notebook, so this cell raised\n",
    "# NameError on a fresh kernel. Rebuild it from the lines read from relevant.txt.\n",
    "# NOTE(review): assumes `full` was the space-joined lines, mirroring `text1` — confirm.\n",
    "full = ' '.join(lines)\n",
    "\n",
    "# Split the joined text into word and punctuation tokens.\n",
    "tokens = nltk.word_tokenize(full)\n",
    "print(tokens)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Part of Speech \"tags\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('In', 'IN'), ('fact', 'NN'), (',', ','), ('if', 'IN'), ('something', 'NN'), ('is', 'VBZ'), ('possible', 'JJ'), ('when', 'WRB'), ('it', 'PRP'), ('contains', 'VBZ'), ('and', 'CC'), ('under', 'IN'), ('certain', 'JJ'), ('terms', 'NNS'), ('performs', 'VBP'), ('the', 'DT'), ('possibility', 'NN'), ('of', 'IN'), ('its', 'PRP$'), ('actualisation', 'NN'), (',', ','), ('a', 'DT'), ('world', 'NN'), ('is', 'VBZ'), ('potential', 'JJ'), ('when', 'WRB'), ('it', 'PRP'), ('can', 'MD'), ('maintain', 'VB'), ('its', 'PRP$'), ('potentiality', 'NN'), ('and', 'CC'), ('never', 'RB'), ('actualize', 'VB'), ('itself', 'PRP'), ('into', 'IN'), ('one', 'CD'), ('actual', 'JJ'), ('form', 'NN'), ('.', '.'), ('The', 'DT'), ('kind', 'NN'), ('of', 'IN'), ('collective', 'JJ'), ('body', 'NN'), ('that', 'WDT'), ('undecidability', 'JJ'), ('produces', 'NNS'), ('could', 'MD'), ('of', 'IN'), ('course', 'NN'), ('be', 'VB'), ('seen', 'VBN'), ('as', 'IN'), ('an', 'DT'), ('image', 'NN'), ('of', 'IN'), ('a', 'DT'), ('possible', 'JJ'), ('or', 'CC'), ('future', 'JJ'), ('societal', 'JJ'), ('structure', 'NN'), (',', ','), ('but', 'CC'), ('it', 'PRP'), ('is', 'VBZ'), ('rather', 'RB'), ('an', 'DT'), ('enigmatic', 'JJ'), ('subject', 'NN'), (':', ':'), ('it', 'PRP'), ('is', 'VBZ'), ('not', 'RB'), ('there', 'RB'), ('to', 'TO'), ('actualize', 'VB'), ('itself', 'PRP'), ('but', 'CC'), ('to', 'TO'), ('keep', 'VB'), ('being', 'VBG'), ('a', 'DT'), ('sheer', 'NN'), (',', ','), ('glimmering', 'VBG'), ('potentiality', 'NN'), ('.', '.'), ('If', 'IN'), ('the', 'DT'), ('coexistence', 'NN'), ('of', 'IN'), ('different', 'JJ'), ('media', 'NNS'), ('already', 'RB'), ('implies', 'VBZ'), ('different', 'JJ'), ('angles', 'NNS'), (',', ','), ('durations', 'NNS'), (',', ','), ('discourses', 'NNS'), (',', ','), ('and', 'CC'), ('forms', 'NNS'), ('of', 'IN'), ('spectatorship', 'NN'), (',', ','), ('the', 'DT'), ('performance', 'NN'), ('itself', 'PRP'), ('keeps', 'VBZ'), ('an', 'DT'), ('undecidable', 'JJ'), ('bound', 'NN'), ('between', 
'IN'), ('its', 'PRP$'), ('real', 'JJ'), ('and', 'CC'), ('fictional', 'JJ'), ('ontologies', 'NNS'), ('.', '.'), ('Which', 'NNP'), ('is', 'VBZ'), ('to', 'TO'), ('say', 'VB'), (',', ','), ('if', 'IN'), ('it', 'PRP'), ('doesn', 'VBZ'), ('’', 'JJ'), ('t', 'NNS'), ('give', 'VBP'), ('up', 'RP'), ('on', 'IN'), ('involving', 'VBG'), ('radically', 'RB'), ('different', 'JJ'), ('realities', 'NNS'), ('into', 'IN'), ('its', 'PRP$'), ('operation', 'NN'), ('modes', 'NNS'), ('and', 'CC'), ('doesn', 'NN'), ('’', 'NNP'), ('t', 'NN'), ('fade', 'VBD'), ('out', 'RP'), ('from', 'IN'), ('the', 'DT'), ('scene', 'NN'), ('of', 'IN'), ('the', 'DT'), ('‘', 'NNP'), ('real', 'JJ'), ('’', 'JJ'), ('world', 'NN'), ('.', '.'), ('In', 'IN'), ('particular', 'JJ'), (',', ','), ('the', 'DT'), ('potentiality', 'NN'), ('generated', 'VBN'), ('by', 'IN'), ('undecidable', 'JJ'), ('artworks', 'NNS'), ('is', 'VBZ'), ('grounded', 'VBN'), ('in', 'IN'), ('a', 'DT'), ('logic', 'NN'), ('of', 'IN'), ('addition', 'NN'), ('and', 'CC'), ('contradiction', 'NN'), ('that', 'WDT'), ('is', 'VBZ'), ('specific', 'JJ'), ('of', 'IN'), ('art', 'NN'), ('.', '.'), ('If', 'IN'), ('the', 'DT'), ('coexistence', 'NN'), ('of', 'IN'), ('different', 'JJ'), ('media', 'NNS'), ('already', 'RB'), ('implies', 'VBZ'), ('different', 'JJ'), ('angles', 'NNS'), (',', ','), ('durations', 'NNS'), (',', ','), ('discourses', 'NNS'), (',', ','), ('and', 'CC'), ('forms', 'NNS'), ('of', 'IN'), ('spectatorship', 'NN'), (',', ','), ('the', 'DT'), ('performance', 'NN'), ('itself', 'PRP'), ('keeps', 'VBZ'), ('an', 'DT'), ('undecidable', 'JJ'), ('bound', 'NN'), ('between', 'IN'), ('its', 'PRP$'), ('real', 'JJ'), ('and', 'CC'), ('fictional', 'JJ'), ('ontologies', 'NNS'), ('.', '.'), ('In', 'IN'), ('fact', 'NN'), (',', ','), ('undecidability', 'NN'), ('is', 'VBZ'), ('a', 'DT'), ('specific', 'JJ'), ('force', 'NN'), ('at', 'IN'), ('work', 'NN'), ('that', 'WDT'), ('consciously', 'RB'), ('articulates', 'VBZ'), (',', ','), ('redefines', 'NNS'), (',', ','), ('or', 
'CC'), ('alters', 'VBZ'), ('the', 'DT'), ('complex', 'JJ'), ('system', 'NN'), ('of', 'IN'), ('links', 'NNS'), (',', ','), ('bounds', 'NNS'), (',', ','), ('and', 'CC'), ('resonances', 'NNS'), ('between', 'IN'), ('different', 'JJ'), ('potential', 'NN'), ('and', 'CC'), ('actual', 'JJ'), ('worlds', 'NNS'), ('.', '.'), ('What', 'WP'), ('is', 'VBZ'), ('peculiar', 'JJ'), ('to', 'TO'), ('this', 'DT'), ('kind', 'NN'), ('of', 'IN'), ('artworks', 'NNS'), ('then', 'RB'), (',', ','), ('and', 'CC'), ('what', 'WP'), ('within', 'IN'), ('them', 'PRP'), ('can', 'MD'), ('produce', 'VB'), ('an', 'DT'), ('understanding', 'NN'), ('of', 'IN'), ('the', 'DT'), ('place', 'NN'), ('of', 'IN'), ('art', 'NN'), ('and', 'CC'), ('of', 'IN'), ('its', 'PRP$'), ('politics', 'NNS'), ('today', 'NN'), (',', ','), ('is', 'VBZ'), ('that', 'IN'), ('they', 'PRP'), ('generate', 'VBP'), ('a', 'DT'), ('multiplicity', 'NN'), ('of', 'IN'), ('gazes', 'NNS'), ('and', 'CC'), ('of', 'IN'), ('forms', 'NNS'), ('of', 'IN'), ('spectatorship', 'NN'), ('that', 'WDT'), ('also', 'RB'), ('coexist', 'VBP'), ('one', 'CD'), ('next', 'JJ'), ('to', 'TO'), ('the', 'DT'), ('other', 'JJ'), ('without', 'IN'), ('mediating', 'VBG'), ('between', 'IN'), ('their', 'PRP$'), ('own', 'JJ'), ('positions', 'NNS'), ('and', 'CC'), ('points', 'NNS'), ('of', 'IN'), ('view', 'NN'), ('.', '.'), ('We', 'PRP'), ('might', 'MD'), ('stretch', 'VB'), ('this', 'DT'), ('line', 'NN'), ('of', 'IN'), ('thought', 'NN'), ('a', 'DT'), ('bit', 'NN'), ('further', 'JJ'), ('and', 'CC'), ('propose', 'VB'), ('that', 'IN'), ('art', 'NN'), ('’', 'NNP'), ('s', 'NN'), ('potentiality', 'NN'), ('is', 'VBZ'), ('that', 'IN'), ('of', 'IN'), ('multiplying', 'VBG'), ('the', 'DT'), ('visible', 'JJ'), ('as', 'IN'), ('an', 'DT'), ('actual', 'JJ'), ('counterstrategy', 'NN'), ('to', 'TO'), ('the', 'DT'), ('proliferation', 'NN'), ('of', 'IN'), ('images', 'NNS'), ('that', 'WDT'), ('surrounds', 'VBZ'), ('us', 'PRP'), ('.', '.'), ('Undecidability', 'NN'), ('could', 'MD'), ('then', 'RB'), 
('be', 'VB'), ('detached', 'VBN'), ('from', 'IN'), ('art', 'NN'), ('and', 'CC'), ('applied', 'VBN'), ('to', 'TO'), ('curation', 'NN'), (',', ','), ('instituting', 'VBG'), ('processes', 'NNS'), ('or', 'CC'), ('even', 'RB'), ('to', 'TO'), ('politics', 'NNS'), ('at', 'IN'), ('large', 'JJ'), (':', ':'), ('the', 'DT'), ('unfolding', 'NN'), ('of', 'IN'), ('its', 'PRP$'), ('resonances', 'NNS'), ('and', 'CC'), ('consequences', 'NNS'), ('already', 'RB'), ('opens', 'VBZ'), ('this', 'DT'), ('possibility', 'NN'), ('and', 'CC'), ('even', 'RB'), ('beckons', 'NNS'), ('it', 'PRP'), ('.', '.'), ('(', '('), ('Nevertheless', 'NNP'), (',', ','), ('acknowledging', 'VBG'), ('it', 'PRP'), ('as', 'IN'), ('specific', 'JJ'), ('to', 'TO'), ('art', 'VB'), (',', ','), ('and', 'CC'), ('thus', 'RB'), ('as', 'IN'), ('a', 'DT'), ('means', 'NN'), ('without', 'IN'), ('ends', 'NNS'), (',', ','), ('seems', 'VBZ'), ('to', 'TO'), ('better', 'RBR'), ('protect', 'VB'), ('the', 'DT'), ('inner', 'JJ'), ('nature', 'NN'), ('and', 'CC'), ('the', 'DT'), ('intact', 'JJ'), ('potentiality', 'NN'), ('of', 'IN'), ('a', 'DT'), ('quality', 'NN'), ('that', 'WDT'), ('does', 'VBZ'), ('not', 'RB'), ('make', 'VB'), ('itself', 'PRP'), ('available', 'JJ'), ('for', 'IN'), ('any', 'DT'), ('use', 'NN'), ('and', 'CC'), ('does', 'VBZ'), ('not', 'RB'), ('serve', 'VB'), ('any', 'DT'), ('agenda', 'NN'), (',', ','), ('but', 'CC'), ('stays', 'VBZ'), ('autonomous', 'JJ'), ('and', 'CC'), ('operates', 'VBZ'), ('by', 'IN'), ('creating', 'VBG'), ('its', 'PRP$'), ('own', 'JJ'), ('conditions', 'NNS'), ('all', 'DT'), ('over', 'RB'), ('again', 'RB'), ('.', '.'), ('Here', 'RB'), (',', ','), ('spectators', 'NNS'), ('are', 'VBP'), ('invited', 'VBN'), ('to', 'TO'), ('enter', 'VB'), ('the', 'DT'), ('work', 'NN'), ('’', 'NNP'), ('s', 'VBD'), ('fictional', 'JJ'), ('world', 'NN'), ('carrying', 'VBG'), ('with', 'IN'), ('themselves', 'PRP'), ('the', 'DT'), ('so-called', 'JJ'), ('real', 'JJ'), ('world', 'NN'), ('and', 'CC'), ('all', 'DT'), ('their', 
'PRP$'), ('other', 'JJ'), ('fictional', 'JJ'), ('worlds', 'NNS'), (';', ':'), ('a', 'DT'), ('space', 'NN'), ('is', 'VBZ'), ('created', 'VBN'), ('where', 'WRB'), ('all', 'PDT'), ('these', 'DT'), ('worlds', 'NNS'), ('are', 'VBP'), ('equally', 'RB'), ('welcomed', 'VBN'), ('.', '.'), ('Undecidability', 'NN'), ('could', 'MD'), ('then', 'RB'), ('be', 'VB'), ('detached', 'VBN'), ('from', 'IN'), ('art', 'NN'), ('and', 'CC'), ('applied', 'VBN'), ('to', 'TO'), ('curation', 'NN'), (',', ','), ('instituting', 'VBG'), ('processes', 'NNS'), ('or', 'CC'), ('even', 'RB'), ('to', 'TO'), ('politics', 'NNS'), ('at', 'IN'), ('large', 'JJ'), (':', ':'), ('the', 'DT'), ('unfolding', 'NN'), ('of', 'IN'), ('its', 'PRP$'), ('resonances', 'NNS'), ('and', 'CC'), ('consequences', 'NNS'), ('already', 'RB'), ('opens', 'VBZ'), ('this', 'DT'), ('possibility', 'NN'), ('and', 'CC'), ('even', 'RB'), ('beckons', 'NNS'), ('it', 'PRP'), ('.', '.'), ('-Nevertheless', 'NN'), (',', ','), ('acknowledging', 'VBG'), ('it', 'PRP'), ('as', 'IN'), ('specific', 'JJ'), ('to', 'TO'), ('art', 'VB'), (',', ','), ('and', 'CC'), ('thus', 'RB'), ('as', 'IN'), ('a', 'DT'), ('means', 'NN'), ('without', 'IN'), ('ends', 'NNS'), (',', ','), ('seems', 'VBZ'), ('to', 'TO'), ('better', 'RBR'), ('protect', 'VB'), ('the', 'DT'), ('inner', 'JJ'), ('nature', 'NN'), ('and', 'CC'), ('the', 'DT'), ('intact', 'JJ'), ('potentiality', 'NN'), ('of', 'IN'), ('a', 'DT'), ('quality', 'NN'), ('that', 'WDT'), ('does', 'VBZ'), ('not', 'RB'), ('make', 'VB'), ('itself', 'PRP'), ('available', 'JJ'), ('for', 'IN'), ('any', 'DT'), ('use', 'NN'), ('and', 'CC'), ('does', 'VBZ'), ('not', 'RB'), ('serve', 'VB'), ('any', 'DT'), ('agenda', 'NN'), (',', ','), ('but', 'CC'), ('stays', 'VBZ'), ('autonomous', 'JJ'), ('and', 'CC'), ('operates', 'VBZ'), ('by', 'IN'), ('creating', 'VBG'), ('its', 'PRP$'), ('own', 'JJ'), ('conditions', 'NNS'), ('all', 'DT'), ('over', 'RB'), ('again', 'RB'), ('.', '.')]\n"
     ]
    }
   ],
   "source": [
    "# Attach a part-of-speech tag to each token, giving (word, tag) pairs.\n",
    "tagged = nltk.pos_tag(tokens)\n",
    "print(tagged)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now you could select, for example, all the **nouns** (every tag containing `NN`):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['fact', 'something', 'terms', 'possibility', 'actualisation', 'world', 'potentiality', 'form', 'kind', 'body', 'produces', 'course', 'image', 'structure', 'subject', 'sheer', 'potentiality', 'coexistence', 'media', 'angles', 'durations', 'discourses', 'forms', 'spectatorship', 'performance', 'bound', 'ontologies', 'Which', 't', 'realities', 'operation', 'modes', 'doesn', '’', 't', 'scene', '‘', 'world', 'potentiality', 'artworks', 'logic', 'addition', 'contradiction', 'art', 'coexistence', 'media', 'angles', 'durations', 'discourses', 'forms', 'spectatorship', 'performance', 'bound', 'ontologies', 'fact', 'undecidability', 'force', 'work', 'redefines', 'system', 'links', 'bounds', 'resonances', 'potential', 'worlds', 'kind', 'artworks', 'understanding', 'place', 'art', 'politics', 'today', 'multiplicity', 'gazes', 'forms', 'spectatorship', 'positions', 'points', 'view', 'line', 'thought', 'bit', 'art', '’', 's', 'potentiality', 'counterstrategy', 'proliferation', 'images', 'Undecidability', 'art', 'curation', 'processes', 'politics', 'unfolding', 'resonances', 'consequences', 'possibility', 'beckons', 'Nevertheless', 'means', 'ends', 'nature', 'potentiality', 'quality', 'use', 'agenda', 'conditions', 'spectators', 'work', '’', 'world', 'world', 'worlds', 'space', 'worlds', 'Undecidability', 'art', 'curation', 'processes', 'politics', 'unfolding', 'resonances', 'consequences', 'possibility', 'beckons', '-Nevertheless', 'means', 'ends', 'nature', 'potentiality', 'quality', 'use', 'agenda', 'conditions']\n"
     ]
    }
   ],
   "source": [
    "# Collect every word whose part-of-speech tag contains 'NN'.\n",
    "selection = [word for word, tag in tagged if 'NN' in tag]\n",
    "\n",
    "print(selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'today', 'proliferation', 'angles', 'art', 'worlds', 'counterstrategy', 'image', 'performance', 'consequences', 'sheer', 'addition', 'kind', 'means', 'work', 's', '’', '-Nevertheless', 'course', 'terms', 'line', 'spectators', 'spectatorship', 'world', 'thought', 'Undecidability', 'Which', 'images', 'bound', 'actualisation', 'something', 'forms', 'place', 'conditions', 'produces', 't', 'use', 'multiplicity', 'durations', 'points', 'undecidability', 'resonances', 'politics', 'realities', 'quality', 'discourses', 'system', 'operation', 'view', 'scene', 'agenda', 'body', 'modes', '‘', 'structure', 'force', 'potentiality', 'processes', 'redefines', 'ontologies', 'links', 'curation', 'beckons', 'possibility', 'ends', 'bounds', 'space', 'coexistence', 'media', 'potential', 'Nevertheless', 'form', 'nature', 'fact', 'understanding', 'positions', 'bit', 'artworks', 'doesn', 'logic', 'unfolding', 'contradiction', 'gazes', 'subject'}\n"
     ]
    }
   ],
   "source": [
    "# Rebuild the list of 'NN'-tagged words, then print it as a set so that\n",
    "# duplicate words are dropped from the display. Only the printed view is\n",
    "# de-duplicated: `selection` itself still contains repeated words.\n",
    "selection = [word for word, tag in tagged if 'NN' in tag]\n",
    "\n",
    "print(set(selection))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: use stopwords to filter the selection, because I don't need unnecessary characters such as , . ' etc."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['potentiality', 'consequences', 'images', 'world', 'art', 'gazes']\n"
     ]
    }
   ],
   "source": [
    "# Draw six distinct words. random.choices() samples with replacement from a list\n",
    "# that still contains repeats, so the same word could show up twice in one draw.\n",
    "unique_words = sorted(set(selection))\n",
    "nnrando2 = random.sample(unique_words, k=min(6, len(unique_words)))\n",
    "print(nnrando2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "potentiality and consequences and images and world and art and gazes\n"
     ]
    }
   ],
   "source": [
    "# Glue the sampled words together with \" and \" and show the phrase.\n",
    "nnand2 = str.join(\" and \", nnrando2)\n",
    "print(nnand2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['system', 'conditions', 'bounds', 'bound', 'redefines', 'force']\n"
     ]
    }
   ],
   "source": [
    "# Draw six distinct words. random.choices() samples with replacement from a list\n",
    "# that still contains repeats, so the same word could show up twice in one draw.\n",
    "unique_words = sorted(set(selection))\n",
    "nnrando3 = random.sample(unique_words, k=min(6, len(unique_words)))\n",
    "print(nnrando3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "system and conditions and bounds and bound and redefines and force\n"
     ]
    }
   ],
   "source": [
    "# Glue the sampled words together with \" and \" and show the phrase.\n",
    "nnand3 = str.join(\" and \", nnrando3)\n",
    "print(nnand3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['potentiality', 'fact', 'place', 'worlds', 'Nevertheless', 'conditions']\n"
     ]
    }
   ],
   "source": [
    "# Draw six distinct words. random.choices() samples with replacement from a list\n",
    "# that still contains repeats, so the same word could show up twice in one draw.\n",
    "unique_words = sorted(set(selection))\n",
    "nnrando4 = random.sample(unique_words, k=min(6, len(unique_words)))\n",
    "print(nnrando4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "potentiality and fact and place and worlds and Nevertheless and conditions\n"
     ]
    }
   ],
   "source": [
    "# Glue the sampled words together with \" and \" and show the phrase.\n",
    "nnand4 = str.join(\" and \", nnrando4)\n",
    "print(nnand4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['subject', 'forms', 'nature', 'modes', 'forms', 'gazes']\n"
     ]
    }
   ],
   "source": [
    "# Draw six distinct words. random.choices() samples with replacement from a list\n",
    "# that still contains repeats, so the same word could show up twice in one draw.\n",
    "unique_words = sorted(set(selection))\n",
    "nnrando5 = random.sample(unique_words, k=min(6, len(unique_words)))\n",
    "print(nnrando5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "subject and forms and nature and modes and forms and gazes\n"
     ]
    }
   ],
   "source": [
    "# Glue the sampled words together with \" and \" and show the phrase.\n",
    "nnand5 = str.join(\" and \", nnrando5)\n",
    "print(nnand5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['is', 'contains', 'performs', 'is', 'maintain', 'actualize', 'be', 'seen', 'is', 'is', 'actualize', 'keep', 'being', 'glimmering', 'implies', 'keeps', 'is', 'say', 'doesn', 'give', 'involving', 'fade', 'generated', 'is', 'grounded', 'is', 'implies', 'keeps', 'is', 'articulates', 'alters', 'is', 'produce', 'is', 'generate', 'coexist', 'mediating', 'stretch', 'propose', 'is', 'multiplying', 'surrounds', 'be', 'detached', 'applied', 'instituting', 'opens', 'acknowledging', 'art', 'seems', 'protect', 'does', 'make', 'does', 'serve', 'stays', 'operates', 'creating', 'are', 'invited', 'enter', 's', 'carrying', 'is', 'created', 'are', 'welcomed', 'be', 'detached', 'applied', 'instituting', 'opens', 'acknowledging', 'art', 'seems', 'protect', 'does', 'make', 'does', 'serve', 'stays', 'operates', 'creating']\n"
     ]
    }
   ],
   "source": [
    "# Keep every word whose tag contains 'VB' (the verb tags).\n",
    "selection = [word for word, tag in tagged if 'VB' in tag]\n",
    "\n",
    "print(selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'actualize', 'keep', 'instituting', 'protect', 'operates', 'make', 'being', 'art', 'serve', 'detached', 'are', 'articulates', 'welcomed', 'maintain', 'contains', 'created', 'produce', 'performs', 'applied', 'does', 'surrounds', 'say', 'carrying', 'generate', 'coexist', 'generated', 'seen', 'be', 'stretch', 'multiplying', 'alters', 'enter', 'give', 'creating', 'is', 'glimmering', 's', 'seems', 'acknowledging', 'implies', 'invited', 'stays', 'mediating', 'propose', 'doesn', 'fade', 'grounded', 'involving', 'keeps', 'opens'}\n"
     ]
    }
   ],
   "source": [
    "# Collect the 'VB' words again and print each one once. The unique words are\n",
    "# sorted so the printed order is the same on every run (bare set order varies\n",
    "# with Python's string-hash randomization).\n",
    "selection = [word for word, tag in tagged if 'VB' in tag]\n",
    "\n",
    "print(sorted(set(selection)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['alters', 'is', 'keep', 'is', 'detached', 'glimmering']\n"
     ]
    }
   ],
   "source": [
    "# Draw six distinct words. random.choices() samples with replacement from a list\n",
    "# that still contains repeats, so the same word could show up twice in one draw.\n",
    "unique_words = sorted(set(selection))\n",
    "vbrando = random.sample(unique_words, k=min(6, len(unique_words)))\n",
    "print(vbrando)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "alters and is and keep and is and detached and glimmering\n"
     ]
    }
   ],
   "source": [
    "# Glue the sampled words together with \" and \" and show the phrase.\n",
    "vband = str.join(\" and \", vbrando)\n",
    "print(vband)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['detached', 'invited', 'seen', 'is', 'is', 'does']\n"
     ]
    }
   ],
   "source": [
    "# Draw six distinct words. random.choices() samples with replacement from a list\n",
    "# that still contains repeats, so the same word could show up twice in one draw.\n",
    "unique_words = sorted(set(selection))\n",
    "vbrando2 = random.sample(unique_words, k=min(6, len(unique_words)))\n",
    "print(vbrando2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "detached and invited and seen and is and is and does\n"
     ]
    }
   ],
   "source": [
    "# Glue the sampled words together with \" and \" and show the phrase.\n",
    "vband2 = str.join(\" and \", vbrando2)\n",
    "print(vband2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['glimmering', 'seems', 'seen', 'make', 'invited', 'operates']\n"
     ]
    }
   ],
   "source": [
    "# Draw six distinct words. random.choices() samples with replacement from a list\n",
    "# that still contains repeats, so the same word could show up twice in one draw.\n",
    "unique_words = sorted(set(selection))\n",
    "vbrando3 = random.sample(unique_words, k=min(6, len(unique_words)))\n",
    "print(vbrando3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "glimmering and seems and seen and make and invited and operates\n"
     ]
    }
   ],
   "source": [
    "# Glue the sampled words together with \" and \" and show the phrase.\n",
    "vband3 = str.join(\" and \", vbrando3)\n",
    "print(vband3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['possible', 'certain', 'potential', 'actual', 'collective', 'undecidability', 'possible', 'future', 'societal', 'enigmatic', 'different', 'different', 'undecidable', 'real', 'fictional', '’', 'different', 'real', '’', 'particular', 'undecidable', 'specific', 'different', 'different', 'undecidable', 'real', 'fictional', 'specific', 'complex', 'different', 'actual', 'peculiar', 'next', 'other', 'own', 'further', 'visible', 'actual', 'large', 'specific', 'inner', 'intact', 'available', 'autonomous', 'own', 'fictional', 'so-called', 'real', 'other', 'fictional', 'large', 'specific', 'inner', 'intact', 'available', 'autonomous', 'own']\n"
     ]
    }
   ],
   "source": [
    "# Keep every word whose tag contains 'JJ' (the adjective tags).\n",
    "selection = [word for word, tag in tagged if 'JJ' in tag]\n",
    "\n",
    "print(selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'next', 'undecidable', 'different', 'large', 'undecidability', 'societal', 'complex', 'enigmatic', 'autonomous', 'fictional', 'potential', 'available', 'further', 'actual', 'inner', 'future', 'intact', 'own', '’', 'so-called', 'real', 'certain', 'particular', 'specific', 'other', 'visible', 'collective', 'peculiar', 'possible'}\n"
     ]
    }
   ],
   "source": [
    "# Collect the 'JJ' words again and print each one once. The unique words are\n",
    "# sorted so the printed order is the same on every run (bare set order varies\n",
    "# with Python's string-hash randomization).\n",
    "selection = [word for word, tag in tagged if 'JJ' in tag]\n",
    "\n",
    "print(sorted(set(selection)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['possible', 'real', 'undecidability', 'collective', 'actual', 'other']\n"
     ]
    }
   ],
   "source": [
    "# Draw six distinct words. random.choices() samples with replacement from a list\n",
    "# that still contains repeats, so the same word could show up twice in one draw.\n",
    "unique_words = sorted(set(selection))\n",
    "jjrando = random.sample(unique_words, k=min(6, len(unique_words)))\n",
    "print(jjrando)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "possible and real and undecidability and collective and actual and other\n"
     ]
    }
   ],
   "source": [
    "# Glue the sampled words together with \" and \" and show the phrase.\n",
    "jjand = str.join(\" and \", jjrando)\n",
    "print(jjand)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['particular', 'real', 'inner', 'different', 'autonomous', 'specific']\n"
     ]
    }
   ],
   "source": [
    "# Draw six distinct words. random.choices() samples with replacement from a list\n",
    "# that still contains repeats, so the same word could show up twice in one draw.\n",
    "unique_words = sorted(set(selection))\n",
    "jjrando2 = random.sample(unique_words, k=min(6, len(unique_words)))\n",
    "print(jjrando2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "particular and real and inner and different and autonomous and specific\n"
     ]
    }
   ],
   "source": [
    "# Glue the sampled words together with \" and \" and show the phrase.\n",
    "jjand2 = str.join(\" and \", jjrando2)\n",
    "print(jjand2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['specific', 'future', 'actual', 'next', 'own', 'own']\n"
     ]
    }
   ],
   "source": [
    "# Draw six distinct words. random.choices() samples with replacement from a list\n",
    "# that still contains repeats, so the same word could show up twice in one draw.\n",
    "unique_words = sorted(set(selection))\n",
    "jjrando3 = random.sample(unique_words, k=min(6, len(unique_words)))\n",
    "print(jjrando3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "specific and future and actual and next and own and own\n"
     ]
    }
   ],
   "source": [
    "# Glue the sampled words together with \" and \" and show the phrase.\n",
    "jjand3 = str.join(\" and \", jjrando3)\n",
    "print(jjand3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['the', 'a', 'The', 'that', 'an', 'a', 'an', 'a', 'the', 'the', 'an', 'the', 'the', 'the', 'a', 'that', 'the', 'the', 'an', 'a', 'that', 'the', 'this', 'an', 'the', 'a', 'that', 'the', 'this', 'a', 'the', 'an', 'the', 'that', 'the', 'this', 'a', 'the', 'the', 'a', 'that', 'any', 'any', 'all', 'the', 'the', 'all', 'a', 'all', 'these', 'the', 'this', 'a', 'the', 'the', 'a', 'that', 'any', 'any', 'all']\n"
     ]
    }
   ],
   "source": [
    "# Keep every word whose tag contains 'DT' (determiners).\n",
    "# NOTE: this substring test also matches the PDT and WDT tags.\n",
    "selection = [word for word, tag in tagged if 'DT' in tag]\n",
    "\n",
    "print(selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['and', 'and', 'or', 'but', 'but', 'and', 'and', 'and', 'and', 'and', 'and', 'or', 'and', 'and', 'and', 'and', 'and', 'and', 'and', 'and', 'or', 'and', 'and', 'and', 'and', 'and', 'but', 'and', 'and', 'and', 'or', 'and', 'and', 'and', 'and', 'and', 'but', 'and']\n"
     ]
    }
   ],
   "source": [
    "# Keep every word whose tag contains 'CC' (coordinating conjunctions).\n",
    "selection = [word for word, tag in tagged if 'CC' in tag]\n",
    "\n",
    "print(selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['one', 'one']\n"
     ]
    }
   ],
   "source": [
    "# Keep every word whose tag contains 'CD' (cardinal numbers).\n",
    "selection = [word for word, tag in tagged if 'CD' in tag]\n",
    "\n",
    "print(selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['In', 'if', 'under', 'of', 'into', 'of', 'of', 'as', 'of', 'If', 'of', 'of', 'between', 'if', 'on', 'into', 'from', 'of', 'In', 'by', 'in', 'of', 'of', 'If', 'of', 'of', 'between', 'In', 'at', 'of', 'between', 'of', 'within', 'of', 'of', 'of', 'that', 'of', 'of', 'of', 'without', 'between', 'of', 'of', 'that', 'that', 'of', 'as', 'of', 'from', 'at', 'of', 'as', 'as', 'without', 'of', 'for', 'by', 'with', 'from', 'at', 'of', 'as', 'as', 'without', 'of', 'for', 'by']\n"
     ]
    }
   ],
   "source": [
    "# Keep every word whose tag contains 'IN' (prepositions and subordinating\n",
    "# conjunctions).\n",
    "selection = [word for word, tag in tagged if 'IN' in tag]\n",
    "\n",
    "print(selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['when', 'when', 'never', 'rather', 'not', 'there', 'already', 'radically', 'already', 'consciously', 'then', 'also', 'then', 'even', 'already', 'even', 'thus', 'better', 'not', 'not', 'over', 'again', 'Here', 'where', 'equally', 'then', 'even', 'already', 'even', 'thus', 'better', 'not', 'not', 'over', 'again']\n"
     ]
    }
   ],
   "source": [
    "# Keep every word whose tag contains 'RB' (adverbs, including RBR and RBS).\n",
    "# NOTE: this substring test also matches the WRB (wh-adverb) tag.\n",
    "selection = [word for word, tag in tagged if 'RB' in tag]\n",
    "\n",
    "print(selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['better', 'better']\n"
     ]
    }
   ],
   "source": [
    "# Keep every word whose tag contains 'RBR' (comparative adverbs).\n",
    "selection = [word for word, tag in tagged if 'RBR' in tag]\n",
    "\n",
    "print(selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[]\n"
     ]
    }
   ],
   "source": [
    "# Keep every word whose tag contains 'RBS' (superlative adverbs).\n",
    "selection = [word for word, tag in tagged if 'RBS' in tag]\n",
    "\n",
    "print(selection)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Where do these tags come from?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> An off-the-shelf tagger is available for English. It uses the Penn Treebank tagset.\n",
    "\n",
    "From: http://www.nltk.org/api/nltk.tag.html#module-nltk.tag"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> NLTK provides documentation for each tag, which can be queried using the tag, e.g. nltk.help.upenn_tagset('RB').\n",
    "\n",
    "From: http://www.nltk.org/book_1ed/ch05.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "ename": "LookupError",
     "evalue": "\n**********************************************************************\n  Resource \u001b[93mtagsets\u001b[0m not found.\n  Please use the NLTK Downloader to obtain the resource:\n\n  \u001b[31m>>> import nltk\n  >>> nltk.download('tagsets')\n  \u001b[0m\n  For more information see: https://www.nltk.org/data.html\n\n  Attempted to load \u001b[93mhelp/tagsets/PY3/upenn_tagset.pickle\u001b[0m\n\n  Searched in:\n    - '/home/namikim/nltk_data'\n    - '/usr/nltk_data'\n    - '/usr/share/nltk_data'\n    - '/usr/lib/nltk_data'\n    - '/usr/share/nltk_data'\n    - '/usr/local/share/nltk_data'\n    - '/usr/lib/nltk_data'\n    - '/usr/local/lib/nltk_data'\n    - ''\n**********************************************************************\n",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mLookupError\u001b[0m                               Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-47-3ec8764fce3e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnltk\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhelp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupenn_tagset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'RB'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/nltk/help.py\u001b[0m in \u001b[0;36mupenn_tagset\u001b[0;34m(tagpattern)\u001b[0m\n\u001b[1;32m     25\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     26\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mupenn_tagset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtagpattern\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m     \u001b[0m_format_tagset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"upenn_tagset\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtagpattern\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/nltk/help.py\u001b[0m in \u001b[0;36m_format_tagset\u001b[0;34m(tagset, tagpattern)\u001b[0m\n\u001b[1;32m     44\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     45\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_format_tagset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtagset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtagpattern\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 46\u001b[0;31m     \u001b[0mtagdict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"help/tagsets/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtagset\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\".pickle\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     47\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mtagpattern\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     48\u001b[0m         \u001b[0m_print_entries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtagdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtagdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/nltk/data.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(resource_url, format, cache, verbose, logic_parser, fstruct_reader, encoding)\u001b[0m\n\u001b[1;32m    750\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    751\u001b[0m     \u001b[0;31m# Load the resource.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 752\u001b[0;31m     \u001b[0mopened_resource\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_open\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresource_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    753\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    754\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mformat\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"raw\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/nltk/data.py\u001b[0m in \u001b[0;36m_open\u001b[0;34m(resource_url)\u001b[0m\n\u001b[1;32m    875\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    876\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mprotocol\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mprotocol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlower\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"nltk\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 877\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    878\u001b[0m     \u001b[0;32melif\u001b[0m \u001b[0mprotocol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlower\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"file\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    879\u001b[0m         \u001b[0;31m# urllib might not use mode='rb', so handle this one ourselves:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/nltk/data.py\u001b[0m in \u001b[0;36mfind\u001b[0;34m(resource_name, paths)\u001b[0m\n\u001b[1;32m    583\u001b[0m     \u001b[0msep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"*\"\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m70\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    584\u001b[0m     \u001b[0mresource_not_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"\\n%s\\n%s\\n%s\\n\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 585\u001b[0;31m     \u001b[0;32mraise\u001b[0m \u001b[0mLookupError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresource_not_found\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    586\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    587\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mLookupError\u001b[0m: \n**********************************************************************\n  Resource \u001b[93mtagsets\u001b[0m not found.\n  Please use the NLTK Downloader to obtain the resource:\n\n  \u001b[31m>>> import nltk\n  >>> nltk.download('tagsets')\n  \u001b[0m\n  For more information see: https://www.nltk.org/data.html\n\n  Attempted to load \u001b[93mhelp/tagsets/PY3/upenn_tagset.pickle\u001b[0m\n\n  Searched in:\n    - '/home/namikim/nltk_data'\n    - '/usr/nltk_data'\n    - '/usr/share/nltk_data'\n    - '/usr/lib/nltk_data'\n    - '/usr/share/nltk_data'\n    - '/usr/local/share/nltk_data'\n    - '/usr/lib/nltk_data'\n    - '/usr/local/lib/nltk_data'\n    - ''\n**********************************************************************\n"
     ]
    }
   ],
   "source": [
    "# The tag documentation lives in the separate 'tagsets' resource. Download it\n",
    "# first so the lookup below does not fail with LookupError when the resource\n",
    "# is missing.\n",
    "nltk.download('tagsets', quiet=True)\n",
    "nltk.help.upenn_tagset('RB')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "------------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An alphabetical list of part-of-speech tags used in the Penn Treebank Project ([link](https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html)):\n",
    "\n",
    "<table cellspacing=\"2\" cellpadding=\"2\" border=\"0\">\n",
    "  <tbody><tr bgcolor=\"#DFDFFF\" align=\"none\"> \n",
    "    <td align=\"none\"> \n",
    "      <div align=\"left\">Number</div>\n",
    "    </td>\n",
    "    <td> \n",
    "      <div align=\"left\">Tag</div>\n",
    "    </td>\n",
    "    <td> \n",
    "      <div align=\"left\">Description</div>\n",
    "    </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 1. </td>\n",
    "    <td>CC </td>\n",
    "    <td>Coordinating conjunction </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 2. </td>\n",
    "    <td>CD </td>\n",
    "    <td>Cardinal number </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 3. </td>\n",
    "    <td>DT </td>\n",
    "    <td>Determiner </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 4. </td>\n",
    "    <td>EX </td>\n",
    "    <td>Existential <i>there</i> </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 5. </td>\n",
    "    <td>FW </td>\n",
    "    <td>Foreign word </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 6. </td>\n",
    "    <td>IN </td>\n",
    "    <td>Preposition or subordinating conjunction </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 7. </td>\n",
    "    <td>JJ </td>\n",
    "    <td>Adjective </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 8. </td>\n",
    "    <td>JJR </td>\n",
    "    <td>Adjective, comparative </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 9. </td>\n",
    "    <td>JJS </td>\n",
    "    <td>Adjective, superlative </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 10. </td>\n",
    "    <td>LS </td>\n",
    "    <td>List item marker </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 11. </td>\n",
    "    <td>MD </td>\n",
    "    <td>Modal </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 12. </td>\n",
    "    <td>NN </td>\n",
    "    <td>Noun, singular or mass </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 13. </td>\n",
    "    <td>NNS </td>\n",
    "    <td>Noun, plural </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 14. </td>\n",
    "    <td>NNP </td>\n",
    "    <td>Proper noun, singular </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 15. </td>\n",
    "    <td>NNPS </td>\n",
    "    <td>Proper noun, plural </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 16. </td>\n",
    "    <td>PDT </td>\n",
    "    <td>Predeterminer </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 17. </td>\n",
    "    <td>POS </td>\n",
    "    <td>Possessive ending </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 18. </td>\n",
    "    <td>PRP </td>\n",
    "    <td>Personal pronoun </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 19. </td>\n",
    "    <td>PRP\\$ </td>\n",
    "    <td>Possessive pronoun </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 20. </td>\n",
    "    <td>RB </td>\n",
    "    <td>Adverb </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 21. </td>\n",
    "    <td>RBR </td>\n",
    "    <td>Adverb, comparative </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 22. </td>\n",
    "    <td>RBS </td>\n",
    "    <td>Adverb, superlative </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 23. </td>\n",
    "    <td>RP </td>\n",
    "    <td>Particle </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 24. </td>\n",
    "    <td>SYM </td>\n",
    "    <td>Symbol </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 25. </td>\n",
    "    <td>TO </td>\n",
    "    <td><i>to</i> </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 26. </td>\n",
    "    <td>UH </td>\n",
    "    <td>Interjection </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 27. </td>\n",
    "    <td>VB </td>\n",
    "    <td>Verb, base form </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 28. </td>\n",
    "    <td>VBD </td>\n",
    "    <td>Verb, past tense </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 29. </td>\n",
    "    <td>VBG </td>\n",
    "    <td>Verb, gerund or present participle </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 30. </td>\n",
    "    <td>VBN </td>\n",
    "    <td>Verb, past participle </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 31. </td>\n",
    "    <td>VBP </td>\n",
    "    <td>Verb, non-3rd person singular present </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 32. </td>\n",
    "    <td>VBZ </td>\n",
    "    <td>Verb, 3rd person singular present </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 33. </td>\n",
    "    <td>WDT </td>\n",
    "    <td>Wh-determiner </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 34. </td>\n",
    "    <td>WP </td>\n",
    "    <td>Wh-pronoun </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 35. </td>\n",
    "    <td>WP\\$ </td>\n",
    "    <td>Possessive wh-pronoun </td>\n",
    "  </tr>\n",
    "  <tr bgcolor=\"#FFFFCA\"> \n",
    "    <td align=\"none\"> 36. </td>\n",
    "    <td>WRB </td>\n",
    "    <td>Wh-adverb \n",
    "</td></tr></tbody></table>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A telling/tricky case\n",
    "It's important to realize that a POS tag is not a fixed property of a word: it depends on the context in which the word is used. The NLTK book gives an example of [homonyms](http://www.nltk.org/book_1ed/ch05.html#using-a-tagger) -- words that are written the same, but are pronounced differently and have different meanings depending on their use."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tokenize a sentence in which \"refuse\" and \"permit\" each occur twice,\n",
    "# then tag the tokens.\n",
    "text = nltk.word_tokenize(\n",
    "    \"They refuse to permit us to obtain the refuse permit\"\n",
    ")\n",
    "nltk.pos_tag(text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "From the book:\n",
    "\n",
    "> Notice that refuse and permit both appear as a present tense verb (VBP) and a noun (NN). E.g. refUSE is a verb meaning \"deny,\" while REFuse is a noun meaning \"trash\" (i.e. they are not homophones). Thus, we need to know which word is being used in order to pronounce the text correctly. (For this reason, text-to-speech systems usually perform POS-tagging.)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Applying to an entire text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the whole text inside a context manager so the file handle is closed\n",
    "# as soon as its contents are in memory.\n",
    "with open('../txt/language.txt') as language_file:\n",
    "    language = language_file.read()\n",
    "\n",
    "# Split the text into tokens, then attach a part-of-speech tag to each one.\n",
    "tokens = nltk.word_tokenize(language)\n",
    "tagged = nltk.pos_tag(tokens)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the (word, tag) pairs produced by nltk.pos_tag above.\n",
    "tagged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Break the sample sentence into a list of words at whitespace.\n",
    "words = str.split(\"in the beginning was heaven and earth and the time of the whatever\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the resulting list of words.\n",
    "words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Position of \"the\" in the list; list.index stops at the first match.\n",
    "words.index(\"the\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Walk the list with positions: print index and word for every \"the\",\n",
    "# and the upper-cased word for everything else.\n",
    "for i, word in enumerate(words):\n",
    "    if word != \"the\":\n",
    "        print (word.upper())\n",
    "        continue\n",
    "    print (i, word)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "# Store every token of the sample sentence under the \"VB\" key (no tag check is\n",
    "# made here), then pick one of them at random.\n",
    "words = {\"VB\": []}\n",
    "words[\"VB\"].extend(\n",
    "    nltk.word_tokenize(\"in the beginning was heaven and earth and the time of the whatever\")\n",
    ")\n",
    "\n",
    "random.choice(words[\"VB\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}