{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pattern.search import STRICT, search\n",
    "from pattern.en import parsetree"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Searching a text with `pattern.search`\n",
    "\n",
    "Part-of-speech pattern searches over a parsed essay, followed by a WordNet hypernym lookup.\n",
    "\n",
    "- Documentation: https://github.com/clips/pattern/wiki/pattern-search\n",
    "- Inspired by the search in [videogrep](https://github.com/antiboredom/videogrep/blob/master/videogrep/searcher.py)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The essay contains non-ASCII characters (curly quotes, \"Pirahã\"), so the\n",
    "# encoding is stated explicitly instead of relying on the platform default.\n",
    "# NOTE(review): UTF-8 is assumed for the source file - confirm.\n",
    "# The context manager closes the file handle once the text has been read.\n",
    "with open(\"../txt/words-for-the-future/OTHERNESS.txt\", encoding=\"utf-8\") as source_file:\n",
    "    text = source_file.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Otherness | Daniel L. Everett\\n\\nWhen I was 26, I moved to the Amazon, from California, in order to st'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text[:100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The installed pattern release ends its `_read()` generator with\n",
    "# `raise StopIteration`. On Python 3.7+ (PEP 479) that surfaces as\n",
    "# \"RuntimeError: generator raised StopIteration\" the first time a lazily\n",
    "# loaded resource (e.g. the lexicon) is read. A resource is only loaded while\n",
    "# it is still empty, so repeating the call is expected to get past the failure.\n",
    "MAX_PARSE_ATTEMPTS = 5  # NOTE(review): upper bound is a guess; fewer retries may suffice\n",
    "for attempt in range(1, MAX_PARSE_ATTEMPTS + 1):\n",
    "    try:\n",
    "        tree = parsetree(text)\n",
    "        break\n",
    "    except RuntimeError as error:\n",
    "        # Re-raise anything that is not the known PEP 479 failure, and give up\n",
    "        # after the last attempt instead of hiding a persistent error.\n",
    "        if \"StopIteration\" not in str(error) or attempt == MAX_PARSE_ATTEMPTS:\n",
    "            raise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tree"
   ]
  },
  { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Sentence(\"This/DT/O/O encounter/RB/B-ADVP/O 
with/IN/B-PP/O these/DT/O/O ‘/''/O/O others/NNS/B-NP/O ,/,/O/O ’/''/O/O so/RB/B-ADVP/O unlike/IN/B-PP/B-PNP myself/PRP/B-NP/I-PNP ,/,/O/O was/VBD/B-VP/O to/TO/I-VP/O be/VB/I-VP/O the/DT/O/O defining/VBG/B-VP/O experience/NN/B-NP/O for/IN/B-PP/B-PNP the/DT/B-NP/I-PNP rest/NN/I-NP/I-PNP of/IN/B-PP/B-PNP my/PRP$/B-NP/I-PNP life/NN/I-NP/I-PNP ././O/O\")" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tree[7]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true } }, "outputs": [ { "data": { "text/plain": [ "[Match(words=[Word('unrelated/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('small/JJ')]),\n", " Match(words=[Word('missionary/JJ')]),\n", " Match(words=[Word('bumpy/JJ')]),\n", " Match(words=[Word('first/JJ')]),\n", " Match(words=[Word('weak/JJ')]),\n", " Match(words=[Word('taut/JJ')]),\n", " Match(words=[Word('unrelated/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('many/JJ')]),\n", " Match(words=[Word('little/JJ')]),\n", " Match(words=[Word('easy/JJ')]),\n", " Match(words=[Word('enough/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('uncomfortable/JJ')]),\n", " Match(words=[Word('suspicious/JJ')]),\n", " Match(words=[Word('new/JJ')]),\n", " Match(words=[Word('different/JJ')]),\n", " Match(words=[Word('simple/JJ')]),\n", " Match(words=[Word('binary/JJ')]),\n", " Match(words=[Word('old/JJ')]),\n", " Match(words=[Word('religious/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('different/JJ')]),\n", " Match(words=[Word('unintelligible/JJ')]),\n", " Match(words=[Word('different-looking/JJ')]),\n", " Match(words=[Word('otherness/JJ')]),\n", " Match(words=[Word('live/JJ')]),\n", " Match(words=[Word('unacceptable/JJ')]),\n", " Match(words=[Word('missionary/JJ')]),\n", " Match(words=[Word('eternal/JJ')]),\n", " Match(words=[Word('encounter/JJ')]),\n", " 
Match(words=[Word('uneasy/JJ')]),\n", " Match(words=[Word('dangerous/JJ')]),\n", " Match(words=[Word('insufficient/JJ')]),\n", " Match(words=[Word('ethno-centric/JJ')]),\n", " Match(words=[Word('own/JJ')]),\n", " Match(words=[Word('fortunate/JJ')]),\n", " Match(words=[Word('gentle/JJ')]),\n", " Match(words=[Word('many/JJ')]),\n", " Match(words=[Word('silly/JJ')]),\n", " Match(words=[Word('years-long/JJ')]),\n", " Match(words=[Word('first/JJ')]),\n", " Match(words=[Word('first/JJ')]),\n", " Match(words=[Word('young/JJ')]),\n", " Match(words=[Word('large/JJ')]),\n", " Match(words=[Word('small/JJ')]),\n", " Match(words=[Word('fresh/JJ')]),\n", " Match(words=[Word('young/JJ')]),\n", " Match(words=[Word('then-unintelligible/JJ')]),\n", " Match(words=[Word('easy/JJ')]),\n", " Match(words=[Word('polite/JJ')]),\n", " Match(words=[Word('Many/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('Western/JJ')]),\n", " Match(words=[Word('polite/JJ')]),\n", " Match(words=[Word('Western/JJ')]),\n", " Match(words=[Word('first/JJ')]),\n", " Match(words=[Word('close/JJ')]),\n", " Match(words=[Word('first/JJ')]),\n", " Match(words=[Word('small/JJ')]),\n", " Match(words=[Word('early/JJ')]),\n", " Match(words=[Word('subsequent/JJ')]),\n", " Match(words=[Word('individual/JJ')]),\n", " Match(words=[Word('normal/JJ')]),\n", " Match(words=[Word('comfortable/JJ')]),\n", " Match(words=[Word('new/JJ')]),\n", " Match(words=[Word('new/JJ')]),\n", " Match(words=[Word('dissonant/JJ')]),\n", " Match(words=[Word('steady/JJ')]),\n", " Match(words=[Word('familiar/JJ')]),\n", " Match(words=[Word('Comfort/JJ')]),\n", " Match(words=[Word('acquired/JJ')]),\n", " Match(words=[Word('different/JJ')]),\n", " Match(words=[Word('different/JJ')]),\n", " Match(words=[Word('sexual/JJ')]),\n", " Match(words=[Word('different/JJ')]),\n", " Match(words=[Word('biological/JJ')]),\n", " Match(words=[Word('little/JJ')]),\n", " Match(words=[Word('possible/JJ')]),\n", " 
Match(words=[Word('worthwhile/JJ')]),\n", " Match(words=[Word('obvious/JJ')]),\n", " Match(words=[Word('first/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('behavioral/JJ')]),\n", " Match(words=[Word('earliest/JJ')]),\n", " Match(words=[Word('normal/JJ')]),\n", " Match(words=[Word('correct/JJ')]),\n", " Match(words=[Word('crucial/JJ')]),\n", " Match(words=[Word('in-group/JJ')]),\n", " Match(words=[Word('social/JJ')]),\n", " Match(words=[Word('else/JJ')]),\n", " Match(words=[Word('own/JJ')]),\n", " Match(words=[Word('own/JJ')]),\n", " Match(words=[Word('familiar/JJ')]),\n", " Match(words=[Word('fit/JJ')]),\n", " Match(words=[Word('several/JJ')]),\n", " Match(words=[Word('Pirahã/JJ')]),\n", " Match(words=[Word('full/JJ')]),\n", " Match(words=[Word('noticed/JJ')]),\n", " Match(words=[Word('old/JJ')]),\n", " Match(words=[Word('sharp/JJ')]),\n", " Match(words=[Word('30cm/JJ')]),\n", " Match(words=[Word('dangerous/JJ')]),\n", " Match(words=[Word('handed/JJ')]),\n", " Match(words=[Word('non-life-threatening/JJ')]),\n", " Match(words=[Word('necessary/JJ')]),\n", " Match(words=[Word('Dutch/JJ')]),\n", " Match(words=[Word('sharp/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('m)otherness/JJ')]),\n", " Match(words=[Word('sure/JJ')]),\n", " Match(words=[Word('able/JJ')]),\n", " Match(words=[Word('occasional/JJ')]),\n", " Match(words=[Word('interesting/JJ')]),\n", " Match(words=[Word('crooked/JJ')]),\n", " Match(words=[Word('straight/JJ')]),\n", " Match(words=[Word('bizarre/JJ')]),\n", " Match(words=[Word('American/JJ')]),\n", " Match(words=[Word('missionary/JJ')]),\n", " Match(words=[Word('excited/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('Pirahã/JJ')]),\n", " Match(words=[Word('native/JJ')]),\n", " Match(words=[Word('native/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('local/JJ')]),\n", " Match(words=[Word('comfortable/JJ')]),\n", " Match(words=[Word('similar/JJ')]),\n", " 
Match(words=[Word('many/JJ')]),\n", " Match(words=[Word('favorite/JJ')]),\n", " Match(words=[Word('American/JJ')]),\n", " Match(words=[Word('different/JJ')]),\n", " Match(words=[Word('irrelevant/JJ')]),\n", " Match(words=[Word('brilliant/JJ')]),\n", " Match(words=[Word('boring/JJ')]),\n", " Match(words=[Word('adjacent/JJ')]),\n", " Match(words=[Word('full/JJ')]),\n", " Match(words=[Word('brilliant/JJ')]),\n", " Match(words=[Word('good/JJ')]),\n", " Match(words=[Word('human/JJ')]),\n", " Match(words=[Word('independent/JJ')]),\n", " Match(words=[Word('natural/JJ')]),\n", " Match(words=[Word('solitary/JJ')]),\n", " Match(words=[Word('strange/JJ')]),\n", " Match(words=[Word('slow/JJ')]),\n", " Match(words=[Word('otherness/JJ')]),\n", " Match(words=[Word('original/JJ')]),\n", " Match(words=[Word('paradoxical/JJ')]),\n", " Match(words=[Word('otherness/JJ')]),\n", " Match(words=[Word('otherness/JJ')]),\n", " Match(words=[Word('panoramic/JJ')]),\n", " Match(words=[Word('own/JJ')]),\n", " Match(words=[Word('unique/JJ')]),\n", " Match(words=[Word('important/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('individual/JJ')]),\n", " Match(words=[Word('s/JJ')]),\n", " Match(words=[Word('small/JJ')]),\n", " Match(words=[Word('read/JJ')]),\n", " Match(words=[Word('possible/JJ')]),\n", " Match(words=[Word('own/JJ')]),\n", " Match(words=[Word('i]The/JJ')]),\n", " Match(words=[Word('poor/JJ')]),\n", " Match(words=[Word('good/JJ')]),\n", " Match(words=[Word('measurable/JJ')]),\n", " Match(words=[Word('daily/JJ')]),\n", " Match(words=[Word('social/JJ')]),\n", " Match(words=[Word('little/JJ')]),\n", " Match(words=[Word('new/JJ')]),\n", " Match(words=[Word('conceptual/JJ')]),\n", " Match(words=[Word('cultural/JJ')]),\n", " Match(words=[Word('social/JJ')]),\n", " Match(words=[Word('predictable/JJ')]),\n", " Match(words=[Word('predictable/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('thinking/JJ')]),\n", " 
Match(words=[Word('strong/JJ')]),\n", " Match(words=[Word('desirable/JJ')]),\n", " Match(words=[Word('unexpected/JJ')]),\n", " Match(words=[Word('constant/JJ')]),\n", " Match(words=[Word('useful/JJ')]),\n", " Match(words=[Word('biological/JJ')]),\n", " Match(words=[Word('cognitive/JJ')]),\n", " Match(words=[Word('cultural/JJ')]),\n", " Match(words=[Word('unsuccessful/JJ')]),\n", " Match(words=[Word('strange/JJ')]),\n", " Match(words=[Word('successful/JJ')]),\n", " Match(words=[Word('own/JJ')]),\n", " Match(words=[Word('little/JJ')]),\n", " Match(words=[Word('such/JJ')]),\n", " Match(words=[Word('political/JJ')]),\n", " Match(words=[Word('important/JJ')]),\n", " Match(words=[Word('new/JJ')]),\n", " Match(words=[Word('new/JJ')]),\n", " Match(words=[Word('unable/JJ')]),\n", " Match(words=[Word('little/JJ')]),\n", " Match(words=[Word('little/JJ')]),\n", " Match(words=[Word('18th/JJ')]),\n", " Match(words=[Word('identical/JJ')]),\n", " Match(words=[Word('light/JJ')]),\n", " Match(words=[Word('multiple/JJ')]),\n", " Match(words=[Word('familiar/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('new/JJ')]),\n", " Match(words=[Word('new/JJ')]),\n", " Match(words=[Word('same/JJ')]),\n", " Match(words=[Word('same/JJ')]),\n", " Match(words=[Word('same/JJ')]),\n", " Match(words=[Word('same/JJ')]),\n", " Match(words=[Word('same/JJ')]),\n", " Match(words=[Word('same/JJ')]),\n", " Match(words=[Word('new/JJ')]),\n", " Match(words=[Word('anti-immigration/JJ')]),\n", " Match(words=[Word('political/JJ')]),\n", " Match(words=[Word('otherness/JJ')]),\n", " Match(words=[Word('otherness/JJ')]),\n", " Match(words=[Word('motivated/JJ')]),\n", " Match(words=[Word('ultimate/JJ')]),\n", " Match(words=[Word('otherness/JJ')]),\n", " Match(words=[Word('cognitive/JJ')]),\n", " Match(words=[Word('new/JJ')]),\n", " Match(words=[Word('new/JJ')]),\n", " Match(words=[Word('own/JJ')]),\n", " Match(words=[Word('only/JJ')]),\n", " Match(words=[Word('otherness/JJ')]),\n", " 
Match(words=[Word('otherness/JJ')]),\n", " Match(words=[Word('invented/JJ')]),\n", " Match(words=[Word('communal/JJ')]),\n", " Match(words=[Word('cultural/JJ')]),\n", " Match(words=[Word('cultural/JJ')]),\n", " Match(words=[Word('human/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('distinct/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('s/JJ')]),\n", " Match(words=[Word('other/JJ')]),\n", " Match(words=[Word('Amazonian/JJ')]),\n", " Match(words=[Word('doomed/JJ')]),\n", " Match(words=[Word('Greek/JJ')]),\n", " Match(words=[Word('repetitive/JJ')]),\n", " Match(words=[Word('daily/JJ')]),\n", " Match(words=[Word('huge/JJ')]),\n", " Match(words=[Word('only/JJ')]),\n", " Match(words=[Word('same/JJ')]),\n", " Match(words=[Word('next/JJ')])]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search(\"JJ\", tree)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Match(words=[Word('study/VB'), Word('the/DT'), Word('language/NN')]),\n", " Match(words=[Word('be/VB'), Word('a/DT'), Word('prostitute/NN')]),\n", " Match(words=[Word('seem/VB'), Word('that/DT'), Word('way/NN')]),\n", " Match(words=[Word('conduct/VB'), Word('a/DT'), Word('pilot/NN')]),\n", " Match(words=[Word('let/VB'), Word('the/DT'), Word('stick/NN')]),\n", " Match(words=[Word('remove/VB'), Word('the/DT'), Word('otherness/NN')]),\n", " Match(words=[Word('occupy/VB'), Word('a/DT'), Word('part/NN')])]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "search('VB DT NN', tree)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "study the language\n", "be a prostitute\n", "seem that way\n", "conduct a pilot\n", "let the stick\n", "remove the otherness\n", "occupy a part\n" ] } ], "source": [ "for m in search (\"VB DT NN\", tree):\n", " 
print (f\"{m.string}\")" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'occupy a part'" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.string" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "My body\n", "my brain\n", "My task\n", "my life\n", "our species\n", "our child\n", "their differences\n", "my belief\n", "my encounter\n", "my own\n", "my silly beliefs\n", "my life\n", "my first day\n", "his hut\n", "its tongue\n", "our mother\n", "our mother\n", "our father\n", "our first experiences\n", "our values\n", "our mother and the select\n", "our subsequent lives\n", "Our earliest associations\n", "our narrow range\n", "our in-group\n", "my own writings.[1\n", "our family or our village\n", "our own identity\n", "our identity\n", "our family\n", "our norm\n", "our experience\n", "our expectations\n", "its occupants\n", "their beliefs and children\n", "his face\n", "his mother\n", "her toddler\n", "her child\n", "his quasi-stabbing\n", "her child\n", "her child a sharp knife\n", "its contribution\n", "our lives\n", "their language\n", "their translations\n", "their comments\n", "my request\n", "their language\n", "our behavior\n", "their language\n", "their culture\n", "their language\n", "Our sense\n", "our enveloping\n", "our childhood development\n", "our conversations and the structures\n", "our interactions\n", "their phrases\n", "my favorite book\n", "his year\n", "His year\n", "its institutions\n", "our senses\n", "our sense\n", "his lessons\n", "our sense\n", "our own unique identity\n", "our oneness\n", "my life\n", "his own question\n", "his essay\n", "our behavior\n", "His example\n", "his case\n", "our lives\n", "our lives\n", "our expectations\n", "our environments\n", "our own\n", "our Homo ancestors\n", "our environment\n", "their language\n", "their 
culture and language\n", "our familiar environment\n", "our world\n", "Our preference\n", "our fear then itself\n", "Our languages and cognitive abilities\n", "their relationships\n", "our own\n", "our species ability\n", "our human\n", "our greatest fears\n", "our greatest treasure\n", "his efforts\n" ] } ],
   "source": [
    "for m in search(\"PRP$ *\", tree):\n",
    "    print(m.string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pattern.en import wordnet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "sense = wordnet.synsets(\"language\")[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Synset('communication.n.02')"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sense.hypernym"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "matching neighbor\n", "matching friend\n", "matching child\n", "matching woman\n",
      "matching prostitute\n", "matching man\n", "matching man\n", "matching guest\n",
      "matching host\n", "matching mother\n", "matching mother\n", "matching father\n",
      "matching mother\n", "matching professor\n", "matching cowboy\n", "matching psychologist\n",
      "matching pilot\n", "matching toddler\n", "matching mother\n", "matching mother\n",
      "matching toddler\n", "matching woman\n", "matching baby\n", "matching child\n",
      "matching mother\n", "matching baby\n", "matching mother\n", "matching child\n",
      "matching mother\n", "matching child\n", "matching child\n", "matching child\n",
      "matching speaker\n", "matching speaker\n", "matching foreigner\n", "matching tourist\n",
      "matching friend\n", "matching man\n", "matching foreigner\n", "matching handyman\n",
      "matching stranger\n", "matching Homo\n", "matching Homo\n", "matching Homo\n"
     ]
    }
   ],
   "source": [
    "output = []\n",
    "search_word = \"person\"  # several terms can be given, separated by '|'\n",
    "# A separate loop variable keeps `search_word` intact; the earlier version\n",
    "# rebound it while iterating over its own split.\n",
    "for term in search_word.split(\"|\"):\n",
    "    term_synsets = wordnet.synsets(term)\n",
    "    if not term_synsets:\n",
    "        # Without a synset there is no part of speech to search for;\n",
    "        # indexing [0] here would raise IndexError.\n",
    "        print(f\"no WordNet synset for {term!r}, skipping\")\n",
    "        continue\n",
    "    # The part of speech of the term's first synset is handed to search()\n",
    "    # as the pattern that selects candidate words from the parse tree.\n",
    "    pos = term_synsets[0].pos\n",
    "    for match in search(pos, tree):\n",
    "        word = match[0].string\n",
    "        word_synsets = wordnet.synsets(word)\n",
    "        if not word_synsets:\n",
    "            continue\n",
    "        # Only the word's first synset is inspected. The word is kept when the\n",
    "        # term is the first sense of any synset in its recursive hypernyms.\n",
    "        hypernyms = word_synsets[0].hypernyms(recursive=True)\n",
    "        if any(term == hypernym.senses[0] for hypernym in hypernyms):\n",
    "            print(f\"matching {word}\")\n",
    "            output.append(word)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}