You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

746 lines
37 KiB
Plaintext

4 years ago
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
4 years ago
"metadata": {},
"outputs": [],
"source": [
"from pattern.search import STRICT, search\n",
"from pattern.en import parsetree"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook uses [pattern.search](https://github.com/clips/pattern/wiki/pattern-search) to query a parsed text with part-of-speech patterns.\n",
"(Inspired by the search in [videogrep](https://github.com/antiboredom/videogrep/blob/master/videogrep/searcher.py).)"
]
},
{
"cell_type": "code",
"execution_count": 2,
4 years ago
"metadata": {},
"outputs": [],
"source": [
"# Read the whole essay into memory. The context manager closes the file handle\n",
"# deterministically, and the explicit encoding keeps non-ASCII characters in the\n",
"# text from depending on the machine's locale.\n",
"with open(\"../txt/words-for-the-future/OTHERNESS.txt\", encoding=\"utf-8\") as f:\n",
"    text = f.read()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Otherness | Daniel L. Everett\\n\\nWhen I was 26, I moved to the Amazon, from California, in order to st'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"text[:100]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
4 years ago
"outputs": [
{
"ename": "RuntimeError",
"evalue": "generator raised StopIteration",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mStopIteration\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(path, encoding, comment)\u001b[0m\n\u001b[1;32m 608\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0mline\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 609\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 610\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mStopIteration\u001b[0m: ",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-3-ac287bca8f52>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtree\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsetree\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
4 years ago
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/en/__init__.py\u001b[0m in \u001b[0;36mparsetree\u001b[0;34m(s, *args, **kwargs)\u001b[0m\n\u001b[1;32m 173\u001b[0m \"\"\" Returns a parsed Text from the given string.\n\u001b[1;32m 174\u001b[0m \"\"\"\n\u001b[0;32m--> 175\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mText\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 176\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/en/__init__.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(s, *args, **kwargs)\u001b[0m\n\u001b[1;32m 167\u001b[0m \"\"\" Returns a tagged Unicode string.\n\u001b[1;32m 168\u001b[0m \"\"\"\n\u001b[0;32m--> 169\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 170\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, s, tokenize, tags, chunks, relations, lemmata, encoding, **kwargs)\u001b[0m\n\u001b[1;32m 1170\u001b[0m \u001b[0;31m# Tagger (required by chunker, labeler & lemmatizer).\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1171\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtags\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mchunks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mrelations\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mlemmata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1172\u001b[0;31m \u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_tags\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1173\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1174\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mw\u001b[0m \u001b[0;32min\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/en/__init__.py\u001b[0m in \u001b[0;36mfind_tags\u001b[0;34m(self, tokens, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"tagset\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mUNIVERSAL\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"map\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mtoken\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtag\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mpenntreebank2universal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtoken\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtag\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 114\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_Parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_tags\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtokens\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36mfind_tags\u001b[0;34m(self, tokens, **kwargs)\u001b[0m\n\u001b[1;32m 1111\u001b[0m \u001b[0;31m# [\"The\", \"cat\", \"purs\"] => [[\"The\", \"DT\"], [\"cat\", \"NN\"], [\"purs\", \"VB\"]]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1112\u001b[0m return find_tags(tokens,\n\u001b[0;32m-> 1113\u001b[0;31m \u001b[0mlexicon\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"lexicon\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlexicon\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1114\u001b[0m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"model\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1115\u001b[0m \u001b[0mmorphology\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"morphology\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmorphology\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36m__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 374\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 375\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 376\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lazy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"__len__\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 377\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__iter__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
4 years ago
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36m_lazy\u001b[0;34m(self, method, *args)\u001b[0m\n\u001b[1;32m 366\u001b[0m \"\"\"\n\u001b[1;32m 367\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 368\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 369\u001b[0m \u001b[0msetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtypes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMethodType\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 370\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 623\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 624\u001b[0m \u001b[0;31m# Arnold NNP x\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 625\u001b[0;31m \u001b[0mdict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\" \"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_path\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\" \"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 626\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 627\u001b[0m \u001b[0;31m#--- FREQUENCY -------------------------------------------------------------------------------------\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36m<genexpr>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 623\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 624\u001b[0m \u001b[0;31m# Arnold NNP x\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 625\u001b[0;31m \u001b[0mdict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\" \"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_path\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\" \"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 626\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 627\u001b[0m \u001b[0;31m#--- FREQUENCY -------------------------------------------------------------------------------------\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
4 years ago
"\u001b[0;31mRuntimeError\u001b[0m: generator raised StopIteration"
]
}
],
"source": [
"# pattern's `_read` generator ends with `raise StopIteration`, which Python 3.7+\n",
"# (PEP 479) converts into RuntimeError the first time a lazily loaded resource is read.\n",
"# NOTE(review): the entries appear to be inserted before the error surfaces, so a\n",
"# retry gets past the resource that just failed; several resources may each fail\n",
"# once, hence the bounded retry loop. Confirm against the installed pattern version.\n",
"MAX_PARSE_ATTEMPTS = 10\n",
"for attempt in range(1, MAX_PARSE_ATTEMPTS + 1):\n",
"    try:\n",
"        tree = parsetree(text)\n",
"        break\n",
"    except RuntimeError as err:\n",
"        # Only swallow the known StopIteration failure, and only while retries remain.\n",
"        if \"StopIteration\" not in str(err) or attempt == MAX_PARSE_ATTEMPTS:\n",
"            raise"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'tree' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-7a1e081e78d4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtree\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'tree' is not defined"
]
}
],
"source": [
"tree"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Sentence(\"This/DT/O/O encounter/RB/B-ADVP/O with/IN/B-PP/O these/DT/O/O /''/O/O others/NNS/B-NP/O ,/,/O/O /''/O/O so/RB/B-ADVP/O unlike/IN/B-PP/B-PNP myself/PRP/B-NP/I-PNP ,/,/O/O was/VBD/B-VP/O to/TO/I-VP/O be/VB/I-VP/O the/DT/O/O defining/VBG/B-VP/O experience/NN/B-NP/O for/IN/B-PP/B-PNP the/DT/B-NP/I-PNP rest/NN/I-NP/I-PNP of/IN/B-PP/B-PNP my/PRP$/B-NP/I-PNP life/NN/I-NP/I-PNP ././O/O\")"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tree[7]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [
{
"data": {
"text/plain": [
"[Match(words=[Word('unrelated/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('small/JJ')]),\n",
" Match(words=[Word('missionary/JJ')]),\n",
" Match(words=[Word('bumpy/JJ')]),\n",
" Match(words=[Word('first/JJ')]),\n",
" Match(words=[Word('weak/JJ')]),\n",
" Match(words=[Word('taut/JJ')]),\n",
" Match(words=[Word('unrelated/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('many/JJ')]),\n",
" Match(words=[Word('little/JJ')]),\n",
" Match(words=[Word('easy/JJ')]),\n",
" Match(words=[Word('enough/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('uncomfortable/JJ')]),\n",
" Match(words=[Word('suspicious/JJ')]),\n",
" Match(words=[Word('new/JJ')]),\n",
" Match(words=[Word('different/JJ')]),\n",
" Match(words=[Word('simple/JJ')]),\n",
" Match(words=[Word('binary/JJ')]),\n",
" Match(words=[Word('old/JJ')]),\n",
" Match(words=[Word('religious/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('different/JJ')]),\n",
" Match(words=[Word('unintelligible/JJ')]),\n",
" Match(words=[Word('different-looking/JJ')]),\n",
" Match(words=[Word('otherness/JJ')]),\n",
" Match(words=[Word('live/JJ')]),\n",
" Match(words=[Word('unacceptable/JJ')]),\n",
" Match(words=[Word('missionary/JJ')]),\n",
" Match(words=[Word('eternal/JJ')]),\n",
" Match(words=[Word('encounter/JJ')]),\n",
" Match(words=[Word('uneasy/JJ')]),\n",
" Match(words=[Word('dangerous/JJ')]),\n",
" Match(words=[Word('insufficient/JJ')]),\n",
" Match(words=[Word('ethno-centric/JJ')]),\n",
" Match(words=[Word('own/JJ')]),\n",
" Match(words=[Word('fortunate/JJ')]),\n",
" Match(words=[Word('gentle/JJ')]),\n",
" Match(words=[Word('many/JJ')]),\n",
" Match(words=[Word('silly/JJ')]),\n",
" Match(words=[Word('years-long/JJ')]),\n",
" Match(words=[Word('first/JJ')]),\n",
" Match(words=[Word('first/JJ')]),\n",
" Match(words=[Word('young/JJ')]),\n",
" Match(words=[Word('large/JJ')]),\n",
" Match(words=[Word('small/JJ')]),\n",
" Match(words=[Word('fresh/JJ')]),\n",
" Match(words=[Word('young/JJ')]),\n",
" Match(words=[Word('then-unintelligible/JJ')]),\n",
" Match(words=[Word('easy/JJ')]),\n",
" Match(words=[Word('polite/JJ')]),\n",
" Match(words=[Word('Many/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('Western/JJ')]),\n",
" Match(words=[Word('polite/JJ')]),\n",
" Match(words=[Word('Western/JJ')]),\n",
" Match(words=[Word('first/JJ')]),\n",
" Match(words=[Word('close/JJ')]),\n",
" Match(words=[Word('first/JJ')]),\n",
" Match(words=[Word('small/JJ')]),\n",
" Match(words=[Word('early/JJ')]),\n",
" Match(words=[Word('subsequent/JJ')]),\n",
" Match(words=[Word('individual/JJ')]),\n",
" Match(words=[Word('normal/JJ')]),\n",
" Match(words=[Word('comfortable/JJ')]),\n",
" Match(words=[Word('new/JJ')]),\n",
" Match(words=[Word('new/JJ')]),\n",
" Match(words=[Word('dissonant/JJ')]),\n",
" Match(words=[Word('steady/JJ')]),\n",
" Match(words=[Word('familiar/JJ')]),\n",
" Match(words=[Word('Comfort/JJ')]),\n",
" Match(words=[Word('acquired/JJ')]),\n",
" Match(words=[Word('different/JJ')]),\n",
" Match(words=[Word('different/JJ')]),\n",
" Match(words=[Word('sexual/JJ')]),\n",
" Match(words=[Word('different/JJ')]),\n",
" Match(words=[Word('biological/JJ')]),\n",
" Match(words=[Word('little/JJ')]),\n",
" Match(words=[Word('possible/JJ')]),\n",
" Match(words=[Word('worthwhile/JJ')]),\n",
" Match(words=[Word('obvious/JJ')]),\n",
" Match(words=[Word('first/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('behavioral/JJ')]),\n",
" Match(words=[Word('earliest/JJ')]),\n",
" Match(words=[Word('normal/JJ')]),\n",
" Match(words=[Word('correct/JJ')]),\n",
" Match(words=[Word('crucial/JJ')]),\n",
" Match(words=[Word('in-group/JJ')]),\n",
" Match(words=[Word('social/JJ')]),\n",
" Match(words=[Word('else/JJ')]),\n",
" Match(words=[Word('own/JJ')]),\n",
" Match(words=[Word('own/JJ')]),\n",
" Match(words=[Word('familiar/JJ')]),\n",
" Match(words=[Word('fit/JJ')]),\n",
" Match(words=[Word('several/JJ')]),\n",
" Match(words=[Word('Pirahã/JJ')]),\n",
" Match(words=[Word('full/JJ')]),\n",
" Match(words=[Word('noticed/JJ')]),\n",
" Match(words=[Word('old/JJ')]),\n",
" Match(words=[Word('sharp/JJ')]),\n",
" Match(words=[Word('30cm/JJ')]),\n",
" Match(words=[Word('dangerous/JJ')]),\n",
" Match(words=[Word('handed/JJ')]),\n",
" Match(words=[Word('non-life-threatening/JJ')]),\n",
" Match(words=[Word('necessary/JJ')]),\n",
" Match(words=[Word('Dutch/JJ')]),\n",
" Match(words=[Word('sharp/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('m)otherness/JJ')]),\n",
" Match(words=[Word('sure/JJ')]),\n",
" Match(words=[Word('able/JJ')]),\n",
" Match(words=[Word('occasional/JJ')]),\n",
" Match(words=[Word('interesting/JJ')]),\n",
" Match(words=[Word('crooked/JJ')]),\n",
" Match(words=[Word('straight/JJ')]),\n",
" Match(words=[Word('bizarre/JJ')]),\n",
" Match(words=[Word('American/JJ')]),\n",
" Match(words=[Word('missionary/JJ')]),\n",
" Match(words=[Word('excited/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('Pirahã/JJ')]),\n",
" Match(words=[Word('native/JJ')]),\n",
" Match(words=[Word('native/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('local/JJ')]),\n",
" Match(words=[Word('comfortable/JJ')]),\n",
" Match(words=[Word('similar/JJ')]),\n",
" Match(words=[Word('many/JJ')]),\n",
" Match(words=[Word('favorite/JJ')]),\n",
" Match(words=[Word('American/JJ')]),\n",
" Match(words=[Word('different/JJ')]),\n",
" Match(words=[Word('irrelevant/JJ')]),\n",
" Match(words=[Word('brilliant/JJ')]),\n",
" Match(words=[Word('boring/JJ')]),\n",
" Match(words=[Word('adjacent/JJ')]),\n",
" Match(words=[Word('full/JJ')]),\n",
" Match(words=[Word('brilliant/JJ')]),\n",
" Match(words=[Word('good/JJ')]),\n",
" Match(words=[Word('human/JJ')]),\n",
" Match(words=[Word('independent/JJ')]),\n",
" Match(words=[Word('natural/JJ')]),\n",
" Match(words=[Word('solitary/JJ')]),\n",
" Match(words=[Word('strange/JJ')]),\n",
" Match(words=[Word('slow/JJ')]),\n",
" Match(words=[Word('otherness/JJ')]),\n",
" Match(words=[Word('original/JJ')]),\n",
" Match(words=[Word('paradoxical/JJ')]),\n",
" Match(words=[Word('otherness/JJ')]),\n",
" Match(words=[Word('otherness/JJ')]),\n",
" Match(words=[Word('panoramic/JJ')]),\n",
" Match(words=[Word('own/JJ')]),\n",
" Match(words=[Word('unique/JJ')]),\n",
" Match(words=[Word('important/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('individual/JJ')]),\n",
" Match(words=[Word('s/JJ')]),\n",
" Match(words=[Word('small/JJ')]),\n",
" Match(words=[Word('read/JJ')]),\n",
" Match(words=[Word('possible/JJ')]),\n",
" Match(words=[Word('own/JJ')]),\n",
" Match(words=[Word('i]The/JJ')]),\n",
" Match(words=[Word('poor/JJ')]),\n",
" Match(words=[Word('good/JJ')]),\n",
" Match(words=[Word('measurable/JJ')]),\n",
" Match(words=[Word('daily/JJ')]),\n",
" Match(words=[Word('social/JJ')]),\n",
" Match(words=[Word('little/JJ')]),\n",
" Match(words=[Word('new/JJ')]),\n",
" Match(words=[Word('conceptual/JJ')]),\n",
" Match(words=[Word('cultural/JJ')]),\n",
" Match(words=[Word('social/JJ')]),\n",
" Match(words=[Word('predictable/JJ')]),\n",
" Match(words=[Word('predictable/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('thinking/JJ')]),\n",
" Match(words=[Word('strong/JJ')]),\n",
" Match(words=[Word('desirable/JJ')]),\n",
" Match(words=[Word('unexpected/JJ')]),\n",
" Match(words=[Word('constant/JJ')]),\n",
" Match(words=[Word('useful/JJ')]),\n",
" Match(words=[Word('biological/JJ')]),\n",
" Match(words=[Word('cognitive/JJ')]),\n",
" Match(words=[Word('cultural/JJ')]),\n",
" Match(words=[Word('unsuccessful/JJ')]),\n",
" Match(words=[Word('strange/JJ')]),\n",
" Match(words=[Word('successful/JJ')]),\n",
" Match(words=[Word('own/JJ')]),\n",
" Match(words=[Word('little/JJ')]),\n",
" Match(words=[Word('such/JJ')]),\n",
" Match(words=[Word('political/JJ')]),\n",
" Match(words=[Word('important/JJ')]),\n",
" Match(words=[Word('new/JJ')]),\n",
" Match(words=[Word('new/JJ')]),\n",
" Match(words=[Word('unable/JJ')]),\n",
" Match(words=[Word('little/JJ')]),\n",
" Match(words=[Word('little/JJ')]),\n",
" Match(words=[Word('18th/JJ')]),\n",
" Match(words=[Word('identical/JJ')]),\n",
" Match(words=[Word('light/JJ')]),\n",
" Match(words=[Word('multiple/JJ')]),\n",
" Match(words=[Word('familiar/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('new/JJ')]),\n",
" Match(words=[Word('new/JJ')]),\n",
" Match(words=[Word('same/JJ')]),\n",
" Match(words=[Word('same/JJ')]),\n",
" Match(words=[Word('same/JJ')]),\n",
" Match(words=[Word('same/JJ')]),\n",
" Match(words=[Word('same/JJ')]),\n",
" Match(words=[Word('same/JJ')]),\n",
" Match(words=[Word('new/JJ')]),\n",
" Match(words=[Word('anti-immigration/JJ')]),\n",
" Match(words=[Word('political/JJ')]),\n",
" Match(words=[Word('otherness/JJ')]),\n",
" Match(words=[Word('otherness/JJ')]),\n",
" Match(words=[Word('motivated/JJ')]),\n",
" Match(words=[Word('ultimate/JJ')]),\n",
" Match(words=[Word('otherness/JJ')]),\n",
" Match(words=[Word('cognitive/JJ')]),\n",
" Match(words=[Word('new/JJ')]),\n",
" Match(words=[Word('new/JJ')]),\n",
" Match(words=[Word('own/JJ')]),\n",
" Match(words=[Word('only/JJ')]),\n",
" Match(words=[Word('otherness/JJ')]),\n",
" Match(words=[Word('otherness/JJ')]),\n",
" Match(words=[Word('invented/JJ')]),\n",
" Match(words=[Word('communal/JJ')]),\n",
" Match(words=[Word('cultural/JJ')]),\n",
" Match(words=[Word('cultural/JJ')]),\n",
" Match(words=[Word('human/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('distinct/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('s/JJ')]),\n",
" Match(words=[Word('other/JJ')]),\n",
" Match(words=[Word('Amazonian/JJ')]),\n",
" Match(words=[Word('doomed/JJ')]),\n",
" Match(words=[Word('Greek/JJ')]),\n",
" Match(words=[Word('repetitive/JJ')]),\n",
" Match(words=[Word('daily/JJ')]),\n",
" Match(words=[Word('huge/JJ')]),\n",
" Match(words=[Word('only/JJ')]),\n",
" Match(words=[Word('same/JJ')]),\n",
" Match(words=[Word('next/JJ')])]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"search(\"JJ\", tree)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Match(words=[Word('study/VB'), Word('the/DT'), Word('language/NN')]),\n",
" Match(words=[Word('be/VB'), Word('a/DT'), Word('prostitute/NN')]),\n",
" Match(words=[Word('seem/VB'), Word('that/DT'), Word('way/NN')]),\n",
" Match(words=[Word('conduct/VB'), Word('a/DT'), Word('pilot/NN')]),\n",
" Match(words=[Word('let/VB'), Word('the/DT'), Word('stick/NN')]),\n",
" Match(words=[Word('remove/VB'), Word('the/DT'), Word('otherness/NN')]),\n",
" Match(words=[Word('occupy/VB'), Word('a/DT'), Word('part/NN')])]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"search('VB DT NN', tree)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"study the language\n",
"be a prostitute\n",
"seem that way\n",
"conduct a pilot\n",
"let the stick\n",
"remove the otherness\n",
"occupy a part\n"
]
}
],
"source": [
"# Print the plain text of every verb + determiner + noun match, one per line.\n",
"# The loop variable stays named `m` because the next cell inspects it.\n",
"for m in search(\"VB DT NN\", tree):\n",
"    print(m.string)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'occupy a part'"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m.string"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My body\n",
"my brain\n",
"My task\n",
"my life\n",
"our species\n",
"our child\n",
"their differences\n",
"my belief\n",
"my encounter\n",
"my own\n",
"my silly beliefs\n",
"my life\n",
"my first day\n",
"his hut\n",
"its tongue\n",
"our mother\n",
"our mother\n",
"our father\n",
"our first experiences\n",
"our values\n",
"our mother and the select\n",
"our subsequent lives\n",
"Our earliest associations\n",
"our narrow range\n",
"our in-group\n",
"my own writings.[1\n",
"our family or our village\n",
"our own identity\n",
"our identity\n",
"our family\n",
"our norm\n",
"our experience\n",
"our expectations\n",
"its occupants\n",
"their beliefs and children\n",
"his face\n",
"his mother\n",
"her toddler\n",
"her child\n",
"his quasi-stabbing\n",
"her child\n",
"her child a sharp knife\n",
"its contribution\n",
"our lives\n",
"their language\n",
"their translations\n",
"their comments\n",
"my request\n",
"their language\n",
"our behavior\n",
"their language\n",
"their culture\n",
"their language\n",
"Our sense\n",
"our enveloping\n",
"our childhood development\n",
"our conversations and the structures\n",
"our interactions\n",
"their phrases\n",
"my favorite book\n",
"his year\n",
"His year\n",
"its institutions\n",
"our senses\n",
"our sense\n",
"his lessons\n",
"our sense\n",
"our own unique identity\n",
"our oneness\n",
"my life\n",
"his own question\n",
"his essay\n",
"our behavior\n",
"His example\n",
"his case\n",
"our lives\n",
"our lives\n",
"our expectations\n",
"our environments\n",
"our own\n",
"our Homo ancestors\n",
"our environment\n",
"their language\n",
"their culture and language\n",
"our familiar environment\n",
"our world\n",
"Our preference\n",
"our fear then itself\n",
"Our languages and cognitive abilities\n",
"their relationships\n",
"our own\n",
"our species ability\n",
"our human\n",
"our greatest fears\n",
"our greatest treasure\n",
"his efforts\n"
]
}
],
"source": [
"# Print the plain text of every match for a possessive pronoun (PRP$) followed by\n",
"# the `*` wildcard, one per line.\n",
"for m in search(\"PRP$ *\", tree):\n",
"    print(m.string)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from pattern.en import wordnet"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"sense = wordnet.synsets(\"language\")[0]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Synset('communication.n.02')"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sense.hypernym"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"matching neighbor\n",
"matching friend\n",
"matching child\n",
"matching woman\n",
"matching prostitute\n",
"matching man\n",
"matching man\n",
"matching guest\n",
"matching host\n",
"matching mother\n",
"matching mother\n",
"matching father\n",
"matching mother\n",
"matching professor\n",
"matching cowboy\n",
"matching psychologist\n",
"matching pilot\n",
"matching toddler\n",
"matching mother\n",
"matching mother\n",
"matching toddler\n",
"matching woman\n",
"matching baby\n",
"matching child\n",
"matching mother\n",
"matching baby\n",
"matching mother\n",
"matching child\n",
"matching mother\n",
"matching child\n",
"matching child\n",
"matching child\n",
"matching speaker\n",
"matching speaker\n",
"matching foreigner\n",
"matching tourist\n",
"matching friend\n",
"matching man\n",
"matching foreigner\n",
"matching handyman\n",
"matching stranger\n",
"matching Homo\n",
"matching Homo\n",
"matching Homo\n"
]
}
],
"source": [
"# Collect every word in the parsed text whose first WordNet sense has one of the\n",
"# query terms among its (recursive) hypernyms.\n",
"search_word = \"person\"  # one or more WordNet lemmas separated by '|'\n",
"\n",
"output = []\n",
"# A separate loop variable keeps `search_word` intact (the loop used to overwrite it).\n",
"for term in search_word.split(\"|\"):\n",
"    term_synsets = wordnet.synsets(term)\n",
"    if not term_synsets:\n",
"        # WordNet has no entry for this term, so there is nothing to match against.\n",
"        continue\n",
"    # The part-of-speech tag of the term's first sense is used as the search pattern.\n",
"    pos = term_synsets[0].pos\n",
"    for match in search(pos, tree):\n",
"        word = match[0].string\n",
"        word_synsets = wordnet.synsets(word)\n",
"        if not word_synsets:\n",
"            # Words unknown to WordNet have no hypernyms to inspect.\n",
"            continue\n",
"        hypernyms = word_synsets[0].hypernyms(recursive=True)\n",
"        if any(term == h.senses[0] for h in hypernyms):\n",
"            print(f\"matching {word}\")\n",
"            output.append(word)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['phrase']"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}