|
|
|
@ -0,0 +1,741 @@
|
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"from pattern.search import STRICT, search\n",
|
|
|
|
|
"from pattern.en import parsetree"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Reference: [pattern.search documentation](https://github.com/clips/pattern/wiki/pattern-search)\n",
|
|
|
|
|
"(inspired by the search in [videogrep](https://github.com/antiboredom/videogrep/blob/master/videogrep/searcher.py))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 7,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Read the essay as UTF-8 (it contains non-ASCII characters such as Pirahã)\n",
"# and close the file handle as soon as the text is in memory.\n",
"with open(\"../txt/words-for-the-future/OTHERNESS.txt\", encoding=\"utf-8\") as essay_file:\n",
"    text = essay_file.read()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"'Otherness | Daniel L. Everett\\n\\nWhen I was 26, I moved to the Amazon, from California, in order to st'"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"text[:100]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 8,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"ename": "RuntimeError",
|
|
|
|
|
"evalue": "generator raised StopIteration",
|
|
|
|
|
"output_type": "error",
|
|
|
|
|
"traceback": [
|
|
|
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
|
|
"\u001b[0;31mStopIteration\u001b[0m Traceback (most recent call last)",
|
|
|
|
|
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(path, encoding, comment)\u001b[0m\n\u001b[1;32m 608\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0mline\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 609\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 610\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[0;31mStopIteration\u001b[0m: ",
|
|
|
|
|
"\nThe above exception was the direct cause of the following exception:\n",
|
|
|
|
|
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
|
|
|
|
|
"\u001b[0;32m<ipython-input-8-ac287bca8f52>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtree\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsetree\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
|
|
|
|
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/en/__init__.py\u001b[0m in \u001b[0;36mparsetree\u001b[0;34m(s, *args, **kwargs)\u001b[0m\n\u001b[1;32m 173\u001b[0m \"\"\" Returns a parsed Text from the given string.\n\u001b[1;32m 174\u001b[0m \"\"\"\n\u001b[0;32m--> 175\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mText\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 176\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/en/__init__.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(s, *args, **kwargs)\u001b[0m\n\u001b[1;32m 167\u001b[0m \"\"\" Returns a tagged Unicode string.\n\u001b[1;32m 168\u001b[0m \"\"\"\n\u001b[0;32m--> 169\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 170\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, s, tokenize, tags, chunks, relations, lemmata, encoding, **kwargs)\u001b[0m\n\u001b[1;32m 1170\u001b[0m \u001b[0;31m# Tagger (required by chunker, labeler & lemmatizer).\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1171\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtags\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mchunks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mrelations\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mlemmata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1172\u001b[0;31m \u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_tags\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1173\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1174\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mw\u001b[0m \u001b[0;32min\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/en/__init__.py\u001b[0m in \u001b[0;36mfind_tags\u001b[0;34m(self, tokens, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"tagset\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mUNIVERSAL\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"map\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mtoken\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtag\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mpenntreebank2universal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtoken\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtag\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 114\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_Parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind_tags\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtokens\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36mfind_tags\u001b[0;34m(self, tokens, **kwargs)\u001b[0m\n\u001b[1;32m 1118\u001b[0m \u001b[0mlanguage\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"language\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlanguage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1119\u001b[0m \u001b[0mdefault\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"default\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1120\u001b[0;31m map = kwargs.get(\"map\", None))\n\u001b[0m\u001b[1;32m 1121\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1122\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfind_chunks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtokens\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36mfind_tags\u001b[0;34m(tokens, lexicon, model, morphology, context, entities, default, language, map, **kwargs)\u001b[0m\n\u001b[1;32m 1538\u001b[0m \u001b[0;31m# Tag named entities.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1539\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mentities\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1540\u001b[0;31m \u001b[0mtagged\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mentities\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtagged\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1541\u001b[0m \u001b[0;31m# Map tags with a custom function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1542\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmap\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, tokens)\u001b[0m\n\u001b[1;32m 974\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mRE_ENTITY3\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 975\u001b[0m \u001b[0mtokens\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtag\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 976\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mw\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 977\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0me\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 978\u001b[0m \u001b[0;31m# Look ahead to see if successive words match the named entity.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36m__contains__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 381\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__contains__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 382\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lazy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"__contains__\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 383\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36m_lazy\u001b[0;34m(self, method, *args)\u001b[0m\n\u001b[1;32m 366\u001b[0m \"\"\"\n\u001b[1;32m 367\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__len__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 368\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 369\u001b[0m \u001b[0msetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtypes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMethodType\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 370\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[0;32m~/.local/lib/python3.7/site-packages/pattern/text/__init__.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 957\u001b[0m \u001b[0;31m# [\"Alexander\", \"the\", \"Great\", \"PERS\"]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 958\u001b[0m \u001b[0;31m# {\"alexander\": [[\"alexander\", \"the\", \"great\", \"pers\"], ...]}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 959\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 960\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlower\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 961\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
|
|
|
"\u001b[0;31mRuntimeError\u001b[0m: generator raised StopIteration"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# pattern's `_read` generator ends with `raise StopIteration`; Python 3.7+ (PEP 479)\n",
"# turns that into \"RuntimeError: generator raised StopIteration\" the first time each\n",
"# lazily loaded resource is read. The raise comes after the last line was yielded,\n",
"# so the resource is already filled and a retry gets past it.\n",
"tree = None\n",
"for _ in range(10):  # presumably one failure per lazy resource; 10 is a generous cap\n",
"    try:\n",
"        tree = parsetree(text)\n",
"        break\n",
"    except RuntimeError as error:\n",
"        if \"StopIteration\" not in str(error):\n",
"            raise  # unrelated failure: do not hide it\n",
"if tree is None:\n",
"    raise RuntimeError(\"parsetree(text) kept failing with 'generator raised StopIteration'\")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 5,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"ename": "NameError",
|
|
|
|
|
"evalue": "name 'tree' is not defined",
|
|
|
|
|
"output_type": "error",
|
|
|
|
|
"traceback": [
|
|
|
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
|
|
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
|
|
|
|
"\u001b[0;32m<ipython-input-5-7a1e081e78d4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtree\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
|
|
|
|
"\u001b[0;31mNameError\u001b[0m: name 'tree' is not defined"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"tree"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 24,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"Sentence(\"This/DT/O/O encounter/RB/B-ADVP/O with/IN/B-PP/O these/DT/O/O ‘/''/O/O others/NNS/B-NP/O ,/,/O/O ’/''/O/O so/RB/B-ADVP/O unlike/IN/B-PP/B-PNP myself/PRP/B-NP/I-PNP ,/,/O/O was/VBD/B-VP/O to/TO/I-VP/O be/VB/I-VP/O the/DT/O/O defining/VBG/B-VP/O experience/NN/B-NP/O for/IN/B-PP/B-PNP the/DT/B-NP/I-PNP rest/NN/I-NP/I-PNP of/IN/B-PP/B-PNP my/PRP$/B-NP/I-PNP life/NN/I-NP/I-PNP ././O/O\")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 24,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"tree[7]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 16,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"collapsed": true,
|
|
|
|
|
"jupyter": {
|
|
|
|
|
"outputs_hidden": true
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"[Match(words=[Word('unrelated/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('small/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('missionary/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('bumpy/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('first/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('weak/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('taut/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('unrelated/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('many/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('little/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('easy/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('enough/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('uncomfortable/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('suspicious/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('new/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('different/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('simple/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('binary/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('old/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('religious/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('different/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('unintelligible/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('different-looking/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('otherness/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('live/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('unacceptable/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('missionary/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('eternal/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('encounter/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('uneasy/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('dangerous/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('insufficient/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('ethno-centric/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('own/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('fortunate/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('gentle/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('many/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('silly/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('years-long/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('first/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('first/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('young/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('large/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('small/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('fresh/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('young/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('then-unintelligible/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('easy/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('polite/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('Many/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('Western/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('polite/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('Western/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('first/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('close/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('first/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('small/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('early/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('subsequent/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('individual/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('normal/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('comfortable/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('new/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('new/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('dissonant/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('steady/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('familiar/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('Comfort/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('acquired/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('different/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('different/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('sexual/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('different/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('biological/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('little/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('possible/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('worthwhile/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('obvious/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('first/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('behavioral/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('earliest/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('normal/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('correct/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('crucial/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('in-group/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('social/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('else/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('own/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('own/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('familiar/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('fit/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('several/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('Pirahã/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('full/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('noticed/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('old/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('sharp/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('30cm/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('dangerous/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('handed/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('non-life-threatening/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('necessary/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('Dutch/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('sharp/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('m)otherness/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('sure/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('able/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('occasional/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('interesting/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('crooked/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('straight/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('bizarre/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('American/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('missionary/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('excited/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('Pirahã/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('native/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('native/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('local/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('comfortable/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('similar/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('many/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('favorite/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('American/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('different/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('irrelevant/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('brilliant/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('boring/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('adjacent/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('full/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('brilliant/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('good/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('human/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('independent/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('natural/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('solitary/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('strange/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('slow/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('otherness/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('original/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('paradoxical/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('otherness/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('otherness/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('panoramic/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('own/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('unique/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('important/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('individual/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('s/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('small/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('read/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('possible/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('own/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('i]The/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('poor/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('good/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('measurable/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('daily/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('social/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('little/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('new/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('conceptual/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('cultural/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('social/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('predictable/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('predictable/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('thinking/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('strong/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('desirable/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('unexpected/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('constant/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('useful/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('biological/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('cognitive/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('cultural/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('unsuccessful/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('strange/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('successful/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('own/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('little/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('such/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('political/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('important/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('new/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('new/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('unable/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('little/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('little/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('18th/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('identical/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('light/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('multiple/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('familiar/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('new/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('new/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('same/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('same/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('same/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('same/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('same/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('same/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('new/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('anti-immigration/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('political/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('otherness/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('otherness/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('motivated/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('ultimate/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('otherness/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('cognitive/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('new/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('new/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('own/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('only/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('otherness/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('otherness/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('invented/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('communal/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('cultural/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('cultural/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('human/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('distinct/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('s/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('other/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('Amazonian/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('doomed/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('Greek/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('repetitive/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('daily/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('huge/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('only/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('same/JJ')]),\n",
|
|
|
|
|
" Match(words=[Word('next/JJ')])]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 16,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"search(\"JJ\", tree)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 17,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"[Match(words=[Word('study/VB'), Word('the/DT'), Word('language/NN')]),\n",
|
|
|
|
|
" Match(words=[Word('be/VB'), Word('a/DT'), Word('prostitute/NN')]),\n",
|
|
|
|
|
" Match(words=[Word('seem/VB'), Word('that/DT'), Word('way/NN')]),\n",
|
|
|
|
|
" Match(words=[Word('conduct/VB'), Word('a/DT'), Word('pilot/NN')]),\n",
|
|
|
|
|
" Match(words=[Word('let/VB'), Word('the/DT'), Word('stick/NN')]),\n",
|
|
|
|
|
" Match(words=[Word('remove/VB'), Word('the/DT'), Word('otherness/NN')]),\n",
|
|
|
|
|
" Match(words=[Word('occupy/VB'), Word('a/DT'), Word('part/NN')])]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 17,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"search('VB DT NN', tree)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 22,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"study the language\n",
|
|
|
|
|
"be a prostitute\n",
|
|
|
|
|
"seem that way\n",
|
|
|
|
|
"conduct a pilot\n",
|
|
|
|
|
"let the stick\n",
|
|
|
|
|
"remove the otherness\n",
|
|
|
|
|
"occupy a part\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Show the plain text of every verb + determiner + noun match.\n",
"for m in search(\"VB DT NN\", tree):\n",
"    print(m.string)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 20,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"'occupy a part'"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 20,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"m.string"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 25,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"My body\n",
|
|
|
|
|
"my brain\n",
|
|
|
|
|
"My task\n",
|
|
|
|
|
"my life\n",
|
|
|
|
|
"our species\n",
|
|
|
|
|
"our child\n",
|
|
|
|
|
"their differences\n",
|
|
|
|
|
"my belief\n",
|
|
|
|
|
"my encounter\n",
|
|
|
|
|
"my own\n",
|
|
|
|
|
"my silly beliefs\n",
|
|
|
|
|
"my life\n",
|
|
|
|
|
"my first day\n",
|
|
|
|
|
"his hut\n",
|
|
|
|
|
"its tongue\n",
|
|
|
|
|
"our mother\n",
|
|
|
|
|
"our mother\n",
|
|
|
|
|
"our father\n",
|
|
|
|
|
"our first experiences\n",
|
|
|
|
|
"our values\n",
|
|
|
|
|
"our mother and the select\n",
|
|
|
|
|
"our subsequent lives\n",
|
|
|
|
|
"Our earliest associations\n",
|
|
|
|
|
"our narrow range\n",
|
|
|
|
|
"our in-group\n",
|
|
|
|
|
"my own writings.[1\n",
|
|
|
|
|
"our family or our village\n",
|
|
|
|
|
"our own identity\n",
|
|
|
|
|
"our identity\n",
|
|
|
|
|
"our family\n",
|
|
|
|
|
"our norm\n",
|
|
|
|
|
"our experience\n",
|
|
|
|
|
"our expectations\n",
|
|
|
|
|
"its occupants\n",
|
|
|
|
|
"their beliefs and children\n",
|
|
|
|
|
"his face\n",
|
|
|
|
|
"his mother\n",
|
|
|
|
|
"her toddler\n",
|
|
|
|
|
"her child\n",
|
|
|
|
|
"his quasi-stabbing\n",
|
|
|
|
|
"her child\n",
|
|
|
|
|
"her child a sharp knife\n",
|
|
|
|
|
"its contribution\n",
|
|
|
|
|
"our lives\n",
|
|
|
|
|
"their language\n",
|
|
|
|
|
"their translations\n",
|
|
|
|
|
"their comments\n",
|
|
|
|
|
"my request\n",
|
|
|
|
|
"their language\n",
|
|
|
|
|
"our behavior\n",
|
|
|
|
|
"their language\n",
|
|
|
|
|
"their culture\n",
|
|
|
|
|
"their language\n",
|
|
|
|
|
"Our sense\n",
|
|
|
|
|
"our enveloping\n",
|
|
|
|
|
"our childhood development\n",
|
|
|
|
|
"our conversations and the structures\n",
|
|
|
|
|
"our interactions\n",
|
|
|
|
|
"their phrases\n",
|
|
|
|
|
"my favorite book\n",
|
|
|
|
|
"his year\n",
|
|
|
|
|
"His year\n",
|
|
|
|
|
"its institutions\n",
|
|
|
|
|
"our senses\n",
|
|
|
|
|
"our sense\n",
|
|
|
|
|
"his lessons\n",
|
|
|
|
|
"our sense\n",
|
|
|
|
|
"our own unique identity\n",
|
|
|
|
|
"our oneness\n",
|
|
|
|
|
"my life\n",
|
|
|
|
|
"his own question\n",
|
|
|
|
|
"his essay\n",
|
|
|
|
|
"our behavior\n",
|
|
|
|
|
"His example\n",
|
|
|
|
|
"his case\n",
|
|
|
|
|
"our lives\n",
|
|
|
|
|
"our lives\n",
|
|
|
|
|
"our expectations\n",
|
|
|
|
|
"our environments\n",
|
|
|
|
|
"our own\n",
|
|
|
|
|
"our Homo ancestors\n",
|
|
|
|
|
"our environment\n",
|
|
|
|
|
"their language\n",
|
|
|
|
|
"their culture and language\n",
|
|
|
|
|
"our familiar environment\n",
|
|
|
|
|
"our world\n",
|
|
|
|
|
"Our preference\n",
|
|
|
|
|
"our fear then itself\n",
|
|
|
|
|
"Our languages and cognitive abilities\n",
|
|
|
|
|
"their relationships\n",
|
|
|
|
|
"our own\n",
|
|
|
|
|
"our species ability\n",
|
|
|
|
|
"our human\n",
|
|
|
|
|
"our greatest fears\n",
|
|
|
|
|
"our greatest treasure\n",
|
|
|
|
|
"his efforts\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Show the plain text of every match for the \"PRP$ *\" pattern.\n",
"for m in search(\"PRP$ *\", tree):\n",
"    print(m.string)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 9,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"from pattern.en import wordnet"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 19,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"sense = wordnet.synsets(\"language\")[0]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 20,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"Synset('communication.n.02')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 20,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"sense.hypernym"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 45,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"matching neighbor\n",
|
|
|
|
|
"matching friend\n",
|
|
|
|
|
"matching child\n",
|
|
|
|
|
"matching woman\n",
|
|
|
|
|
"matching prostitute\n",
|
|
|
|
|
"matching man\n",
|
|
|
|
|
"matching man\n",
|
|
|
|
|
"matching guest\n",
|
|
|
|
|
"matching host\n",
|
|
|
|
|
"matching mother\n",
|
|
|
|
|
"matching mother\n",
|
|
|
|
|
"matching father\n",
|
|
|
|
|
"matching mother\n",
|
|
|
|
|
"matching professor\n",
|
|
|
|
|
"matching cowboy\n",
|
|
|
|
|
"matching psychologist\n",
|
|
|
|
|
"matching pilot\n",
|
|
|
|
|
"matching toddler\n",
|
|
|
|
|
"matching mother\n",
|
|
|
|
|
"matching mother\n",
|
|
|
|
|
"matching toddler\n",
|
|
|
|
|
"matching woman\n",
|
|
|
|
|
"matching baby\n",
|
|
|
|
|
"matching child\n",
|
|
|
|
|
"matching mother\n",
|
|
|
|
|
"matching baby\n",
|
|
|
|
|
"matching mother\n",
|
|
|
|
|
"matching child\n",
|
|
|
|
|
"matching mother\n",
|
|
|
|
|
"matching child\n",
|
|
|
|
|
"matching child\n",
|
|
|
|
|
"matching child\n",
|
|
|
|
|
"matching speaker\n",
|
|
|
|
|
"matching speaker\n",
|
|
|
|
|
"matching foreigner\n",
|
|
|
|
|
"matching tourist\n",
|
|
|
|
|
"matching friend\n",
|
|
|
|
|
"matching man\n",
|
|
|
|
|
"matching foreigner\n",
|
|
|
|
|
"matching handyman\n",
|
|
|
|
|
"matching stranger\n",
|
|
|
|
|
"matching Homo\n",
|
|
|
|
|
"matching Homo\n",
|
|
|
|
|
"matching Homo\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Collect the words of the text that have one of the search terms among their hypernyms.\n",
"output = []\n",
"search_terms = \"person\"  # one or more terms, separated by '|'\n",
"for search_word in search_terms.split('|'):\n",
"    term_synsets = wordnet.synsets(search_word)\n",
"    if not term_synsets:\n",
"        # Unknown term: skip it instead of failing with IndexError on [0].\n",
"        print(f\"no synsets found for {search_word}\")\n",
"        continue\n",
"    synset = term_synsets[0]\n",
"    # The part-of-speech of the term's first synset is used as the search pattern.\n",
"    pos = synset.pos\n",
"    possible_words = search(pos, tree)\n",
"    for match in possible_words:\n",
"        word = match[0].string\n",
"        synsets = wordnet.synsets(word)\n",
"        if not synsets:\n",
"            continue\n",
"        # Keep the word when the term equals the first sense of any hypernym\n",
"        # (looked up recursively) of the word's first synset.\n",
"        hypernyms = synsets[0].hypernyms(recursive=True)\n",
"        if any(search_word == h.senses[0] for h in hypernyms):\n",
"            print(f\"matching {word}\")\n",
"            output.append(word)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 31,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"['phrase']"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 31,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"output"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.7.3"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 4
|
|
|
|
|
}
|