You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

434 lines
18 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Wordnet"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"import nltk\n",
"from nltk.corpus import wordnet"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# You only need to run this once\n",
"# nltk.download('wordnet')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Access to hardware functions is limited not only through the software application, but through the syntax the software application may use for storing and transmitting the information it processes.\n",
"\n"
]
}
],
"source": [
"lines = open('../txt/language.txt').readlines()\n",
"sentence = random.choice(lines)\n",
"print(sentence)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"----------\n",
"word: Access\n",
"synset: Synset('entree.n.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('entree.n.02')>\n",
"synset: Synset('access.n.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('access.n.02')>\n",
"synset: Synset('access.n.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('access.n.03')>\n",
"synset: Synset('access.n.04')\n",
"lemmas: <bound method Synset.lemma_names of Synset('access.n.04')>\n",
"synset: Synset('access.n.05')\n",
"lemmas: <bound method Synset.lemma_names of Synset('access.n.05')>\n",
"synset: Synset('access.n.06')\n",
"lemmas: <bound method Synset.lemma_names of Synset('access.n.06')>\n",
"synset: Synset('access.v.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('access.v.01')>\n",
"synset: Synset('access.v.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('access.v.02')>\n",
"----------\n",
"word: to\n",
"----------\n",
"word: hardware\n",
"synset: Synset('hardware.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.01')>\n",
"synset: Synset('hardware.n.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.02')>\n",
"synset: Synset('hardware.n.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.03')>\n",
"----------\n",
"word: functions\n",
"synset: Synset('function.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('function.n.01')>\n",
"synset: Synset('function.n.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('function.n.02')>\n",
"synset: Synset('function.n.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('function.n.03')>\n",
"synset: Synset('function.n.04')\n",
"lemmas: <bound method Synset.lemma_names of Synset('function.n.04')>\n",
"synset: Synset('function.n.05')\n",
"lemmas: <bound method Synset.lemma_names of Synset('function.n.05')>\n",
"synset: Synset('affair.n.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('affair.n.03')>\n",
"synset: Synset('routine.n.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('routine.n.03')>\n",
"synset: Synset('function.v.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('function.v.01')>\n",
"synset: Synset('serve.v.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('serve.v.01')>\n",
"synset: Synset('officiate.v.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('officiate.v.02')>\n",
"----------\n",
"word: is\n",
"synset: Synset('be.v.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('be.v.01')>\n",
"synset: Synset('be.v.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('be.v.02')>\n",
"synset: Synset('be.v.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('be.v.03')>\n",
"synset: Synset('exist.v.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('exist.v.01')>\n",
"synset: Synset('be.v.05')\n",
"lemmas: <bound method Synset.lemma_names of Synset('be.v.05')>\n",
"synset: Synset('equal.v.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('equal.v.01')>\n",
"synset: Synset('constitute.v.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('constitute.v.01')>\n",
"synset: Synset('be.v.08')\n",
"lemmas: <bound method Synset.lemma_names of Synset('be.v.08')>\n",
"synset: Synset('embody.v.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('embody.v.02')>\n",
"synset: Synset('be.v.10')\n",
"lemmas: <bound method Synset.lemma_names of Synset('be.v.10')>\n",
"synset: Synset('be.v.11')\n",
"lemmas: <bound method Synset.lemma_names of Synset('be.v.11')>\n",
"synset: Synset('be.v.12')\n",
"lemmas: <bound method Synset.lemma_names of Synset('be.v.12')>\n",
"synset: Synset('cost.v.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('cost.v.01')>\n",
"----------\n",
"word: limited\n",
"synset: Synset('express.n.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('express.n.02')>\n",
"synset: Synset('restrict.v.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('restrict.v.03')>\n",
"synset: Synset('limit.v.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('limit.v.02')>\n",
"synset: Synset('specify.v.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('specify.v.02')>\n",
"synset: Synset('limited.a.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('limited.a.01')>\n",
"synset: Synset('circumscribed.s.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('circumscribed.s.01')>\n",
"synset: Synset('limited.s.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.03')>\n",
"synset: Synset('limited.s.04')\n",
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.04')>\n",
"synset: Synset('limited.s.05')\n",
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.05')>\n",
"synset: Synset('limited.s.06')\n",
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.06')>\n",
"synset: Synset('limited.s.07')\n",
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.07')>\n",
"----------\n",
"word: not\n",
"synset: Synset('not.r.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('not.r.01')>\n",
"----------\n",
"word: only\n",
"synset: Synset('lone.s.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('lone.s.03')>\n",
"synset: Synset('alone.s.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('alone.s.03')>\n",
"synset: Synset('merely.r.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('merely.r.01')>\n",
"synset: Synset('entirely.r.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('entirely.r.02')>\n",
"synset: Synset('only.r.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('only.r.03')>\n",
"synset: Synset('only.r.04')\n",
"lemmas: <bound method Synset.lemma_names of Synset('only.r.04')>\n",
"synset: Synset('only.r.05')\n",
"lemmas: <bound method Synset.lemma_names of Synset('only.r.05')>\n",
"synset: Synset('only.r.06')\n",
"lemmas: <bound method Synset.lemma_names of Synset('only.r.06')>\n",
"synset: Synset('only.r.07')\n",
"lemmas: <bound method Synset.lemma_names of Synset('only.r.07')>\n",
"----------\n",
"word: through\n",
"synset: Synset('done.s.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('done.s.01')>\n",
"synset: Synset('through.s.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.s.02')>\n",
"synset: Synset('through.r.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.r.01')>\n",
"synset: Synset('through.r.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.r.02')>\n",
"synset: Synset('through.r.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.r.03')>\n",
"synset: Synset('through.r.04')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.r.04')>\n",
"synset: Synset('through.r.05')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.r.05')>\n",
"----------\n",
"word: the\n",
"----------\n",
"word: software\n",
"synset: Synset('software.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('software.n.01')>\n",
"----------\n",
"word: application,\n",
"----------\n",
"word: but\n",
"synset: Synset('merely.r.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('merely.r.01')>\n",
"----------\n",
"word: through\n",
"synset: Synset('done.s.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('done.s.01')>\n",
"synset: Synset('through.s.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.s.02')>\n",
"synset: Synset('through.r.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.r.01')>\n",
"synset: Synset('through.r.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.r.02')>\n",
"synset: Synset('through.r.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.r.03')>\n",
"synset: Synset('through.r.04')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.r.04')>\n",
"synset: Synset('through.r.05')\n",
"lemmas: <bound method Synset.lemma_names of Synset('through.r.05')>\n",
"----------\n",
"word: the\n",
"----------\n",
"word: syntax\n",
"synset: Synset('syntax.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.01')>\n",
"synset: Synset('syntax.n.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.02')>\n",
"synset: Synset('syntax.n.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.03')>\n",
"----------\n",
"word: the\n",
"----------\n",
"word: software\n",
"synset: Synset('software.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('software.n.01')>\n",
"----------\n",
"word: application\n",
"synset: Synset('application.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('application.n.01')>\n",
"synset: Synset('application.n.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('application.n.02')>\n",
"synset: Synset('application.n.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('application.n.03')>\n",
"synset: Synset('application.n.04')\n",
"lemmas: <bound method Synset.lemma_names of Synset('application.n.04')>\n",
"synset: Synset('lotion.n.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('lotion.n.02')>\n",
"synset: Synset('application.n.06')\n",
"lemmas: <bound method Synset.lemma_names of Synset('application.n.06')>\n",
"synset: Synset('application.n.07')\n",
"lemmas: <bound method Synset.lemma_names of Synset('application.n.07')>\n",
"----------\n",
"word: may\n",
"synset: Synset('may.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('may.n.01')>\n",
"synset: Synset('whitethorn.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('whitethorn.n.01')>\n",
"----------\n",
"word: use\n",
"synset: Synset('use.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('use.n.01')>\n",
"synset: Synset('function.n.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('function.n.02')>\n",
"synset: Synset('use.n.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('use.n.03')>\n",
"synset: Synset('consumption.n.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('consumption.n.03')>\n",
"synset: Synset('habit.n.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('habit.n.02')>\n",
"synset: Synset('manipulation.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('manipulation.n.01')>\n",
"synset: Synset('use.n.07')\n",
"lemmas: <bound method Synset.lemma_names of Synset('use.n.07')>\n",
"synset: Synset('use.v.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('use.v.01')>\n",
"synset: Synset('use.v.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('use.v.02')>\n",
"synset: Synset('use.v.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('use.v.03')>\n",
"synset: Synset('use.v.04')\n",
"lemmas: <bound method Synset.lemma_names of Synset('use.v.04')>\n",
"synset: Synset('practice.v.04')\n",
"lemmas: <bound method Synset.lemma_names of Synset('practice.v.04')>\n",
"synset: Synset('use.v.06')\n",
"lemmas: <bound method Synset.lemma_names of Synset('use.v.06')>\n",
"----------\n",
"word: for\n",
"----------\n",
"word: storing\n",
"synset: Synset('store.v.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('store.v.01')>\n",
"synset: Synset('store.v.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('store.v.02')>\n",
"----------\n",
"word: and\n",
"----------\n",
"word: transmitting\n",
"synset: Synset('transmission.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('transmission.n.01')>\n",
"synset: Synset('convey.v.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('convey.v.03')>\n",
"synset: Synset('impart.v.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('impart.v.03')>\n",
"synset: Synset('air.v.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('air.v.03')>\n",
"synset: Synset('transmit.v.04')\n",
"lemmas: <bound method Synset.lemma_names of Synset('transmit.v.04')>\n",
"----------\n",
"word: the\n",
"----------\n",
"word: information\n",
"synset: Synset('information.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('information.n.01')>\n",
"synset: Synset('information.n.02')\n",
"lemmas: <bound method Synset.lemma_names of Synset('information.n.02')>\n",
"synset: Synset('information.n.03')\n",
"lemmas: <bound method Synset.lemma_names of Synset('information.n.03')>\n",
"synset: Synset('data.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('data.n.01')>\n",
"synset: Synset('information.n.05')\n",
"lemmas: <bound method Synset.lemma_names of Synset('information.n.05')>\n",
"----------\n",
"word: it\n",
"synset: Synset('information_technology.n.01')\n",
"lemmas: <bound method Synset.lemma_names of Synset('information_technology.n.01')>\n",
"----------\n",
"word: processes.\n"
]
}
],
"source": [
"words = sentence.split()\n",
"for word in words:\n",
" print('----------')\n",
" print('word:', word)\n",
" for synset in wordnet.synsets(word):\n",
" print('synset:', synset)\n",
" print('lemmas:', synset.lemma_names)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<bound method Lemma.name of Lemma('car.n.01.car')>\n",
"<bound method Lemma.name of Lemma('car.n.02.car')>\n",
"<bound method Lemma.name of Lemma('car.n.03.car')>\n",
"<bound method Lemma.name of Lemma('car.n.04.car')>\n",
"<bound method Lemma.name of Lemma('cable_car.n.01.car')>\n"
]
}
],
"source": [
"for lemma in wordnet.lemmas('car'):\n",
" print(lemma.name)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<bound method Synset.examples of Synset('car.n.01')>\n",
"<bound method Synset.examples of Synset('car.n.02')>\n",
"<bound method Synset.examples of Synset('car.n.03')>\n",
"<bound method Synset.examples of Synset('car.n.04')>\n",
"<bound method Synset.examples of Synset('cable_car.n.01')>\n"
]
}
],
"source": [
"for synset in wordnet.synsets('car'):\n",
" print(synset.examples)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}