{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# WordNet\n",
    "\n",
    "Pick a random sentence from a text file and print the WordNet synsets and lemma names found for each of its words."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import string\n",
    "\n",
    "import nltk\n",
    "from nltk.corpus import wordnet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the WordNet corpus only when it is not installed yet,\n",
    "# so the notebook also runs on a fresh environment.\n",
    "try:\n",
    "    wordnet.ensure_loaded()\n",
    "except LookupError:\n",
    "    nltk.download('wordnet')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pick a random sentence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only non-empty lines so random.choice can never return a blank sentence;\n",
    "# the with-block closes the file as soon as it has been read.\n",
    "with open('../txt/language.txt') as text_file:\n",
    "    lines = [line.strip() for line in text_file if line.strip()]\n",
    "\n",
    "sentence = random.choice(lines)\n",
    "print(sentence)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Synsets and lemma names for every word of the sentence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Strip surrounding punctuation before the lookup: a token such as\n",
    "# 'application,' has no WordNet entry, while 'application' does.\n",
    "tokens = [token.strip(string.punctuation) for token in sentence.split()]\n",
    "words = [token for token in tokens if token]  # drop punctuation-only tokens\n",
    "\n",
    "for word in words:\n",
    "    print('----------')\n",
    "    print('word:', word)\n",
    "    for synset in wordnet.synsets(word):\n",
    "        print('synset:', synset)\n",
    "        # lemma_names is a method in NLTK 3 and has to be called\n",
    "        print('lemmas:', synset.lemma_names())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Lemmas and usage examples for a single word"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lemma.name is a method in NLTK 3 and has to be called\n",
    "for lemma in wordnet.lemmas('car'):\n",
    "    print(lemma.name())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Synset.examples is a method in NLTK 3 and has to be called\n",
    "for synset in wordnet.synsets('car'):\n",
    "    print(synset.examples())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}