trying to solve special conflicts
commit
6d2117b3ed
File diff suppressed because one or more lines are too long
@ -0,0 +1,195 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Markdown - HTML - print"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pypandoc\n",
|
||||||
|
"from weasyprint import HTML, CSS\n",
|
||||||
|
"from weasyprint.fonts import FontConfiguration"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Markdown → HTML"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Pandoc: \"If you need to convert files from one markup format into another, **pandoc is your swiss-army knife**.\"\n",
|
||||||
|
"\n",
|
||||||
|
"https://pandoc.org/\n",
|
||||||
|
"\n",
|
||||||
|
"The Python library for Pandoc:\n",
|
||||||
|
"\n",
|
||||||
|
"https://github.com/bebraw/pypandoc \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Convert a Markdown file to HTML ...\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# ... directly from a file\n",
|
||||||
|
"html = pypandoc.convert_file('language.md', 'html')\n",
|
||||||
|
"print(html)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# ... or from a pad\n",
|
||||||
|
"\n",
|
||||||
|
"from urllib.request import urlopen\n",
|
||||||
|
"\n",
|
||||||
|
"url = 'https://pad.xpub.nl/p/language/export/txt'\n",
|
||||||
|
"# close the HTTP response as soon as the pad text has been read\n",
|
||||||
|
"with urlopen(url) as response:\n",
|
||||||
|
"    md = response.read().decode('UTF-8')\n",
|
||||||
|
"\n",
|
||||||
|
"# pandoc expects UTF-8 input, so write the file as UTF-8 whatever the platform default is\n",
|
||||||
|
"with open('language.md', 'w', encoding='utf-8') as f:\n",
|
||||||
|
"    f.write(md)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"html = pypandoc.convert_file('language.md', 'html')\n",
|
||||||
|
"print(html)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## HTML → PDF"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"For this we can use WeasyPrint again."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"html = HTML(string=html)\n",
|
||||||
|
"print(html)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"css = CSS(string='''\n",
|
||||||
|
"@page{\n",
|
||||||
|
" size: A4;\n",
|
||||||
|
" margin: 15mm;\n",
|
||||||
|
" \n",
|
||||||
|
" counter-increment: page;\n",
|
||||||
|
" \n",
|
||||||
|
" @top-left{\n",
|
||||||
|
" content: \"hello?\";\n",
|
||||||
|
" }\n",
|
||||||
|
" @top-center{\n",
|
||||||
|
" content: counter(page);\n",
|
||||||
|
" font-size: 7pt;\n",
|
||||||
|
" font-family: monospace;\n",
|
||||||
|
" color: blue;\n",
|
||||||
|
" }\n",
|
||||||
|
" @bottom-center{\n",
|
||||||
|
" content: \"this is the bottom center!\";\n",
|
||||||
|
" }\n",
|
||||||
|
" }\n",
|
||||||
|
" \n",
|
||||||
|
" body{\n",
|
||||||
|
" color: magenta;\n",
|
||||||
|
" }\n",
|
||||||
|
"''')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"It's actually interesting and useful to have a close look at paged media properties in CSS: \n",
|
||||||
|
"\n",
|
||||||
|
"https://developer.mozilla.org/en-US/docs/Web/CSS/%40page/size"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"html.write_pdf('language.pdf', stylesheets=[css])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
@ -0,0 +1,163 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# NLTK pos-tagged HTML → PDF"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import nltk\n",
|
||||||
|
"from weasyprint import HTML, CSS"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# open the input file (the with-block closes it once the text is read)\n",
|
||||||
|
"with open('../txt/language.txt') as f:\n",
|
||||||
|
"    txt = f.read()\n",
|
||||||
|
"words = nltk.word_tokenize(txt)\n",
|
||||||
|
"tagged_words = nltk.pos_tag(words)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# collect all the pieces of HTML\n",
|
||||||
|
"content = ''\n",
|
||||||
|
"content += '<h1>Language and Software Studies, by Florian Cramer</h1>'\n",
|
||||||
|
"\n",
|
||||||
|
"for word, tag in tagged_words:\n",
|
||||||
|
" content += f'<span class=\"{ tag }\">{ word }</span> '"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# write the HTML file\n",
|
||||||
|
"with open(\"language.html\", \"w\") as f:\n",
|
||||||
|
" f.write(f\"\"\"<!DOCTYPE html>\n",
|
||||||
|
"<html>\n",
|
||||||
|
"<head>\n",
|
||||||
|
" <meta charset=\"utf-8\">\n",
|
||||||
|
" <link rel=\"stylesheet\" type=\"text/css\" href=\"language.css\">\n",
|
||||||
|
" <title></title>\n",
|
||||||
|
"</head>\n",
|
||||||
|
"<body>\n",
|
||||||
|
"{ content }\n",
|
||||||
|
"</body>\n",
|
||||||
|
"\"\"\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# write a CSS file\n",
|
||||||
|
"with open(\"language.css\", \"w\") as f:\n",
|
||||||
|
" f.write(\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"@page{\n",
|
||||||
|
" size:A4;\n",
|
||||||
|
" background-color:lightgrey;\n",
|
||||||
|
" margin:10mm;\n",
|
||||||
|
"}\n",
|
||||||
|
".JJ{\n",
|
||||||
|
" color:red;\n",
|
||||||
|
"}\n",
|
||||||
|
".VB,\n",
|
||||||
|
".VBG{\n",
|
||||||
|
" color:magenta;\n",
|
||||||
|
"}\n",
|
||||||
|
".NN,\n",
|
||||||
|
".NNP{\n",
|
||||||
|
" color:green;\n",
|
||||||
|
"}\n",
|
||||||
|
".EX{\n",
|
||||||
|
" color: blue;\n",
|
||||||
|
"}\n",
|
||||||
|
" \"\"\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# If you use @font-face in your stylesheet, you would need Weasyprint's FontConfiguration()\n",
|
||||||
|
"from weasyprint.fonts import FontConfiguration\n",
|
||||||
|
"\n",
|
||||||
|
"font_config = FontConfiguration()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# collect all the files and write the PDF\n",
|
||||||
|
"html = HTML(\"language.html\")\n",
|
||||||
|
"css = CSS(\"language.css\")\n",
|
||||||
|
"html.write_pdf('language.pdf', stylesheets=[css], font_config=font_config)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Preview your PDF in the notebook!\n",
|
||||||
|
"from IPython.display import IFrame, display\n",
|
||||||
|
"IFrame(\"language.pdf\", width=900, height=600)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
Binary file not shown.
@ -1,433 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Wordnet"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import random\n",
|
|
||||||
"import nltk\n",
|
|
||||||
"from nltk.corpus import wordnet"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# You only need to run this once\n",
|
|
||||||
"# nltk.download('wordnet')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Access to hardware functions is limited not only through the software application, but through the syntax the software application may use for storing and transmitting the information it processes.\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# read all lines of the text; the with-block closes the file afterwards\n",
|
|
||||||
"with open('../txt/language.txt') as f:\n",
|
|
||||||
"    lines = f.readlines()\n",
|
|
||||||
"sentence = random.choice(lines)\n",
|
|
||||||
"print(sentence)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {
|
|
||||||
"scrolled": true
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"----------\n",
|
|
||||||
"word: Access\n",
|
|
||||||
"synset: Synset('entree.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('entree.n.02')>\n",
|
|
||||||
"synset: Synset('access.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.02')>\n",
|
|
||||||
"synset: Synset('access.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.03')>\n",
|
|
||||||
"synset: Synset('access.n.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.04')>\n",
|
|
||||||
"synset: Synset('access.n.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.05')>\n",
|
|
||||||
"synset: Synset('access.n.06')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.06')>\n",
|
|
||||||
"synset: Synset('access.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.v.01')>\n",
|
|
||||||
"synset: Synset('access.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.v.02')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: to\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: hardware\n",
|
|
||||||
"synset: Synset('hardware.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.01')>\n",
|
|
||||||
"synset: Synset('hardware.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.02')>\n",
|
|
||||||
"synset: Synset('hardware.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.03')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: functions\n",
|
|
||||||
"synset: Synset('function.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.01')>\n",
|
|
||||||
"synset: Synset('function.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.02')>\n",
|
|
||||||
"synset: Synset('function.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.03')>\n",
|
|
||||||
"synset: Synset('function.n.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.04')>\n",
|
|
||||||
"synset: Synset('function.n.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.05')>\n",
|
|
||||||
"synset: Synset('affair.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('affair.n.03')>\n",
|
|
||||||
"synset: Synset('routine.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('routine.n.03')>\n",
|
|
||||||
"synset: Synset('function.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.v.01')>\n",
|
|
||||||
"synset: Synset('serve.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('serve.v.01')>\n",
|
|
||||||
"synset: Synset('officiate.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('officiate.v.02')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: is\n",
|
|
||||||
"synset: Synset('be.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.01')>\n",
|
|
||||||
"synset: Synset('be.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.02')>\n",
|
|
||||||
"synset: Synset('be.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.03')>\n",
|
|
||||||
"synset: Synset('exist.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('exist.v.01')>\n",
|
|
||||||
"synset: Synset('be.v.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.05')>\n",
|
|
||||||
"synset: Synset('equal.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('equal.v.01')>\n",
|
|
||||||
"synset: Synset('constitute.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('constitute.v.01')>\n",
|
|
||||||
"synset: Synset('be.v.08')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.08')>\n",
|
|
||||||
"synset: Synset('embody.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('embody.v.02')>\n",
|
|
||||||
"synset: Synset('be.v.10')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.10')>\n",
|
|
||||||
"synset: Synset('be.v.11')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.11')>\n",
|
|
||||||
"synset: Synset('be.v.12')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.12')>\n",
|
|
||||||
"synset: Synset('cost.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('cost.v.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: limited\n",
|
|
||||||
"synset: Synset('express.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('express.n.02')>\n",
|
|
||||||
"synset: Synset('restrict.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('restrict.v.03')>\n",
|
|
||||||
"synset: Synset('limit.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limit.v.02')>\n",
|
|
||||||
"synset: Synset('specify.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('specify.v.02')>\n",
|
|
||||||
"synset: Synset('limited.a.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.a.01')>\n",
|
|
||||||
"synset: Synset('circumscribed.s.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('circumscribed.s.01')>\n",
|
|
||||||
"synset: Synset('limited.s.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.03')>\n",
|
|
||||||
"synset: Synset('limited.s.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.04')>\n",
|
|
||||||
"synset: Synset('limited.s.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.05')>\n",
|
|
||||||
"synset: Synset('limited.s.06')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.06')>\n",
|
|
||||||
"synset: Synset('limited.s.07')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.07')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: not\n",
|
|
||||||
"synset: Synset('not.r.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('not.r.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: only\n",
|
|
||||||
"synset: Synset('lone.s.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('lone.s.03')>\n",
|
|
||||||
"synset: Synset('alone.s.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('alone.s.03')>\n",
|
|
||||||
"synset: Synset('merely.r.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('merely.r.01')>\n",
|
|
||||||
"synset: Synset('entirely.r.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('entirely.r.02')>\n",
|
|
||||||
"synset: Synset('only.r.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.03')>\n",
|
|
||||||
"synset: Synset('only.r.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.04')>\n",
|
|
||||||
"synset: Synset('only.r.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.05')>\n",
|
|
||||||
"synset: Synset('only.r.06')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.06')>\n",
|
|
||||||
"synset: Synset('only.r.07')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.07')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: through\n",
|
|
||||||
"synset: Synset('done.s.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('done.s.01')>\n",
|
|
||||||
"synset: Synset('through.s.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.s.02')>\n",
|
|
||||||
"synset: Synset('through.r.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.01')>\n",
|
|
||||||
"synset: Synset('through.r.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.02')>\n",
|
|
||||||
"synset: Synset('through.r.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.03')>\n",
|
|
||||||
"synset: Synset('through.r.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.04')>\n",
|
|
||||||
"synset: Synset('through.r.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.05')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: the\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: software\n",
|
|
||||||
"synset: Synset('software.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('software.n.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: application,\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: but\n",
|
|
||||||
"synset: Synset('merely.r.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('merely.r.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: through\n",
|
|
||||||
"synset: Synset('done.s.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('done.s.01')>\n",
|
|
||||||
"synset: Synset('through.s.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.s.02')>\n",
|
|
||||||
"synset: Synset('through.r.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.01')>\n",
|
|
||||||
"synset: Synset('through.r.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.02')>\n",
|
|
||||||
"synset: Synset('through.r.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.03')>\n",
|
|
||||||
"synset: Synset('through.r.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.04')>\n",
|
|
||||||
"synset: Synset('through.r.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.05')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: the\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: syntax\n",
|
|
||||||
"synset: Synset('syntax.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.01')>\n",
|
|
||||||
"synset: Synset('syntax.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.02')>\n",
|
|
||||||
"synset: Synset('syntax.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.03')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: the\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: software\n",
|
|
||||||
"synset: Synset('software.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('software.n.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: application\n",
|
|
||||||
"synset: Synset('application.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.01')>\n",
|
|
||||||
"synset: Synset('application.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.02')>\n",
|
|
||||||
"synset: Synset('application.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.03')>\n",
|
|
||||||
"synset: Synset('application.n.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.04')>\n",
|
|
||||||
"synset: Synset('lotion.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('lotion.n.02')>\n",
|
|
||||||
"synset: Synset('application.n.06')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.06')>\n",
|
|
||||||
"synset: Synset('application.n.07')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.07')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: may\n",
|
|
||||||
"synset: Synset('may.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('may.n.01')>\n",
|
|
||||||
"synset: Synset('whitethorn.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('whitethorn.n.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: use\n",
|
|
||||||
"synset: Synset('use.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.n.01')>\n",
|
|
||||||
"synset: Synset('function.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.02')>\n",
|
|
||||||
"synset: Synset('use.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.n.03')>\n",
|
|
||||||
"synset: Synset('consumption.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('consumption.n.03')>\n",
|
|
||||||
"synset: Synset('habit.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('habit.n.02')>\n",
|
|
||||||
"synset: Synset('manipulation.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('manipulation.n.01')>\n",
|
|
||||||
"synset: Synset('use.n.07')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.n.07')>\n",
|
|
||||||
"synset: Synset('use.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.01')>\n",
|
|
||||||
"synset: Synset('use.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.02')>\n",
|
|
||||||
"synset: Synset('use.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.03')>\n",
|
|
||||||
"synset: Synset('use.v.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.04')>\n",
|
|
||||||
"synset: Synset('practice.v.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('practice.v.04')>\n",
|
|
||||||
"synset: Synset('use.v.06')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.06')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: for\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: storing\n",
|
|
||||||
"synset: Synset('store.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('store.v.01')>\n",
|
|
||||||
"synset: Synset('store.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('store.v.02')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: and\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: transmitting\n",
|
|
||||||
"synset: Synset('transmission.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('transmission.n.01')>\n",
|
|
||||||
"synset: Synset('convey.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('convey.v.03')>\n",
|
|
||||||
"synset: Synset('impart.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('impart.v.03')>\n",
|
|
||||||
"synset: Synset('air.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('air.v.03')>\n",
|
|
||||||
"synset: Synset('transmit.v.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('transmit.v.04')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: the\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: information\n",
|
|
||||||
"synset: Synset('information.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.01')>\n",
|
|
||||||
"synset: Synset('information.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.02')>\n",
|
|
||||||
"synset: Synset('information.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.03')>\n",
|
|
||||||
"synset: Synset('data.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('data.n.01')>\n",
|
|
||||||
"synset: Synset('information.n.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.05')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: it\n",
|
|
||||||
"synset: Synset('information_technology.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('information_technology.n.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: processes.\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# look up every whitespace-separated word of the sentence in WordNet\n",
|
|
||||||
"words = sentence.split()\n",
|
|
||||||
"for word in words:\n",
|
|
||||||
"    print('----------')\n",
|
|
||||||
"    print('word:', word)\n",
|
|
||||||
"    for synset in wordnet.synsets(word):\n",
|
|
||||||
"        print('synset:', synset)\n",
|
|
||||||
"        # lemma_names is a method: call it to get the list of lemma strings\n",
|
|
||||||
"        print('lemmas:', synset.lemma_names())\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 6,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"<bound method Lemma.name of Lemma('car.n.01.car')>\n",
|
|
||||||
"<bound method Lemma.name of Lemma('car.n.02.car')>\n",
|
|
||||||
"<bound method Lemma.name of Lemma('car.n.03.car')>\n",
|
|
||||||
"<bound method Lemma.name of Lemma('car.n.04.car')>\n",
|
|
||||||
"<bound method Lemma.name of Lemma('cable_car.n.01.car')>\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# Lemma.name is a method: call it to print the lemma string, not the bound method\n",
|
|
||||||
"for lemma in wordnet.lemmas('car'):\n",
|
|
||||||
"    print(lemma.name())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 7,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"<bound method Synset.examples of Synset('car.n.01')>\n",
|
|
||||||
"<bound method Synset.examples of Synset('car.n.02')>\n",
|
|
||||||
"<bound method Synset.examples of Synset('car.n.03')>\n",
|
|
||||||
"<bound method Synset.examples of Synset('car.n.04')>\n",
|
|
||||||
"<bound method Synset.examples of Synset('cable_car.n.01')>\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# Synset.examples is a method: call it to print the example sentences of each sense\n",
|
|
||||||
"for synset in wordnet.synsets('car'):\n",
|
|
||||||
"    print(synset.examples())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.7.3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 4
|
|
||||||
}
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue