trying to solve special conflicts
commit
6d2117b3ed
File diff suppressed because one or more lines are too long
@ -0,0 +1,195 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Markdown - HTML - print"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pypandoc\n",
|
||||
"from weasyprint import HTML, CSS\n",
|
||||
"from weasyprint.fonts import FontConfiguration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Markdown → HTML"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Pandoc: \"If you need to convert files from one markup format into another, **pandoc is your swiss-army knife**.\"\n",
|
||||
"\n",
|
||||
"https://pandoc.org/\n",
|
||||
"\n",
|
||||
"The Python library for Pandoc:\n",
|
||||
"\n",
|
||||
"https://github.com/bebraw/pypandoc \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Convert a Markdown file to HTML ...\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# ... directly from a file\n",
|
||||
"html = pypandoc.convert_file('language.md', 'html')\n",
|
||||
"print(html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# ... or from a pad\n",
|
||||
"\n",
|
||||
"from urllib.request import urlopen\n",
|
||||
"\n",
|
||||
"url = 'https://pad.xpub.nl/p/language/export/txt'\n",
|
||||
"response = urlopen(url)\n",
|
||||
"md = response.read().decode('UTF-8')\n",
|
||||
"\n",
|
||||
"with open('language.md', 'w') as f:\n",
|
||||
" f.write(md)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"html = pypandoc.convert_file('language.md', 'html')\n",
|
||||
"print(html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## HTML → PDF"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For this we can use WeasyPrint again."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"html = HTML(string=html)\n",
|
||||
"print(html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"css = CSS(string='''\n",
|
||||
"@page{\n",
|
||||
" size: A4;\n",
|
||||
" margin: 15mm;\n",
|
||||
" \n",
|
||||
" counter-increment: page;\n",
|
||||
" \n",
|
||||
" @top-left{\n",
|
||||
" content: \"hello?\";\n",
|
||||
" }\n",
|
||||
" @top-center{\n",
|
||||
" content: counter(page);\n",
|
||||
" font-size: 7pt;\n",
|
||||
" font-family: monospace;\n",
|
||||
" color: blue;\n",
|
||||
" }\n",
|
||||
" @bottom-center{\n",
|
||||
" content: \"this is the bottom center!\";\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" body{\n",
|
||||
" color: magenta;\n",
|
||||
" }\n",
|
||||
"''')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It's actually interesting and useful to have a close look at paged media properties in CSS: \n",
|
||||
"\n",
|
||||
"https://developer.mozilla.org/en-US/docs/Web/CSS/%40page/size"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"html.write_pdf('language.pdf', stylesheets=[css])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
@ -0,0 +1,163 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# NLTK POS-tagged HTML → PDF"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nltk\n",
|
||||
"from weasyprint import HTML, CSS"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# read the input file, then tokenize it and POS-tag the words\n",
|
||||
"txt = open('../txt/language.txt').read()\n",
|
||||
"words = nltk.word_tokenize(txt)\n",
|
||||
"tagged_words = nltk.pos_tag(words)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# collect all the pieces of HTML\n",
|
||||
"content = ''\n",
|
||||
"content += '<h1>Language and Software Studies, by Florian Cramer</h1>'\n",
|
||||
"\n",
|
||||
"for word, tag in tagged_words:\n",
|
||||
" content += f'<span class=\"{ tag }\">{ word }</span> '"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# write the HTML file\n",
|
||||
"with open(\"language.html\", \"w\") as f:\n",
|
||||
" f.write(f\"\"\"<!DOCTYPE html>\n",
|
||||
"<html>\n",
|
||||
"<head>\n",
|
||||
" <meta charset=\"utf-8\">\n",
|
||||
" <link rel=\"stylesheet\" type=\"text/css\" href=\"language.css\">\n",
|
||||
" <title></title>\n",
|
||||
"</head>\n",
|
||||
"<body>\n",
|
||||
"{ content }\n",
|
||||
"</body>\n",
|
||||
"\"\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# write a CSS file\n",
|
||||
"with open(\"language.css\", \"w\") as f:\n",
|
||||
" f.write(\"\"\"\n",
|
||||
"\n",
|
||||
"@page{\n",
|
||||
" size:A4;\n",
|
||||
" background-color:lightgrey;\n",
|
||||
" margin:10mm;\n",
|
||||
"}\n",
|
||||
".JJ{\n",
|
||||
" color:red;\n",
|
||||
"}\n",
|
||||
".VB,\n",
|
||||
".VBG{\n",
|
||||
" color:magenta;\n",
|
||||
"}\n",
|
||||
".NN,\n",
|
||||
".NNP{\n",
|
||||
" color:green;\n",
|
||||
"}\n",
|
||||
".EX{\n",
|
||||
" color: blue;\n",
|
||||
"}\n",
|
||||
" \"\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# If you use @font-face in your stylesheet, you need WeasyPrint's FontConfiguration()\n",
|
||||
"from weasyprint.fonts import FontConfiguration\n",
|
||||
"\n",
|
||||
"font_config = FontConfiguration()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# collect all the files and write the PDF\n",
|
||||
"html = HTML(\"language.html\")\n",
|
||||
"css = CSS(\"language.css\")\n",
|
||||
"html.write_pdf('language.pdf', stylesheets=[css], font_config=font_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Preview your PDF in the notebook!\n",
|
||||
"from IPython.display import IFrame, display\n",
|
||||
"IFrame(\"language.pdf\", width=900, height=600)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
File diff suppressed because one or more lines are too long
Binary file not shown.
@ -1,433 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Wordnet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import random\n",
|
||||
"import nltk\n",
|
||||
"from nltk.corpus import wordnet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You only need to run this once\n",
|
||||
"# nltk.download('wordnet')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Access to hardware functions is limited not only through the software application, but through the syntax the software application may use for storing and transmitting the information it processes.\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"lines = open('../txt/language.txt').readlines()\n",
|
||||
"sentence = random.choice(lines)\n",
|
||||
"print(sentence)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"----------\n",
|
||||
"word: Access\n",
|
||||
"synset: Synset('entree.n.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('entree.n.02')>\n",
|
||||
"synset: Synset('access.n.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.02')>\n",
|
||||
"synset: Synset('access.n.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.03')>\n",
|
||||
"synset: Synset('access.n.04')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.04')>\n",
|
||||
"synset: Synset('access.n.05')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.05')>\n",
|
||||
"synset: Synset('access.n.06')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.06')>\n",
|
||||
"synset: Synset('access.v.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('access.v.01')>\n",
|
||||
"synset: Synset('access.v.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('access.v.02')>\n",
|
||||
"----------\n",
|
||||
"word: to\n",
|
||||
"----------\n",
|
||||
"word: hardware\n",
|
||||
"synset: Synset('hardware.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.01')>\n",
|
||||
"synset: Synset('hardware.n.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.02')>\n",
|
||||
"synset: Synset('hardware.n.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.03')>\n",
|
||||
"----------\n",
|
||||
"word: functions\n",
|
||||
"synset: Synset('function.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.01')>\n",
|
||||
"synset: Synset('function.n.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.02')>\n",
|
||||
"synset: Synset('function.n.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.03')>\n",
|
||||
"synset: Synset('function.n.04')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.04')>\n",
|
||||
"synset: Synset('function.n.05')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.05')>\n",
|
||||
"synset: Synset('affair.n.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('affair.n.03')>\n",
|
||||
"synset: Synset('routine.n.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('routine.n.03')>\n",
|
||||
"synset: Synset('function.v.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('function.v.01')>\n",
|
||||
"synset: Synset('serve.v.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('serve.v.01')>\n",
|
||||
"synset: Synset('officiate.v.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('officiate.v.02')>\n",
|
||||
"----------\n",
|
||||
"word: is\n",
|
||||
"synset: Synset('be.v.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.01')>\n",
|
||||
"synset: Synset('be.v.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.02')>\n",
|
||||
"synset: Synset('be.v.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.03')>\n",
|
||||
"synset: Synset('exist.v.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('exist.v.01')>\n",
|
||||
"synset: Synset('be.v.05')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.05')>\n",
|
||||
"synset: Synset('equal.v.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('equal.v.01')>\n",
|
||||
"synset: Synset('constitute.v.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('constitute.v.01')>\n",
|
||||
"synset: Synset('be.v.08')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.08')>\n",
|
||||
"synset: Synset('embody.v.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('embody.v.02')>\n",
|
||||
"synset: Synset('be.v.10')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.10')>\n",
|
||||
"synset: Synset('be.v.11')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.11')>\n",
|
||||
"synset: Synset('be.v.12')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.12')>\n",
|
||||
"synset: Synset('cost.v.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('cost.v.01')>\n",
|
||||
"----------\n",
|
||||
"word: limited\n",
|
||||
"synset: Synset('express.n.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('express.n.02')>\n",
|
||||
"synset: Synset('restrict.v.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('restrict.v.03')>\n",
|
||||
"synset: Synset('limit.v.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('limit.v.02')>\n",
|
||||
"synset: Synset('specify.v.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('specify.v.02')>\n",
|
||||
"synset: Synset('limited.a.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.a.01')>\n",
|
||||
"synset: Synset('circumscribed.s.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('circumscribed.s.01')>\n",
|
||||
"synset: Synset('limited.s.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.03')>\n",
|
||||
"synset: Synset('limited.s.04')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.04')>\n",
|
||||
"synset: Synset('limited.s.05')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.05')>\n",
|
||||
"synset: Synset('limited.s.06')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.06')>\n",
|
||||
"synset: Synset('limited.s.07')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.07')>\n",
|
||||
"----------\n",
|
||||
"word: not\n",
|
||||
"synset: Synset('not.r.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('not.r.01')>\n",
|
||||
"----------\n",
|
||||
"word: only\n",
|
||||
"synset: Synset('lone.s.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('lone.s.03')>\n",
|
||||
"synset: Synset('alone.s.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('alone.s.03')>\n",
|
||||
"synset: Synset('merely.r.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('merely.r.01')>\n",
|
||||
"synset: Synset('entirely.r.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('entirely.r.02')>\n",
|
||||
"synset: Synset('only.r.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.03')>\n",
|
||||
"synset: Synset('only.r.04')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.04')>\n",
|
||||
"synset: Synset('only.r.05')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.05')>\n",
|
||||
"synset: Synset('only.r.06')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.06')>\n",
|
||||
"synset: Synset('only.r.07')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.07')>\n",
|
||||
"----------\n",
|
||||
"word: through\n",
|
||||
"synset: Synset('done.s.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('done.s.01')>\n",
|
||||
"synset: Synset('through.s.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.s.02')>\n",
|
||||
"synset: Synset('through.r.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.01')>\n",
|
||||
"synset: Synset('through.r.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.02')>\n",
|
||||
"synset: Synset('through.r.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.03')>\n",
|
||||
"synset: Synset('through.r.04')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.04')>\n",
|
||||
"synset: Synset('through.r.05')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.05')>\n",
|
||||
"----------\n",
|
||||
"word: the\n",
|
||||
"----------\n",
|
||||
"word: software\n",
|
||||
"synset: Synset('software.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('software.n.01')>\n",
|
||||
"----------\n",
|
||||
"word: application,\n",
|
||||
"----------\n",
|
||||
"word: but\n",
|
||||
"synset: Synset('merely.r.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('merely.r.01')>\n",
|
||||
"----------\n",
|
||||
"word: through\n",
|
||||
"synset: Synset('done.s.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('done.s.01')>\n",
|
||||
"synset: Synset('through.s.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.s.02')>\n",
|
||||
"synset: Synset('through.r.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.01')>\n",
|
||||
"synset: Synset('through.r.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.02')>\n",
|
||||
"synset: Synset('through.r.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.03')>\n",
|
||||
"synset: Synset('through.r.04')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.04')>\n",
|
||||
"synset: Synset('through.r.05')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.05')>\n",
|
||||
"----------\n",
|
||||
"word: the\n",
|
||||
"----------\n",
|
||||
"word: syntax\n",
|
||||
"synset: Synset('syntax.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.01')>\n",
|
||||
"synset: Synset('syntax.n.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.02')>\n",
|
||||
"synset: Synset('syntax.n.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.03')>\n",
|
||||
"----------\n",
|
||||
"word: the\n",
|
||||
"----------\n",
|
||||
"word: software\n",
|
||||
"synset: Synset('software.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('software.n.01')>\n",
|
||||
"----------\n",
|
||||
"word: application\n",
|
||||
"synset: Synset('application.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.01')>\n",
|
||||
"synset: Synset('application.n.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.02')>\n",
|
||||
"synset: Synset('application.n.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.03')>\n",
|
||||
"synset: Synset('application.n.04')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.04')>\n",
|
||||
"synset: Synset('lotion.n.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('lotion.n.02')>\n",
|
||||
"synset: Synset('application.n.06')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.06')>\n",
|
||||
"synset: Synset('application.n.07')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.07')>\n",
|
||||
"----------\n",
|
||||
"word: may\n",
|
||||
"synset: Synset('may.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('may.n.01')>\n",
|
||||
"synset: Synset('whitethorn.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('whitethorn.n.01')>\n",
|
||||
"----------\n",
|
||||
"word: use\n",
|
||||
"synset: Synset('use.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('use.n.01')>\n",
|
||||
"synset: Synset('function.n.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.02')>\n",
|
||||
"synset: Synset('use.n.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('use.n.03')>\n",
|
||||
"synset: Synset('consumption.n.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('consumption.n.03')>\n",
|
||||
"synset: Synset('habit.n.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('habit.n.02')>\n",
|
||||
"synset: Synset('manipulation.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('manipulation.n.01')>\n",
|
||||
"synset: Synset('use.n.07')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('use.n.07')>\n",
|
||||
"synset: Synset('use.v.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.01')>\n",
|
||||
"synset: Synset('use.v.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.02')>\n",
|
||||
"synset: Synset('use.v.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.03')>\n",
|
||||
"synset: Synset('use.v.04')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.04')>\n",
|
||||
"synset: Synset('practice.v.04')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('practice.v.04')>\n",
|
||||
"synset: Synset('use.v.06')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.06')>\n",
|
||||
"----------\n",
|
||||
"word: for\n",
|
||||
"----------\n",
|
||||
"word: storing\n",
|
||||
"synset: Synset('store.v.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('store.v.01')>\n",
|
||||
"synset: Synset('store.v.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('store.v.02')>\n",
|
||||
"----------\n",
|
||||
"word: and\n",
|
||||
"----------\n",
|
||||
"word: transmitting\n",
|
||||
"synset: Synset('transmission.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('transmission.n.01')>\n",
|
||||
"synset: Synset('convey.v.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('convey.v.03')>\n",
|
||||
"synset: Synset('impart.v.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('impart.v.03')>\n",
|
||||
"synset: Synset('air.v.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('air.v.03')>\n",
|
||||
"synset: Synset('transmit.v.04')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('transmit.v.04')>\n",
|
||||
"----------\n",
|
||||
"word: the\n",
|
||||
"----------\n",
|
||||
"word: information\n",
|
||||
"synset: Synset('information.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.01')>\n",
|
||||
"synset: Synset('information.n.02')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.02')>\n",
|
||||
"synset: Synset('information.n.03')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.03')>\n",
|
||||
"synset: Synset('data.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('data.n.01')>\n",
|
||||
"synset: Synset('information.n.05')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.05')>\n",
|
||||
"----------\n",
|
||||
"word: it\n",
|
||||
"synset: Synset('information_technology.n.01')\n",
|
||||
"lemmas: <bound method Synset.lemma_names of Synset('information_technology.n.01')>\n",
|
||||
"----------\n",
|
||||
"word: processes.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"words = sentence.split()\n",
|
||||
"for word in words:\n",
|
||||
" print('----------')\n",
|
||||
" print('word:', word)\n",
|
||||
" for synset in wordnet.synsets(word):\n",
|
||||
" print('synset:', synset)\n",
|
||||
" print('lemmas:', synset.lemma_names)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<bound method Lemma.name of Lemma('car.n.01.car')>\n",
|
||||
"<bound method Lemma.name of Lemma('car.n.02.car')>\n",
|
||||
"<bound method Lemma.name of Lemma('car.n.03.car')>\n",
|
||||
"<bound method Lemma.name of Lemma('car.n.04.car')>\n",
|
||||
"<bound method Lemma.name of Lemma('cable_car.n.01.car')>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for lemma in wordnet.lemmas('car'):\n",
|
||||
" print(lemma.name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<bound method Synset.examples of Synset('car.n.01')>\n",
|
||||
"<bound method Synset.examples of Synset('car.n.02')>\n",
|
||||
"<bound method Synset.examples of Synset('car.n.03')>\n",
|
||||
"<bound method Synset.examples of Synset('car.n.04')>\n",
|
||||
"<bound method Synset.examples of Synset('cable_car.n.01')>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for synset in wordnet.synsets('car'):\n",
|
||||
" print(synset.examples)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue