big sync
commit
f4b35d8a75
File diff suppressed because one or more lines are too long
@ -0,0 +1,195 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Markdown - HTML - print"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pypandoc\n",
|
||||||
|
"from weasyprint import HTML, CSS\n",
|
||||||
|
"from weasyprint.fonts import FontConfiguration"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Markdown → HTML"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Pandoc: \"If you need to convert files from one markup format into another, **pandoc is your swiss-army knife**.\"\n",
|
||||||
|
"\n",
|
||||||
|
"https://pandoc.org/\n",
|
||||||
|
"\n",
|
||||||
|
"The Python library for Pandoc:\n",
|
||||||
|
"\n",
|
||||||
|
"https://github.com/bebraw/pypandoc \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Convert a Markdown file to HTML ...\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# ... directly from a file\n",
|
||||||
|
"html = pypandoc.convert_file('language.md', 'html')\n",
|
||||||
|
"print(html)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# ... or from a pad\n",
|
||||||
|
"\n",
|
||||||
|
"from urllib.request import urlopen\n",
|
||||||
|
"\n",
|
||||||
|
"url = 'https://pad.xpub.nl/p/language/export/txt'\n",
|
||||||
|
"response = urlopen(url)\n",
|
||||||
|
"md = response.read().decode('UTF-8')\n",
|
||||||
|
"\n",
|
||||||
|
"with open('language.md', 'w') as f:\n",
|
||||||
|
" f.write(md)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"html = pypandoc.convert_file('language.md', 'html')\n",
|
||||||
|
"print(html)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## HTML → PDF"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"For this we can use Weasyprint again."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Build the Weasyprint document straight from the Markdown file.\n",
  "# (Feeding `html` back into HTML(string=html) only works once: on a second run `html`\n",
  "# is already an HTML object instead of a string, and the cell fails.)\n",
  "html = HTML(string=pypandoc.convert_file('language.md', 'html'))\n",
  "print(html)"
 ]
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"css = CSS(string='''\n",
|
||||||
|
"@page{\n",
|
||||||
|
" size: A4;\n",
|
||||||
|
" margin: 15mm;\n",
|
||||||
|
" \n",
|
||||||
|
" counter-increment: page;\n",
|
||||||
|
" \n",
|
||||||
|
" @top-left{\n",
|
||||||
|
" content: \"hello?\";\n",
|
||||||
|
" }\n",
|
||||||
|
" @top-center{\n",
|
||||||
|
" content: counter(page);\n",
|
||||||
|
" font-size: 7pt;\n",
|
||||||
|
" font-family: monospace;\n",
|
||||||
|
" color: blue;\n",
|
||||||
|
" }\n",
|
||||||
|
" @bottom-center{\n",
|
||||||
|
" content: \"this is the bottom center!\";\n",
|
||||||
|
" }\n",
|
||||||
|
" }\n",
|
||||||
|
" \n",
|
||||||
|
" body{\n",
|
||||||
|
" color: magenta;\n",
|
||||||
|
" }\n",
|
||||||
|
"''')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"It's actually interesting and useful to have a close look at paged media properties in CSS: \n",
|
||||||
|
"\n",
|
||||||
|
"https://developer.mozilla.org/en-US/docs/Web/CSS/%40page/size"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"html.write_pdf('language.pdf', stylesheets=[css])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
@ -0,0 +1,163 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# NLTK pos-tagged HTML → PDF"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import nltk\n",
|
||||||
|
"from weasyprint import HTML, CSS"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# open the input file (the with-block closes it again once it has been read)\n",
  "with open('../txt/language.txt') as f:\n",
  "    txt = f.read()\n",
  "\n",
  "# split the text into tokens and label every token with its part-of-speech tag\n",
  "words = nltk.word_tokenize(txt)\n",
  "tagged_words = nltk.pos_tag(words)"
 ]
},
|
||||||
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# collect all the pieces of HTML\n",
  "from html import escape\n",
  "\n",
  "content = ''\n",
  "content += '<h1>Language and Software Studies, by Florian Cramer</h1>'\n",
  "\n",
  "# one <span> per token, with the part-of-speech tag as its CSS class;\n",
  "# escape() keeps characters such as < > & in the text from breaking the markup\n",
  "for word, tag in tagged_words:\n",
  "    content += f'<span class=\"{ escape(tag) }\">{ escape(word) }</span> '"
 ]
},
|
||||||
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# write the HTML file\n",
  "# (encoding is set explicitly so the bytes on disk match the <meta charset> below)\n",
  "with open(\"language.html\", \"w\", encoding=\"utf-8\") as f:\n",
  "    f.write(f\"\"\"<!DOCTYPE html>\n",
  "<html>\n",
  "<head>\n",
  "    <meta charset=\"utf-8\">\n",
  "    <link rel=\"stylesheet\" type=\"text/css\" href=\"language.css\">\n",
  "    <title></title>\n",
  "</head>\n",
  "<body>\n",
  "{ content }\n",
  "</body>\n",
  "</html>\n",
  "\"\"\")"
 ]
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# write a CSS file\n",
|
||||||
|
"with open(\"language.css\", \"w\") as f:\n",
|
||||||
|
" f.write(\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"@page{\n",
|
||||||
|
" size:A4;\n",
|
||||||
|
" background-color:lightgrey;\n",
|
||||||
|
" margin:10mm;\n",
|
||||||
|
"}\n",
|
||||||
|
".JJ{\n",
|
||||||
|
" color:red;\n",
|
||||||
|
"}\n",
|
||||||
|
".VB,\n",
|
||||||
|
".VBG{\n",
|
||||||
|
" color:magenta;\n",
|
||||||
|
"}\n",
|
||||||
|
".NN,\n",
|
||||||
|
".NNP{\n",
|
||||||
|
" color:green;\n",
|
||||||
|
"}\n",
|
||||||
|
".EX{\n",
|
||||||
|
" color: blue;\n",
|
||||||
|
"}\n",
|
||||||
|
" \"\"\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# If you use @font-face in your stylesheet, you would need Weasyprint's FontConfiguration()\n",
|
||||||
|
"from weasyprint.fonts import FontConfiguration\n",
|
||||||
|
"\n",
|
||||||
|
"font_config = FontConfiguration()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# collect all the files and write the PDF\n",
|
||||||
|
"html = HTML(\"language.html\")\n",
|
||||||
|
"css = CSS(\"language.css\")\n",
|
||||||
|
"html.write_pdf('language.pdf', stylesheets=[css], font_config=font_config)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Preview your PDF in the notebook!\n",
|
||||||
|
"from IPython.display import IFrame, display\n",
|
||||||
|
"IFrame(\"language.pdf\", width=900, height=600)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
@ -0,0 +1,57 @@
|
|||||||
|
/* Page setup for the PDF.
   The nested @top-left, @bottom-center, ... rules are page-margin boxes from CSS paged
   media: they place generated text in the margins of every page. */
@page{
    size: A4;
    margin: 15mm;
    background-color: lightgrey;
    font-family: monospace;
    font-size: 8pt;
    color: #7da0d4;
    /* was "1.5x", which is not a valid length, so the whole border declaration was ignored */
    border: 1.5px dotted red;

    @top-left{
        content: "liquid";
    }
    @top-center{
        content: "bodies";
    }
    @top-right{
        content: "natural";
    }
    /* The former "@top-middle" rule (content: "") was removed: there is no such margin box.
       The top edge only has top-left / top-center / top-right (plus the two corners);
       the "-middle" boxes exist on the left and right edges (@left-middle, @right-middle). */
    @left-top{
        content: "material";
    }
    @right-top{
        content: "existence";
    }
    @bottom-left{
        content: "flux";
    }
    @bottom-center{
        content: "living";
    }
    @bottom-right{
        content: "energy";
    }
}

/* Running text */
body {
    background: #f7c694;
    margin: 20px;
    line-height: 2;
    font-family: monospace;
}

/* Keep the line breaks of preformatted text, but wrap lines that are too long for the page */
pre {
    white-space: pre-wrap;
}

/* Part-of-speech classes: NN and JJ are tags that nltk.pos_tag puts on each word */
span.NN {
    font-style: italic;
    color: white;
    border: 2px dashed black;
}

span.JJ {
    color: #9abae3;
}
|
File diff suppressed because one or more lines are too long
@ -0,0 +1,273 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Weasyprint"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from weasyprint import HTML, CSS\n",
|
||||||
|
"from weasyprint.fonts import FontConfiguration"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"https://weasyprint.readthedocs.io/en/latest/tutorial.html"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# If you use @font-face in your stylesheet, you would need Weasyprint's FontConfiguration()\n",
|
||||||
|
"font_config = FontConfiguration()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## HTML"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# small example HTML object\n",
|
||||||
|
"html = HTML(string='<h1>hello</h1>')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"or in this case let's use python + nltk to make a custom HTML page with parts of speech used as CSS classes..."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "import nltk\n",
  "\n",
  "with open('txt/LIQUID1.txt') as f:\n",
  "    txt = f.read()\n",
  "words = nltk.word_tokenize(txt)\n",
  "tagged_words = nltk.pos_tag(words)"
 ]
},
|
||||||
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "from html import escape\n",
  "\n",
  "content = ''\n",
  "content += '<h1>LIQUID | Rachel Armstrong</h1>'\n",
  "\n",
  "# one <span> per token, with the part-of-speech tag as its CSS class;\n",
  "# escape() keeps characters such as < > & in the text from breaking the markup\n",
  "for word, tag in tagged_words:\n",
  "    content += f'<span class=\"{ escape(tag) }\">{ escape(word) }</span> '\n",
  "\n",
  "# The stylesheet link is relative to the HTML page itself: the page is saved inside txt/,\n",
  "# so \"liquid.css\" already points at txt/liquid.css (\"txt/liquid.css\" would be txt/txt/liquid.css).\n",
  "with open(\"txt/liquid.html\", \"w\", encoding=\"utf-8\") as f:\n",
  "    f.write(f\"\"\"<!DOCTYPE html>\n",
  "<html>\n",
  "<head>\n",
  "    <meta charset=\"utf-8\">\n",
  "    <link rel=\"stylesheet\" type=\"text/css\" href=\"liquid.css\">\n",
  "    <title>Liquid</title>\n",
  "</head>\n",
  "<body>\n",
  "{content}\n",
  "</body>\n",
  "</html>\n",
  "\"\"\")\n",
  "\n",
  "html = HTML(\"txt/liquid.html\")"
 ]
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Saved to [liquid.html](txt/liquid.html). Fun fact: Jupyter filters HTML pages that are displayed in the notebook. To see the HTML unfiltered, use an iframe (as below), or right-click the file in the file list and select Open in New Tab.\n",
|
||||||
|
"\n",
|
||||||
|
"Possibly useful later on: https://stackoverflow.com/questions/23358444/how-can-i-use-word-tokenize-in-nltk-and-keep-the-spaces"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"NB: The above HTML refers to the stylesheet [liquid.css](txt/liquid.css). Notice that the path in the link is relative to the HTML page, so there is no need to say txt in the link."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"\n",
|
||||||
|
" <iframe\n",
|
||||||
|
" width=\"1024\"\n",
|
||||||
|
" height=\"600\"\n",
|
||||||
|
" src=\"txt/liquid.html\"\n",
|
||||||
|
" frameborder=\"0\"\n",
|
||||||
|
" allowfullscreen\n",
|
||||||
|
" ></iframe>\n",
|
||||||
|
" "
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.lib.display.IFrame at 0xaf85cc30>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from IPython.display import IFrame\n",
|
||||||
|
"IFrame(\"txt/liquid.html\", width=1024, height=600)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Generating the PDF!\n",
|
||||||
|
"\n",
|
||||||
|
"Now let's let weasyprint do its stuff! `write_pdf` actually calculates the layout, behaving like a web browser to render the HTML visibly and following the CSS guidelines for paged media (notice the special rules in the CSS that weasyprint recognizes and uses but that the browser does not). Notice that the CSS file gets mentioned again explicitly (and here we need to refer to its path relative to this folder)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 44,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"## If we had not linked the CSS in the HTML, you could specify it in this way\n",
|
||||||
|
"# css = CSS(\"txt/language.css\", font_config=font_config)\n",
|
||||||
|
"# html.write_pdf('txt/language.pdf', stylesheets=[css], font_config=font_config)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"css = CSS(\"txt/liquid.css\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# write the PDF into txt/, which is where the preview cell below looks for it\n",
  "html.write_pdf('txt/liquid.pdf', stylesheets=[css], font_config=font_config)"
 ]
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"\n",
|
||||||
|
" <iframe\n",
|
||||||
|
" width=\"1024\"\n",
|
||||||
|
" height=\"600\"\n",
|
||||||
|
" src=\"txt/liquid.pdf\"\n",
|
||||||
|
" frameborder=\"0\"\n",
|
||||||
|
" allowfullscreen\n",
|
||||||
|
" ></iframe>\n",
|
||||||
|
" "
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<IPython.lib.display.IFrame at 0xaf8c79b0>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from IPython.display import IFrame\n",
|
||||||
|
"IFrame(\"txt/liquid.pdf\", width=1024, height=600)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
After Width: | Height: | Size: 107 KiB |
@ -1,433 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Wordnet"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import random\n",
|
|
||||||
"import nltk\n",
|
|
||||||
"from nltk.corpus import wordnet"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# You only need to run this once\n",
|
|
||||||
"# nltk.download('wordnet')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Access to hardware functions is limited not only through the software application, but through the syntax the software application may use for storing and transmitting the information it processes.\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"lines = open('../txt/language.txt').readlines()\n",
|
|
||||||
"sentence = random.choice(lines)\n",
|
|
||||||
"print(sentence)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {
|
|
||||||
"scrolled": true
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"----------\n",
|
|
||||||
"word: Access\n",
|
|
||||||
"synset: Synset('entree.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('entree.n.02')>\n",
|
|
||||||
"synset: Synset('access.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.02')>\n",
|
|
||||||
"synset: Synset('access.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.03')>\n",
|
|
||||||
"synset: Synset('access.n.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.04')>\n",
|
|
||||||
"synset: Synset('access.n.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.05')>\n",
|
|
||||||
"synset: Synset('access.n.06')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.n.06')>\n",
|
|
||||||
"synset: Synset('access.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.v.01')>\n",
|
|
||||||
"synset: Synset('access.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('access.v.02')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: to\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: hardware\n",
|
|
||||||
"synset: Synset('hardware.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.01')>\n",
|
|
||||||
"synset: Synset('hardware.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.02')>\n",
|
|
||||||
"synset: Synset('hardware.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('hardware.n.03')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: functions\n",
|
|
||||||
"synset: Synset('function.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.01')>\n",
|
|
||||||
"synset: Synset('function.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.02')>\n",
|
|
||||||
"synset: Synset('function.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.03')>\n",
|
|
||||||
"synset: Synset('function.n.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.04')>\n",
|
|
||||||
"synset: Synset('function.n.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.05')>\n",
|
|
||||||
"synset: Synset('affair.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('affair.n.03')>\n",
|
|
||||||
"synset: Synset('routine.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('routine.n.03')>\n",
|
|
||||||
"synset: Synset('function.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.v.01')>\n",
|
|
||||||
"synset: Synset('serve.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('serve.v.01')>\n",
|
|
||||||
"synset: Synset('officiate.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('officiate.v.02')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: is\n",
|
|
||||||
"synset: Synset('be.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.01')>\n",
|
|
||||||
"synset: Synset('be.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.02')>\n",
|
|
||||||
"synset: Synset('be.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.03')>\n",
|
|
||||||
"synset: Synset('exist.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('exist.v.01')>\n",
|
|
||||||
"synset: Synset('be.v.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.05')>\n",
|
|
||||||
"synset: Synset('equal.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('equal.v.01')>\n",
|
|
||||||
"synset: Synset('constitute.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('constitute.v.01')>\n",
|
|
||||||
"synset: Synset('be.v.08')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.08')>\n",
|
|
||||||
"synset: Synset('embody.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('embody.v.02')>\n",
|
|
||||||
"synset: Synset('be.v.10')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.10')>\n",
|
|
||||||
"synset: Synset('be.v.11')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.11')>\n",
|
|
||||||
"synset: Synset('be.v.12')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('be.v.12')>\n",
|
|
||||||
"synset: Synset('cost.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('cost.v.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: limited\n",
|
|
||||||
"synset: Synset('express.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('express.n.02')>\n",
|
|
||||||
"synset: Synset('restrict.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('restrict.v.03')>\n",
|
|
||||||
"synset: Synset('limit.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limit.v.02')>\n",
|
|
||||||
"synset: Synset('specify.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('specify.v.02')>\n",
|
|
||||||
"synset: Synset('limited.a.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.a.01')>\n",
|
|
||||||
"synset: Synset('circumscribed.s.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('circumscribed.s.01')>\n",
|
|
||||||
"synset: Synset('limited.s.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.03')>\n",
|
|
||||||
"synset: Synset('limited.s.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.04')>\n",
|
|
||||||
"synset: Synset('limited.s.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.05')>\n",
|
|
||||||
"synset: Synset('limited.s.06')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.06')>\n",
|
|
||||||
"synset: Synset('limited.s.07')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('limited.s.07')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: not\n",
|
|
||||||
"synset: Synset('not.r.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('not.r.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: only\n",
|
|
||||||
"synset: Synset('lone.s.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('lone.s.03')>\n",
|
|
||||||
"synset: Synset('alone.s.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('alone.s.03')>\n",
|
|
||||||
"synset: Synset('merely.r.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('merely.r.01')>\n",
|
|
||||||
"synset: Synset('entirely.r.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('entirely.r.02')>\n",
|
|
||||||
"synset: Synset('only.r.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.03')>\n",
|
|
||||||
"synset: Synset('only.r.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.04')>\n",
|
|
||||||
"synset: Synset('only.r.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.05')>\n",
|
|
||||||
"synset: Synset('only.r.06')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.06')>\n",
|
|
||||||
"synset: Synset('only.r.07')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('only.r.07')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: through\n",
|
|
||||||
"synset: Synset('done.s.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('done.s.01')>\n",
|
|
||||||
"synset: Synset('through.s.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.s.02')>\n",
|
|
||||||
"synset: Synset('through.r.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.01')>\n",
|
|
||||||
"synset: Synset('through.r.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.02')>\n",
|
|
||||||
"synset: Synset('through.r.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.03')>\n",
|
|
||||||
"synset: Synset('through.r.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.04')>\n",
|
|
||||||
"synset: Synset('through.r.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.05')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: the\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: software\n",
|
|
||||||
"synset: Synset('software.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('software.n.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: application,\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: but\n",
|
|
||||||
"synset: Synset('merely.r.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('merely.r.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: through\n",
|
|
||||||
"synset: Synset('done.s.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('done.s.01')>\n",
|
|
||||||
"synset: Synset('through.s.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.s.02')>\n",
|
|
||||||
"synset: Synset('through.r.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.01')>\n",
|
|
||||||
"synset: Synset('through.r.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.02')>\n",
|
|
||||||
"synset: Synset('through.r.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.03')>\n",
|
|
||||||
"synset: Synset('through.r.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.04')>\n",
|
|
||||||
"synset: Synset('through.r.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('through.r.05')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: the\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: syntax\n",
|
|
||||||
"synset: Synset('syntax.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.01')>\n",
|
|
||||||
"synset: Synset('syntax.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.02')>\n",
|
|
||||||
"synset: Synset('syntax.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('syntax.n.03')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: the\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: software\n",
|
|
||||||
"synset: Synset('software.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('software.n.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: application\n",
|
|
||||||
"synset: Synset('application.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.01')>\n",
|
|
||||||
"synset: Synset('application.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.02')>\n",
|
|
||||||
"synset: Synset('application.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.03')>\n",
|
|
||||||
"synset: Synset('application.n.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.04')>\n",
|
|
||||||
"synset: Synset('lotion.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('lotion.n.02')>\n",
|
|
||||||
"synset: Synset('application.n.06')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.06')>\n",
|
|
||||||
"synset: Synset('application.n.07')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('application.n.07')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: may\n",
|
|
||||||
"synset: Synset('may.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('may.n.01')>\n",
|
|
||||||
"synset: Synset('whitethorn.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('whitethorn.n.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: use\n",
|
|
||||||
"synset: Synset('use.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.n.01')>\n",
|
|
||||||
"synset: Synset('function.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('function.n.02')>\n",
|
|
||||||
"synset: Synset('use.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.n.03')>\n",
|
|
||||||
"synset: Synset('consumption.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('consumption.n.03')>\n",
|
|
||||||
"synset: Synset('habit.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('habit.n.02')>\n",
|
|
||||||
"synset: Synset('manipulation.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('manipulation.n.01')>\n",
|
|
||||||
"synset: Synset('use.n.07')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.n.07')>\n",
|
|
||||||
"synset: Synset('use.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.01')>\n",
|
|
||||||
"synset: Synset('use.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.02')>\n",
|
|
||||||
"synset: Synset('use.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.03')>\n",
|
|
||||||
"synset: Synset('use.v.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.04')>\n",
|
|
||||||
"synset: Synset('practice.v.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('practice.v.04')>\n",
|
|
||||||
"synset: Synset('use.v.06')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('use.v.06')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: for\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: storing\n",
|
|
||||||
"synset: Synset('store.v.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('store.v.01')>\n",
|
|
||||||
"synset: Synset('store.v.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('store.v.02')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: and\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: transmitting\n",
|
|
||||||
"synset: Synset('transmission.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('transmission.n.01')>\n",
|
|
||||||
"synset: Synset('convey.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('convey.v.03')>\n",
|
|
||||||
"synset: Synset('impart.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('impart.v.03')>\n",
|
|
||||||
"synset: Synset('air.v.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('air.v.03')>\n",
|
|
||||||
"synset: Synset('transmit.v.04')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('transmit.v.04')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: the\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: information\n",
|
|
||||||
"synset: Synset('information.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.01')>\n",
|
|
||||||
"synset: Synset('information.n.02')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.02')>\n",
|
|
||||||
"synset: Synset('information.n.03')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.03')>\n",
|
|
||||||
"synset: Synset('data.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('data.n.01')>\n",
|
|
||||||
"synset: Synset('information.n.05')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('information.n.05')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: it\n",
|
|
||||||
"synset: Synset('information_technology.n.01')\n",
|
|
||||||
"lemmas: <bound method Synset.lemma_names of Synset('information_technology.n.01')>\n",
|
|
||||||
"----------\n",
|
|
||||||
"word: processes.\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"words = sentence.split()\n",
|
|
||||||
"for word in words:\n",
|
|
||||||
" print('----------')\n",
|
|
||||||
" print('word:', word)\n",
|
|
||||||
" for synset in wordnet.synsets(word):\n",
|
|
||||||
" print('synset:', synset)\n",
|
|
||||||
" print('lemmas:', synset.lemma_names)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 6,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"<bound method Lemma.name of Lemma('car.n.01.car')>\n",
|
|
||||||
"<bound method Lemma.name of Lemma('car.n.02.car')>\n",
|
|
||||||
"<bound method Lemma.name of Lemma('car.n.03.car')>\n",
|
|
||||||
"<bound method Lemma.name of Lemma('car.n.04.car')>\n",
|
|
||||||
"<bound method Lemma.name of Lemma('cable_car.n.01.car')>\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"for lemma in wordnet.lemmas('car'):\n",
|
|
||||||
" print(lemma.name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 7,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"<bound method Synset.examples of Synset('car.n.01')>\n",
|
|
||||||
"<bound method Synset.examples of Synset('car.n.02')>\n",
|
|
||||||
"<bound method Synset.examples of Synset('car.n.03')>\n",
|
|
||||||
"<bound method Synset.examples of Synset('car.n.04')>\n",
|
|
||||||
"<bound method Synset.examples of Synset('cable_car.n.01')>\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"for synset in wordnet.synsets('car'):\n",
|
|
||||||
" print(synset.examples)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.7.3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 4
|
|
||||||
}
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue