{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NLTK pos-tagged HTML → PDF\n",
    "\n",
    "Tokenise a plain-text manifesto, tag every token with its part of speech using NLTK, wrap each token in a `<span>` whose class is the tag, and render the styled page to PDF with WeasyPrint.\n",
    "\n",
    "**Requirements:** `manifesto1.txt` and `manifesto.css` next to this notebook, plus the NLTK tokenizer and tagger data (install once with `nltk.download`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from html import escape\n",
    "\n",
    "import nltk\n",
    "from IPython.display import IFrame\n",
    "from weasyprint import CSS, HTML\n",
    "from weasyprint.fonts import FontConfiguration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every path the notebook reads or writes, in one place.\n",
    "INPUT_PATH = \"manifesto1.txt\"       # source text (must exist)\n",
    "HTML_PATH = \"manifesto.html\"        # generated: POS-tagged page\n",
    "POS_CSS_PATH = \"language.css\"       # generated: colour rules per POS tag\n",
    "LAYOUT_CSS_PATH = \"manifesto.css\"   # existing stylesheet handed to WeasyPrint\n",
    "PDF_PATH = \"manifesto.pdf\"          # generated: final PDF\n",
    "\n",
    "TITLE = \"A Liquid Manifesto\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and tag the text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the input inside a context manager so the file handle is closed.\n",
    "with open(INPUT_PATH, encoding=\"utf-8\") as source_file:\n",
    "    txt = source_file.read()\n",
    "\n",
    "words = nltk.word_tokenize(txt)\n",
    "tagged_words = nltk.pos_tag(words)  # sequence of (word, tag) pairs\n",
    "\n",
    "# Show a small sample as a sanity check.\n",
    "tagged_words[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the HTML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect all the pieces of HTML in a list and join them once at the end.\n",
    "# Each token becomes <span class=\"TAG\">token</span> so that the POS rules in\n",
    "# the stylesheet (.JJ, .NN, ...) can colour it. escape() keeps characters\n",
    "# such as & and < in the text (or quotes in a tag) from breaking the markup.\n",
    "pieces = [f'<h1>{escape(TITLE)}</h1>']\n",
    "\n",
    "for word, tag in tagged_words:\n",
    "    pieces.append(f'<span class=\"{escape(tag)}\">{escape(word)}</span> ')\n",
    "\n",
    "content = ''.join(pieces)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the HTML file. The head links the stylesheet generated further down,\n",
    "# so its POS colour rules apply both in a browser and in the PDF.\n",
    "with open(HTML_PATH, \"w\", encoding=\"utf-8\") as f:\n",
    "    f.write(f\"\"\"<!DOCTYPE html>\n",
    "<html>\n",
    "<head>\n",
    "    <meta charset=\"utf-8\">\n",
    "    <link rel=\"stylesheet\" type=\"text/css\" href=\"{POS_CSS_PATH}\">\n",
    "    <title>{escape(TITLE)}</title>\n",
    "</head>\n",
    "<body>\n",
    "{content}\n",
    "</body>\n",
    "</html>\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write the stylesheet\n",
    "\n",
    "Each selector is a part-of-speech tag assigned by `nltk.pos_tag` and used as a class name on the `<span>` elements."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the CSS file: page setup plus one colour per group of POS tags.\n",
    "with open(POS_CSS_PATH, \"w\", encoding=\"utf-8\") as f:\n",
    "    f.write(\"\"\"\n",
    "@page{\n",
    "    size:A4;\n",
    "    background-color:lightgrey;\n",
    "    margin:10mm;\n",
    "}\n",
    ".JJ{\n",
    "    color:red;\n",
    "}\n",
    ".VB,\n",
    ".VBG{\n",
    "    color:magenta;\n",
    "}\n",
    ".NN,\n",
    ".NNP{\n",
    "    color:green;\n",
    "}\n",
    ".EX{\n",
    "    color: blue;\n",
    "}\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Render the PDF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# FontConfiguration only matters when a stylesheet uses @font-face; in that\n",
    "# case it has to be given to both the CSS object and write_pdf().\n",
    "font_config = FontConfiguration()\n",
    "\n",
    "# Collect all the files and write the PDF.\n",
    "html = HTML(filename=HTML_PATH)\n",
    "css = CSS(filename=LAYOUT_CSS_PATH, font_config=font_config)\n",
    "html.write_pdf(PDF_PATH, stylesheets=[css], font_config=font_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview your PDF in the notebook!\n",
    "IFrame(PDF_PATH, width=900, height=600)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}