You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2.5 KiB

In [1]:
from html import escape

import nltk
from weasyprint import HTML, CSS
from weasyprint.fonts import FontConfiguration

font_config = FontConfiguration()

# Read the source text. The context manager closes the file handle, and the
# explicit encoding matches the utf-8 charset declared in the generated HTML.
with open('txt/practicalvision.txt', encoding='utf-8') as source:
    txt = source.read()
words = nltk.word_tokenize(txt)  # split the text into tokens
tagged_words = nltk.pos_tag(words)  # pair each token with its grammar (POS) tag

# Collect the HTML fragments in a list and join once at the end instead of
# growing a string with repeated `+=`.
fragments = ['<h1>Practical Vision, by Jalada</h1>']

for word, tag in tagged_words:
    # Wrap every token in a <span> whose class is its grammar tag. Both values
    # are escaped so that characters such as '&', '<' or '"' coming from the
    # text cannot break the generated markup.
    fragments.append(f'<span class="{escape(tag)}">{escape(word)}</span> ')
    if '.' in word:
        # Start a new line after any token containing a period.
        fragments.append('<br> \n')

content = ''.join(fragments)

# Save the tagged text as a standalone HTML page. The file is written as
# UTF-8 explicitly so the bytes on disk agree with the <meta charset="utf-8">
# declaration regardless of the platform's default encoding.
with open("txt/practical_viz.html", "w", encoding="utf-8") as f:
    f.write(f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <link rel="stylesheet" type="text/css" href="grammar_viz.css">
    <title></title>
</head>
<body>
{content}
</body>
</html>
""")

html = HTML("txt/practical_viz.html")  # load the generated HTML file

# Print stylesheet for the final PDF.
# The page size is declared in an @page rule: `size` is a page-box descriptor
# and has no effect when placed on an element such as `body`.
# Body text is fully transparent (alpha 0) in this stylesheet; only the h1 is
# forced to black here.
css = CSS(string=''' 
    @page{
        size: A4;
    }
    body{
        font-family: serif;
        font-size: 12pt;
        line-height: 1.4;
        padding: 3vw;
        color: rgba(0,0,0,0)
    }
    h1{
        width: 100%;
        text-align: center;
        font-size: 250%;
        line-height: 1.25;
        color: black;
        
    }''', font_config=font_config)

# Render the A4 PDF, applying the print stylesheet on top of the page's own CSS.
html.write_pdf('practical_viz.pdf', stylesheets=[css], font_config=font_config)
In [ ]: