You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2.5 KiB
2.5 KiB
In [1]:
# Render a part-of-speech-tagged view of a text as HTML, then print it to PDF.
#
# Steps:
#   1. read txt/practicalvision.txt and tokenize / POS-tag it with NLTK;
#   2. wrap every token in a <span> whose class is its grammar tag and save
#      the page as txt/practical_viz.html (styled by grammar_viz.css);
#   3. render that page to practical_viz.pdf with WeasyPrint.
from html import escape

import nltk
from weasyprint import CSS, HTML
from weasyprint.fonts import FontConfiguration

font_config = FontConfiguration()

# Context manager so the input file is closed deterministically.
# NOTE(review): assumes the source text is UTF-8 encoded -- adjust if not.
with open('txt/practicalvision.txt', encoding='utf-8') as source:
    txt = source.read()

words = nltk.word_tokenize(txt)      # tokenize the text
tagged_words = nltk.pos_tag(words)   # (token, grammar tag) pairs

# Collect fragments in a list and join once instead of repeated `+=`.
fragments = ['<h1>Practical Vision, by Jalada</h1>']
for word, tag in tagged_words:
    # Escape both values: tokens may contain '<', '>' or '&', which would
    # otherwise be parsed as markup and break the generated page.
    fragments.append(f'<span class="{escape(tag)}">{escape(word)}</span> ')
    if '.' in word:
        # Start a new line after every token that contains a full stop.
        fragments.append('<br> \n')
content = ''.join(fragments)

# Write with an explicit encoding that matches the <meta charset> below.
with open('txt/practical_viz.html', 'w', encoding='utf-8') as f:
    f.write(f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <link rel="stylesheet" type="text/css" href="grammar_viz.css">
    <title></title>
</head>
<body>
{content}
</body>
</html>
""")

html = HTML('txt/practical_viz.html')  # load the generated HTML file

# Print stylesheet. The page size lives in an @page rule because `size` is
# not a valid property on `body`. Body text is fully transparent and only the
# heading is forced to black here -- presumably grammar_viz.css re-colours the
# tag classes that should stay visible (verify against that stylesheet).
css = CSS(string='''
    @page {
        size: A4;
    }
    body {
        font-family: serif;
        font-size: 12pt;
        line-height: 1.4;
        padding: 3vw;
        color: rgba(0,0,0,0)
    }
    h1 {
        width: 100%;
        text-align: center;
        font-size: 250%;
        line-height: 1.25;
        color: black;
    }''', font_config=font_config)

# Generate the final A4 PDF.
html.write_pdf('practical_viz.pdf', stylesheets=[css], font_config=font_config)
In [ ]: