# October 2021, copyleft || Kamome and Funix || Speech-to-Derive * The Myth of Natural Language || Roodkapje, Rotterdam

# NLTK (Natural Language ToolKit) is a library for Natural Language Processing.
# We will use it to get the Part Of Speech (POS) of the speech-to-text results.
#
# What does it mean?
#
# It works as grammar tagging: for instance, the sentence "Around the clouds"
# would have this output:
#
# [('Around', 'IN'), ('the', 'DT'), ('clouds', 'NN')]
#
# 'IN' means 'preposition' - 'DT' means 'determiner' - 'NN' means 'noun, common, singular or mass'
#
# To see all the POS tags, open the terminal and copy:
#
# python3
# import nltk
# nltk.help.upenn_tagset()
#
# see also:
# https://cheatography.com/deacondesperado/cheat-sheets/nltk-part-of-speech-tags/

import html  # to escape the words before they land in the html files
import time  # to create delays :: for having a few seconds to check the console

# The speech-to-text result :: downloaded from the web interface
SPEECH_PATH = '../speech.txt'

# The two pages of the layout :: same words, different first part
LAYOUT_PATH_1 = '../2_layout/1.html'
LAYOUT_PATH_2 = '../2_layout/2.html'

# start the layouting :: html + css + paged.js >>
#
# The first part of the text for the two html files with different CSS.
#
# NOTE(review): the comments of this script talk about html, CSS and classes
# ('dot' for the dots, the POS tag for every other word), but no tag is visible
# in any of the strings below and the POS tag is never written out. It looks
# like the markup got lost on the way; the visible text is kept untouched here.
# Confirm against the original file before relying on the CSS.
HTML_HEAD_1 = ''' 📡 💻📘

Title!

Authors!

'''

HTML_HEAD_2 = ''' 📡 💻📘

Title!

Authors!

'''

# The text that closes every page
HTML_TAIL = '''

'''


def render_tokens(pos):
    """Return the page text for a list of (word, POS tag) pairs.

    pos is the list made by nltk.pos_tag, e.g. [('Around', 'IN'), ('.', '.')].
    Every pair becomes one line: a dot becomes " ." and any other word is
    wrapped in single spaces. The POS tag itself is not written (see the
    NOTE(review) above the page strings).

    An empty list gives an empty string.
    """
    lines = []
    for word, _tag in pos:
        if word == '.':  # the dot gets its own line, without trailing space
            lines.append(" .\n")
        else:
            # quote=False escapes & < > only :: the apostrophes must stay as
            # they are, because tidy() looks for them afterwards
            lines.append(" " + html.escape(word, quote=False) + " \n")
    # join once at the end instead of growing a string inside the loop
    return ''.join(lines)


def tidy(markup):
    """Return markup with the wrong " ." and " '" positions tidied up.

    The tokenizer splits "clouds." and "it's" into separate tokens, so the
    rendered text has a space before each dot and apostrophe; this removes it.
    """
    return markup.replace(' .', '.').replace(" '", "'")


def build_page(head, pos):
    """Return a full page: head, then the tidied words and closing text.

    head is written as it is; only the words and the closing text are tidied.
    """
    return head + tidy(render_tokens(pos) + HTML_TAIL)


def main():
    """Read the speech, tag every word with its POS, save the two pages."""
    # to use NLTK :: imported here, so the functions above can be used (and
    # tried out) on a computer where NLTK is not installed
    import nltk

    # Open the speech-to-text result >>
    # assumes the downloaded text is UTF-8, like the pages written below --
    # TODO confirm; without it python falls back on the system encoding
    with open(SPEECH_PATH, 'r', encoding='utf-8') as speech:  # let's import the text
        text = speech.read()  # and make python read it :)

    print(text)  # print it!
    time.sleep(2)  # check it in the console!

    tokens = nltk.word_tokenize(text)  # Tokenize the words :: split each word

    # Elaborate the Part of Speech! It will create an array, a list
    pos = nltk.pos_tag(tokens)
    # print(pos) # print the array!
    # time.sleep(2) # check it in the console!

    # Process each element of the list
    for e in pos:  # e is the current element, pos is the array to process
        print(e)

    # Save the files!
    # the with statement closes each file by itself, no close() needed
    with open(LAYOUT_PATH_1, 'w', encoding='utf-8') as index:
        index.write(build_page(HTML_HEAD_1, pos))

    with open(LAYOUT_PATH_2, 'w', encoding='utf-8') as index:
        index.write(build_page(HTML_HEAD_2, pos))


if __name__ == '__main__':
    main()