# speech2design/1_pythoning/1-2_NLTKing.py

# October 2021, copyleft || Kamome and Funix ||  Speech-to-Derive * The Myth of Natural Language || Roodkapje, Rotterdam


# NLTK (Natural Language ToolKit) is a library for Natural Language Processing.
# We will use it to get the Part Of Speech (POS) of the speech-to-text results. 
# 
# What does it mean?
#
# It works as grammar tagging: for instance, the sentence "Around the clouds"
# would have this output: 
# 
#       [('Around', 'IN'), ('the', 'DT'), ('clouds', 'NN')]
#  
# 'IN' means 'preposition' - 'DT' means 'determiner' - 'NN' means 'noun, common, singular or mass'

   
import time                             # to create delays :: for having a few seconds to check the console
import nltk                             # to use NLTK

                                        # Open the speech-to-text result :: downloaded from the web interface >>

# Import the speech-to-text result. The encoding is passed explicitly so the
# text is decoded the same way on every machine instead of following the
# platform's locale default.
# NOTE(review): assumes the downloaded speech.txt is UTF-8 -- confirm against
# the web interface that produces it.
with open('../speech.txt', 'r', encoding='utf-8') as speech:
    text = speech.read()                # read the whole file into one string

print(text)                             # print it!

time.sleep(2)                           # check it in the console!

    
# Clean the raw text in two steps:
#   1. drop the empty "interim" placeholder spans found in the results
#   2. turn every line break into a sentence end (dot + space)
text = text.replace('<span class="interim"></span>', '')
text = text.replace('\n', '. ')

# Split the text into single words, then tag each word with its Part of Speech.
# The result is a list of (word, tag) pairs, e.g. ('clouds', 'NN').
tokens = nltk.word_tokenize(text)
pos = nltk.pos_tag(tokens)
print(pos)                              # print the list!

time.sleep(2)                           # check it in the console!


                                        # To see all the POS tags, open the terminal and copy:
                                        #       
                                        #       python3
                                        #       import nltk
                                        #       nltk.help.upenn_tagset()    


                                        # start the layouting :: html + css + paged.js >>  
                                        #   
                                        # declare html :: we will fill it in the process with loops                 
                                        # declare the first part of the text for two html files with different CSS

# Opening part shared by both pages. The two files differ only in the numbered
# stylesheet loaded from ./styles/, so a single template is filled in twice.
_HEAD_TEMPLATE = '''
<html>
<head>
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="stylesheet" href="./pagedjs_files/interface.css">
    <script src="./pagedjs_files/paged.polyfill.js"></script>
    <link rel="stylesheet" href="./styles/{css}.css">
    <meta charset="utf-8"/>
    <title>📡 💻📘</title>
</head>
<body>
'''

html1 = _HEAD_TEMPLATE.format(css=1)    # header that loads ./styles/1.css
html2 = _HEAD_TEMPLATE.format(css=2)    # header that loads ./styles/2.css

html = ''                               # body markup :: filled in later by the loop

                                        
                                        # Process each element of the list
    
# Turn every (word, tag) pair of `pos` into a <span> whose class is the POS tag.
#
# Words and tags are escaped before they are placed in the markup: a token such
# as '<' or '&', or a tag such as '' (two single quotes) inside class='...',
# would otherwise produce broken html.
from html import escape as escape_html  # only binds escape_html :: no clash with the `html` string

spans = []                              # collect the pieces, join them once at the end

for word, tag in pos:                   # word is the token, tag is its Part of Speech

    if word == '.':                     # a dot gets the class 'dot' and ends the line
        spans.append("   <span class='dot'>.</span><br> \n")

    else:                               # any other word gets its POS tag as class
        # quote=False keeps apostrophes literal in the text, so the tidy step
        # below still sees them; the tag sits inside quotes, so quotes are escaped.
        spans.append("   <span class='" + escape_html(tag) + "'> "
                     + escape_html(word, quote=False) + " </span>\n")

html += ''.join(spans)

                                        # Close the html text
html += '''</body>                  
</html>'''

html = html.replace(' .','.').replace(" '", "'") # to tidy wrong " . " and " ' " position

                                            
                                        # Save the <html> files!

# Write the two pages: each one is its own header followed by the shared body.
#
# The header declares charset utf-8 and its title contains emoji, so the files
# are written explicitly as UTF-8. With the platform default encoding the write
# can fail (UnicodeEncodeError) or produce bytes that contradict the declared
# charset.
for page_path, page_head in (('../2_layout/1.html', html1),
                             ('../2_layout/2.html', html2)):
    with open(page_path, 'w', encoding='utf-8') as index:
        index.write(page_head)
        index.write(html)
readme ++ 3 years ago			`# October 2021, copyleft \|\| Kamome and Funix \|\| Speech-to-Derive * The Myth of Natural Language \|\| Roodkapje, Rotterdam`


comments and tidy 3 years ago			`# NLTK (Natural Language ToolKit) is a library for Natural Language Process.`
			`# We will use it to get the Part Of Speech (POS) of the speech-to-text results.`
			`#`
			`# What does it mean?`
			`#`
			`# It works as grammar tagging: for instance, the sentence "Around the clouds"`
			`# would have this output:`
			`#`
			`# [('Around', 'IN'), ('the', 'DT'), ('clouds', 'NN')]`
			`#`
			`# 'IN' means 'preposition' - 'DT' means 'determiner' - 'NN' means 'noun, common, singular or mass'`


			`import time # to create delays :: for having a few seconds to check the console`
			`import nltk # to use NLTK`

			`# Open the speech-to-text result :: downloaded from the web interface >>`

tidy 3 years ago			`with open('../speech.txt','r') as speech: # let's import the text`
comments and tidy 3 years ago			`text = speech.read() # and make python read it :)`
			`print(text) # print it!`

			`time.sleep(2) # check it in the console!`


			`text = text.replace('<span class="interim"></span>','').replace('\n','. ') # delete this from the results`

			`tokens = nltk.word_tokenize(text) # Tokenize the words :: split each word`
			`pos = nltk.pos_tag(tokens) # Elaborate the Part of Speech! It will create an array, a list`
			`print(pos) # print the array!`

			`time.sleep(2) # check it in the console!`



			`# To see all the POS tags, open the terminal and copy:`
			`#`
			`# python3`
			`# import nltk`
			`# nltk.help.upenn_tagset()`




			`# start the layouting :: html + css + paged.js >>`
			`#`
			`# declare html :: we will fill it in the process with loops`
			`# declare the first part of the text for two html files with different CSS`

			`html = ''`

			`html1 = '''`
			`<html>`
			`<head>`
			`<meta name="viewport" content="width=device-width, initial-scale=1">`
			`<link rel="stylesheet" href="./pagedjs_files/interface.css">`
			`<script src="./pagedjs_files/paged.polyfill.js"></script>`
			`<link rel="stylesheet" href="./styles/1.css">`
			`<meta charset="utf-8"/>`
			`<title>📡 💻📘</title>`
			`</head>`
			`<body>`
			`'''`

			`html2 = '''`
			`<html>`
			`<head>`
			`<meta name="viewport" content="width=device-width, initial-scale=1">`
			`<link rel="stylesheet" href="./pagedjs_files/interface.css">`
			`<script src="./pagedjs_files/paged.polyfill.js"></script>`
			`<link rel="stylesheet" href="./styles/2.css">`
			`<meta charset="utf-8"/>`
			`<title>📡 💻📘</title>`
			`</head>`
			`<body>`
			`'''`


			`# Process each element of the list`

			`for e in pos: # e is the current element, pos is the array to process`

			`if e[0] == '.': # if e is a dot, its class will be 'dot'`
			`html += " <span class='dot'>.</span><br> \n"`

			`else: # fill the html with each word and assign it as class its POS`
tidy 3 years ago			`html += " <span class='"+e[1]+"'> "+e[0]+" </span>\n"`
comments and tidy 3 years ago

			`# Close the html text`
			`html += '''</body>`
			`</html>'''`

			`html = html.replace(' .','.').replace(" '", "'") # to tidy wrong " . " and " ' " position`


			`# Save the <html> files!`

			`with open('../2_layout/1.html','w') as index:`
			`index.write(html1)`
			`index.write(html)`

			`with open('../2_layout/2.html','w') as index:`
			`index.write(html2)`
readme ++ 3 years ago			`index.write(html)`