// TODO: plain array of words, then Python -> split, for loop, POS (part-of-speech) tagging, and a <span> with the POS class in the HTML
// The Myth of the Natural Language %%
// Speech2Design!!
// Welcome to the core code of the speech-to-text tool.
// The tool we are going to use is the "Web Speech API".
// What is an API?
// An API is a set of defined rules that explain how computers or applications communicate with one another.
// APIs sit between an application and the web server, acting as an intermediary layer that processes data transfer between systems.
// GLOBAL VARIABLES
let interimTranscripts = ""; // Interim text of the current result event (the recognizer tries several words before settling on one).
let allTheInterim = ""; // Accumulated markup of *all* interim results seen so far.
let finalTranscripts = ""; // Finalised transcript, one recognised phrase per line.
// Bridges to the HTML file:
let speech = document.getElementById("result"); // where the final result of the recognition is printed
let process = document.getElementById("process"); // where the interim results are printed
// TEXT STORAGE
// localStorage keeps the transcript in the browser with no expiration date.
// getItem() returns null when nothing has been stored yet; fall back to an
// empty string so later `finalTranscripts += ...` does not produce "null...".
let textStorage = localStorage.getItem("speech") ?? "";
// The stored text separates phrases with "\n"; render them as line breaks,
// the same way the onresult handler does.
speech.innerHTML = textStorage.replaceAll("\n", "<br />");
finalTranscripts = textStorage;
// RESET STORAGE
// Button that wipes every result collected up to this moment.
let resetStorage = document.getElementById("reset");
resetStorage.addEventListener("click", () => {
  // Clear the in-memory transcript state.
  allTheInterim = "";
  finalTranscripts = "";
  interimTranscripts = "";
  textStorage = "";
  // Clear both output panels: allTheInterim is what feeds the interim panel,
  // so that panel has to be emptied together with the final one.
  speech.innerHTML = "";
  process.innerHTML = "";
  // Clear the persisted copy.
  localStorage.setItem("speech", "");
});
// SAVE FILE
// Clicking the "save" button downloads the text currently kept in
// localStorage under "speech" as a file named "speech.txt".
let saveButton = document.getElementById("save");
saveButton.addEventListener("click", function () {
  const storedSpeech = localStorage.getItem("speech");
  download("speech.txt", storedSpeech);
});
/**
 * Triggers a browser download of `text` as a UTF-8 plain-text file.
 *
 * A hidden <a> element carrying a `data:` URL and a `download` attribute is
 * attached to the page, clicked programmatically, and removed again.
 *
 * @param {string} filename - Name suggested for the downloaded file.
 * @param {?string} text - File content; null/undefined yields an empty file.
 */
function download(filename, text) {
  const link = document.createElement("a");
  // encodeURIComponent(null) would write the literal word "null" into the
  // file, so a missing text is treated as empty content instead.
  const payload = encodeURIComponent(text ?? "");
  link.setAttribute("href", `data:text/plain;charset=utf-8,${payload}`);
  link.setAttribute("download", filename);
  link.style.display = "none";
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
}
// START LISTENING
// startConverting is a function declaration (defined right below), so it is hoisted and can be called here.
startConverting ( ) ; // Finally, here is where the magic happens: recognition starts as soon as the script runs.
/**
 * Starts continuous speech recognition and mirrors its results into the page.
 *
 * Reads and updates the file-level state (`interimTranscripts`,
 * `allTheInterim`, `finalTranscripts`, `textStorage`), writes into the
 * `speech` and `process` elements, and persists the transcript in
 * localStorage under the key "speech". When the browser exposes no speech
 * recognition constructor, a notice is written into `speech` instead.
 */
function startConverting() {
  // Prefer the standard constructor and fall back to the prefixed one.
  // (`new A() || new B()` can never fall back: `new` always yields an object.)
  const Recognition = window.SpeechRecognition ?? window.webkitSpeechRecognition;
  if (!Recognition) {
    // Unfortunately this API works only on Chrome...
    speech.innerHTML = "At the moment this works only in Chrome, sorry";
    return;
  }

  const speechRecognizer = new Recognition();
  // Settings:
  speechRecognizer.continuous = true; // keep listening after the speaker pauses
  speechRecognizer.interimResults = true; // also deliver the interim results
  speechRecognizer.lang = "en-US"; // which language to recognize (!!)

  // Set to false once the browser refuses access, so that onend does not
  // restart the recognizer forever.
  let canRestart = true;

  // ON END
  speechRecognizer.onend = function () {
    // If the speech-to-text stops working, it is reported in the console...
    console.log("Speech recognition service disconnected");
    if (canRestart) {
      speechRecognizer.start(); // ...and then it restarts itself
    }
  };

  // ON SOUND START
  speechRecognizer.onsoundstart = function () {
    console.log("Some sound is being received");
  };

  // ON ERROR
  speechRecognizer.onerror = function (event) {
    // Log the error
    console.log(event);
    // A denied permission cannot be fixed by restarting; stop the loop.
    if (event.error === "not-allowed" || event.error === "service-not-allowed") {
      canRestart = false;
    }
  };

  // ON RESULT
  speechRecognizer.onresult = function (event) {
    // Here is where the speech-to-text shows itself on the web page.
    interimTranscripts = "";
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const result = event.results[i];
      const { transcript, confidence } = result[0];
      if (result.isFinal) {
        finalTranscripts += transcript.trim() + "\n";
      } else {
        // Interim results are logged too; the reported confidence (plus a
        // 0.3 floor so nothing is fully transparent) drives the opacity.
        interimTranscripts += transcript;
        allTheInterim += ` <span style="opacity: ${confidence + 0.3}">${interimTranscripts}</span>`;
      }
    }
    process.innerHTML = allTheInterim;

    const final =
      finalTranscripts + '<span class="interim">' + interimTranscripts + "</span>";
    speech.innerHTML = final.replaceAll("\n", "<br />");
    window.scrollTo(0, document.body.scrollHeight);
    textStorage = final;
    // Persist the recognized text without the interim <span> wrapper.
    localStorage.setItem("speech", finalTranscripts + interimTranscripts);
  };

  // Start only after every handler is attached.
  speechRecognizer.start();
}
// October 2021, copyleft || Kamome and Funix || Speech-to-Derive * The Myth of Natural Language || Roodkapje, Rotterdam