/*
 * Copyright (C) 2012-2013 DFKI GmbH
 * Deutsches Forschungszentrum fuer Kuenstliche Intelligenz
 * German Research Center for Artificial Intelligence
 * http://www.dfki.de
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
/**
 * Media-plugin "html5AudioInput": speech recognition (ASR) via the browser's
 * Web Audio / getUserMedia APIs; recorded audio is streamed over a WebSocket
 * to a recognition server (googleMediator) which answers with JSON results.
 */
newMediaPlugin = {
	/**
	 * Initialize the plugin: load the recorder extension script, then create
	 * the audio-input implementation and hand it to the callback.
	 *
	 * @param {Function} callBack
	 *          invoked with the created plugin-instance (the exported ASR API)
	 * @param {mmir.MediaManager} mediaManagerInstance
	 *          the MediaManager (used for listeners and logging)
	 * @memberOf Html5AudioInput#
	 */
	initialize: function(callBack, mediaManagerInstance){

		/** plugin name, used as configuration prefix @memberOf Html5AudioInput# */
		var _pluginName = 'html5AudioInput';

		/** @type mmir.LanguageManager @memberOf Html5AudioInput# */
		var languageManager = require('languageManager');
		/** @type mmir.ConfigurationManager @memberOf Html5AudioInput# */
		var configurationManager = require('configurationManager');
		/** @type mmir.Constants @memberOf Html5AudioInput# */
		var constants = require('constants');
		/** @type mmir.CommonUtils @memberOf Html5AudioInput# */
		var commonUtils = require('commonUtils');

		/**
		 * Creates the actual plugin implementation and returns the public
		 * ASR functions (startRecord, stopRecord, recognize, cancelRecognition).
		 *
		 * @returns {Object} the exported API, or an empty object if the
		 *          environment is non-functional (no Web Audio / no server)
		 * @memberOf Html5AudioInput#
		 */
		function htmlAudioConstructor(){

			/** status-flag: is recording in progress? @memberOf Html5AudioInput# */
			var recording = false;
			/** pool of server-side input-IDs (true = free) @memberOf Html5AudioInput# */
			var freeIds = [true];
			/** is an input-ID currently in use? @memberOf Html5AudioInput# */
			var hasActiveId = false;
			/** @type WebSocket @memberOf Html5AudioInput# */
			var webSocket = null;
			/** set when initialization failed irrecoverably @memberOf Html5AudioInput# */
			var nonFunctional = false;
			/** marks that the NEXT server result belongs to the final blob @memberOf Html5AudioInput# */
			var lastBlob = false;
			/** forward intermediate results to the text-processor? @memberOf Html5AudioInput# */
			var isUseIntermediateResults = false;
			/** currently active server-side input-ID @memberOf Html5AudioInput# */
			var inputId = 0;
			/** @type AudioContext @memberOf Html5AudioInput# */
			var audio_context = null;
			/** @type LocalMediaStream @memberOf Html5AudioInput# */
			var stream = null;
			/** @type RecorderExt @memberOf Html5AudioInput# */
			var recorder = null;
			/** aggregated recognized text (for non-intermediate mode) @memberOf Html5AudioInput# */
			var totalText = '';
			/**
			 * the function that is called on the recognized text that came back from the server
			 * @memberOf Html5AudioInput#
			 */
			var textProcessor = function(e, id){};
			/** @type WebWorker the silence-detection worker (recorder.processor) @memberOf Html5AudioInput# */
			var silenceDetection = null;
			/** stop recording when silence is detected? @memberOf Html5AudioInput# */
			var endOfSpeechDetection = false;
			/** @type Function error callback for the current operation @memberOf Html5AudioInput# */
			var currentFailureCallback = null;

			/**
			 * for gathering partial ASR results when using startRecord:
			 * @memberOf Html5AudioInput#
			 */
			var recordAsrResultCache = [];
			/** sort partial results by their input-ID @memberOf Html5AudioInput# */
			var recordAsrResultSorter = function(a, b){ return a.id - b.id; };
			/** concatenate the cached partial results into one string @memberOf Html5AudioInput# */
			var asrResultCacheToString = function(cache){
				var size = cache.length;
				var sb = new Array(size);//use "StringBuffer" for concatenating partial results
				for(var i = 0; i < size; ++i){
					sb[i] = cache[i].text;
				}
				return sb.join('');
			};

			/**
			 * Claim and return the lowest free input-ID (grows the pool if needed).
			 * @memberOf Html5AudioInput#
			 */
			function findLowestFreeId(){
				for (var i = 0; i < freeIds.length; i++){
					if (freeIds[i]){
						freeIds[i] = false;
						return i;
					}
				}
				freeIds.push(false);
				return freeIds.length - 1;
			}

			/** store a partial ASR result (kept sorted by ID) @memberOf Html5AudioInput# */
			var recordAsrResultAggregator = function printResult(res, id){
				recordAsrResultCache.push({
					text: res,
					id: id
				});
				recordAsrResultCache.sort(recordAsrResultSorter);
			};

			/**
			 * Send a message over the WebSocket; transparently (re-)connects if the
			 * socket is closed, and queues messages while it is still CONNECTING.
			 * @memberOf Html5AudioInput#
			 */
			function webSocketSend(msg){
				if(!webSocket || webSocket.readyState >= 2){//INVALID or CLOSING/CLOSED
					webSocket = null;//<- avoid close() call in initializer
					initializeWebSocket( function(){ webSocket.send(msg); });
				}
				else if(webSocket.readyState == 0){//CONNECTING: queue until onopen
					if(webSocket.onInitStack){
						webSocket.onInitStack.push(msg);
					}
					else {
						webSocket.onInitStack = [msg];
					}
				}
				else {
					try {//FIXME this should not be necessary...
						webSocket.send(msg);
					} catch(err){
						console.error(err);
					}
				}
			}

			/**
			 * initializes the connection to the googleMediator-server,
			 * where the audio will be sent in order to be recognized.
			 *
			 * @param {Function} [oninit] invoked once the socket is open
			 * @memberOf Html5AudioInput#
			 */
			function initializeWebSocket(oninit){
				if (webSocket){
					webSocket.close();
				}
				webSocket = new WebSocket(configurationManager.getString( [_pluginName, "webSocketAddress"] ));

				/** @memberOf Html5AudioInput.webSocket# */
				webSocket.onopen = function () {
					if(oninit){
						console.log("invoking on-init callback for websocket");
						oninit();
					}

					//flush messages queued while CONNECTING:
					if(this.onInitStack){
						//FIX: original iterated "i < this.onInitStack" (the array itself)
						//     instead of its length, so queued messages were never sent
						for(var i = 0, size = this.onInitStack.length; i < size; ++i){
							this.send(this.onInitStack[i]);
						}
						delete this.onInitStack;
					}
				};
				/**
				 * Handle server replies: "ERROR ..." or "<id>_<json-result>"
				 * @memberOf Html5AudioInput.webSocket#
				 */
				webSocket.onmessage = function(e) {
					if (e.data.substring(0,5) == 'ERROR'){
						console.error('Serverside Error '+e.data.substring(6));
						return;/////////////////// EARLY EXIT ////////////////////
					}
					var id = e.data.substring(0, e.data.indexOf("_"));
					this.send("clear "+ id);
					freeIds[id] = true;
					var jsonText = e.data.substring(e.data.indexOf("_")+1, e.data.length);

					//FIXME debug output:
					console.debug('HTML5-Speech-Recoginition_received ASR: '+jsonText );
					if(jsonText && jsonText.length > 0){//FIXME
						var jsonResponse = JSON.parse(jsonText);
						if (jsonResponse.hypotheses.length > 0){
							if(textProcessor){
								textProcessor(jsonResponse.hypotheses[0].utterance, id);
							}
							//aggregate / gather text-parts into the recordAsrResultCache:
							recordAsrResultAggregator(jsonResponse.hypotheses[0].utterance, id);
						}
						//empty result: only notify the text-processor for the final
						//blob or when intermediate results were requested
						else if(lastBlob || isUseIntermediateResults){
							textProcessor('');
						}
						lastBlob = false;
					}
					else if(lastBlob || isUseIntermediateResults){
						textProcessor('');
					}
					lastBlob = false;
				};
				/** @memberOf Html5AudioInput.webSocket# */
				webSocket.onerror = function(e) {
					recorder && recorder.stop();
					lastBlob = false;
					silenceDetection && silenceDetection.postMessage({command: 'cancel'});

					if (currentFailureCallback){
						currentFailureCallback(e);
					}
					else {
						console.error('Websocket Error: '+e + (e.code? ' CODE: '+e.code : '')+(e.reason? ' REASON: '+e.reason : ''));
					}
				};
				/** @memberOf Html5AudioInput.webSocket# */
				webSocket.onclose = function(e) {
					console.info('Websocket closed!'+(e.code? ' CODE: '+e.code : '')+(e.reason? ' REASON: '+e.reason : ''));
				};
			}

			/**
			 * Create a script-processor node, using whichever factory function the
			 * (possibly prefixed) AudioContext implementation provides.
			 * @memberOf Html5AudioInput#
			 */
			function createAudioScriptProcessor(audioContext, bufferSize, numberOfInputChannels, numberOfOutputChannels){
				if(audioContext.context.createJavaScriptNode){
					return audioContext.context.createJavaScriptNode(bufferSize, numberOfInputChannels, numberOfOutputChannels);
				}
				else if(audioContext.context.createScriptProcessor){
					return audioContext.context.createScriptProcessor(bufferSize, numberOfInputChannels, numberOfOutputChannels);
				}
				else {
					throw Error('Could not create script-processor for AudioContext: context provides no function for generating processor!');
				}
			}

			/**
			 * creates a new AudioNode, that communicates sound to the silence detector
			 *
			 * NOTE(review): currently a no-op -- the legacy Worker-based silence-
			 * detection wiring was disabled (silence detection now runs inside the
			 * recorder's processor, see startUserMedia); kept because it is still
			 * invoked from startRecord/recognize.
			 *
			 * @memberOf Html5AudioInput#
			 */
			function startNewInputNode(){
				/* intentionally empty */
			}

			/**
			 * creates Silence detector and recorder and connects them to the input stream
			 * @param {LocalMediaStream} inputstream
			 * @memberOf Html5AudioInput#
			 */
			function startUserMedia(inputstream){
				var buffer = 0;
				stream = inputstream;
				var input = audio_context.createMediaStreamSource(stream);
				var recorderWorkerPath = constants.getWorkerPath()+'recorderWorkerExt.js';
				recorder = new Recorder(input, {workerPath: recorderWorkerPath});

				//FIXME experimental callback/listener for on-start-record -> API may change!
				var onStartRecordListeners = mediaManagerInstance.getListeners('onallowrecord');
				for(var i = 0, size = onStartRecordListeners.length; i < size; ++i){
					onStartRecordListeners[i](input, audio_context, recorder);
				}

				//silence detection runs inside the recorder's worker:
				silenceDetection = recorder.processor;

				/**
				 * Intercept recorder-worker messages; returns false (= "consumed")
				 * for messages that were handled here.
				 * @function
				 * @memberOf Html5AudioInput.recorder#
				 */
				recorder.beforeonmessage = function (e){
					if(mediaManagerInstance._log.isDebug()) mediaManagerInstance._log.log(e.data);

					var isProcessed = false;
					if (e.data == 'Send partial!'){

						isProcessed = true;

						recorder && recorder.exportWAV(
							/** @memberOf Html5AudioInput.recorder# */
							function onSendPartial(blob, id){
								if(mediaManagerInstance._log.isDebug()) mediaManagerInstance._log.log("wav exported");
								if (!hasActiveId) {
									//announce language & claim an input-ID before the first chunk:
									webSocketSend("language "+ languageManager.getLanguage());//FIXME use languageManager.getLanguageConfig(_pluginName) instead?
									inputId = findLowestFreeId();
									hasActiveId = true;
									webSocketSend("start "+ inputId);
									buffer = configurationManager.get([_pluginName, "silenceBuffer"]);
								} else {
									buffer = 0;
								}
								webSocketSend(blob);
							}, buffer, inputId);
					}
					else if (e.data == 'Silence detected!'){

						isProcessed = true;

						// send record to server!
						recorder && recorder.exportWAV(
							/** @memberOf Html5AudioInput.recorder# */
							function onSilenceDetected(blob, id){
								if(mediaManagerInstance._log.isDebug()) mediaManagerInstance._log.log("wav exported");
								if(blob.size > 2000000) {
									//TODO trigger callback / listener instead of aler-box
									alert("Message too large. You need to pause from time to time.");
									console.log("Message too large. You need to pause from time to time.");
									recorder.clear();
								} else {
									if (!hasActiveId) {
										inputId = findLowestFreeId();
										hasActiveId = true;
										webSocketSend("start "+ inputId);
										buffer = configurationManager.get([_pluginName, "silenceBuffer"]);
									} else {
										buffer = 0;
									}
									webSocketSend(blob);
									webSocketSend("stop");
									webSocketSend("analyze "+ inputId);
									hasActiveId = false;

									//FIXME experimental callback/listener for on-detect-sentence -> API may change!
									var onDetectSentenceListeners = mediaManagerInstance.getListeners('ondetectsentence');
									for(var i = 0, size = onDetectSentenceListeners.length; i < size; ++i){
										onDetectSentenceListeners[i](blob, inputId);
									}
								}
							}, buffer, inputId);
						if (endOfSpeechDetection){
							recorder && recorder.stop();
							silenceDetection && silenceDetection.postMessage({command: 'stop'});
						}
					}
					else if (e.data == 'clear'){

						isProcessed = true;

						recorder.clear();
					}
					else if(e.data == 'Silence Detection initialized' || e.data == 'Silence Detection started' || e.data == 'Silence Detection stopped'){

						isProcessed = true;
					}

					if(isProcessed === true){
						return false;
					}
				};

				/** @memberOf Html5AudioInput.recorder# */
				var silenceDetectionConfig = {
					sampleRate: input.context.sampleRate,
					noiseTreshold : configurationManager.get([_pluginName, "silenceDetector.noiseTreshold"]),
					pauseCount : configurationManager.get([_pluginName, "silenceDetector.pauseCount"]),
					resetCount : configurationManager.get([_pluginName, "silenceDetector.resetCount"])
				};

				//initialize silence-detection:
				silenceDetection.postMessage({
					command: 'initDetection',
					config: silenceDetectionConfig
				});
			}//END: startUserMedia

			try {
				// unify the different kinds of HTML5 implementations
				navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia;

				if(typeof AudioContext !== 'undefined'){
					audio_context = new AudioContext;
				}
				else {//if(typeof webkitAudioContext !== 'undefined'){
					audio_context = new webkitAudioContext;
				}
			}
			catch (e) {
				console.error('No web audio support in this browser! Error: '+(e.stack? e.stack : e));
				nonFunctional = true;
				if (currentFailureCallback)
					currentFailureCallback(e);
			}

			if( nonFunctional !== true ) try {
				initializeWebSocket();
			} catch (e) {
				console.error('Could not reach the voice recognition server!');
				nonFunctional = true;
				if (currentFailureCallback)
					currentFailureCallback(e);
			}

			if (nonFunctional) {
				return {};///////////////////////////// EARLY EXIT //////////////////////////////
			}

			// get audioInputStream
			navigator.getUserMedia({audio: true}, startUserMedia, function(e) {
				//FIX: original silently swallowed getUserMedia failures, leaving the
				//     plugin apparently initialized but unable to ever record
				console.error('Could not access microphone: '+(e && e.name? e.name+' ' : '')+e);
				if (currentFailureCallback){
					currentFailureCallback(e);
				}
			});

			//export the public functions:
			return {
				/**
				 * Start continuous recording; partial results are sent on detected
				 * silence and aggregated (or forwarded, if intermediateResults).
				 * @public
				 * @memberOf Html5AudioInput.prototype
				 * @see mmir.MediaManager#startRecord
				 */
				startRecord: function(successCallback, failureCallback, intermediateResults){
					lastBlob = false;
					//reset all server-side input slots:
					for (var k = 0; k < freeIds.length; k++){
						webSocketSend("clear "+k);
					}
					totalText = '';
					isUseIntermediateResults = intermediateResults? true : false;
					if(intermediateResults){
						textProcessor = successCallback;
					} else {
						//aggregate results; they are delivered on stopRecord:
						textProcessor = function(e, onEnd){
							totalText = totalText + ' '+e;
						};
					}
					endOfSpeechDetection = false;
					if (failureCallback){
						currentFailureCallback = failureCallback;
					}
					silenceDetection && startNewInputNode();
					recording = true;
					recorder && recorder.clear();
					recorder && recorder.record();
					silenceDetection && silenceDetection.postMessage({command: 'start'});
				},
				/**
				 * Stop recording; the aggregated text (plus the final result) is
				 * delivered to successCallback once the last blob was recognized.
				 * @public
				 * @memberOf Html5AudioInput.prototype
				 * @see mmir.MediaManager#stopRecord
				 */
				stopRecord: function(successCallback, failureCallback){
					if (failureCallback){
						currentFailureCallback = failureCallback;
					}
					//small delay so the tail of the utterance is still captured:
					setTimeout(function(){
						recorder && recorder.stop();
						if (successCallback){
							/** @memberOf media.plugin.html5AudioInput.prototype */
							textProcessor = function(e){
								if (lastBlob) {
									successCallback(totalText+ ' ' + e);
								}
								lastBlob = false;
							};
						}
						lastBlob = true;
						silenceDetection && silenceDetection.postMessage({command: 'stop'});
					}, 100);
				},
				/**
				 * Recognize a single utterance: recording stops automatically at the
				 * first detected silence (end-of-speech detection).
				 * @public
				 * @memberOf Html5AudioInput.prototype
				 * @see mmir.MediaManager#recognize
				 */
				recognize: function(successCallback, failureCallback){
					lastBlob = false;
					totalText = '';
					if (successCallback){
						textProcessor = successCallback;
					}
					if (failureCallback){
						currentFailureCallback = failureCallback;
					}
					endOfSpeechDetection = true;
					silenceDetection && startNewInputNode();
					recording = true;
					recorder && recorder.clear();
					recorder && recorder.record();
					silenceDetection && silenceDetection.postMessage({command: 'start'});
				},
				/**
				 * Abort the current recognition (discarding pending results).
				 * @public
				 * @memberOf Html5AudioInput.prototype
				 * @see mmir.MediaManager#cancelRecognition
				 */
				cancelRecognition: function(successCallback, failureCallback){
					if (failureCallback){
						currentFailureCallback = failureCallback;
					}

					recorder && recorder.stop();
					lastBlob = true;
					silenceDetection && silenceDetection.postMessage({command: 'stop'});
					if (successCallback){
						successCallback();
					}
				}
			};//END: return
		}//END: htmlAudioConstructor()

		// the code starts here: loads the necessary scripts and then calls htmlAudioConstructor
		commonUtils.loadScript(constants.getMediaPluginPath()+'recorderExt.js', function(){
			callBack(htmlAudioConstructor());
		});
	}//END: initialize()

};