1 /* 2 * Copyright (C) 2012-2013 DFKI GmbH 3 * Deutsches Forschungszentrum fuer Kuenstliche Intelligenz 4 * German Research Center for Artificial Intelligence 5 * http://www.dfki.de 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included 16 * in all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 */ 26 27 newMediaPlugin = { 28 29 /** @memberOf WebkitAudioInput# */ 30 initialize: function(callBack, mediaManager, logvalue){ 31 32 33 ////////////////// START MIC-LEVELS: analyzer for microphone-levels-changed & listener mechanism //////////// 34 /** 35 * @type navigator 36 * @memberOf WebkitAudioInput# 37 */ 38 var html5Navigator = navigator; 39 /** 40 * @type AudioContext 41 * @memberOf WebkitAudioInput# 42 */ 43 var _audioContext; 44 /** @memberOf WebkitAudioInput# */ 45 var nonFunctional = false; 46 try { 47 // unify the different kinds of HTML5 implementations 48 //window.AudioContext = window.AudioContext || window.webkitAudioContext; 49 html5Navigator.__getUserMedia = html5Navigator.getUserMedia || html5Navigator.webkitGetUserMedia || html5Navigator.mozGetUserMedia; 50 //window.URL = window.URL || window.webkitURL; 51 // _audioContext = new webkitAudioContext; 52 53 if(typeof AudioContext !== 'undefined'){ 54 _audioContext = new AudioContext; 55 } 56 else {//if(typeof webkitAudioContext !== 'undefined'){ 57 _audioContext = new webkitAudioContext; 58 } 59 } 60 catch (e) { 61 console.error('No web audio support in this browser! Error: '+(e.stack? e.stack : e)); 62 nonFunctional = true; 63 } 64 /** 65 * Switch for generally disabling "microphone-level changed" calculations 66 * (otherwise calculation becomes active/inactive depending on whether or 67 * not a listener is registered to event {@link #MIC_CHANGED_EVT_NAME}) 68 * 69 * <p> 70 * TODO make this configurable?... 71 * 72 * @memberOf WebkitAudioInput# 73 */ 74 var isMicLevelsEnabled = true; 75 /** MIC-LEVELS: the maximal value to occurs in the input data 76 * <p> 77 * FIXME verify / check if this is really the maximal possible value... 78 * @contant 79 * @memberOf WebkitAudioInput# 80 */ 81 var MIC_MAX_VAL = 2;// 82 /** MIC-LEVELS: the maximal value for level changes (used for normalizing change-values) 83 * @constant 84 * @memberOf WebkitAudioInput# */ 85 var MIC_MAX_NORM_VAL = -90;// -90 dB ... 
??? 86 87 /** MIC-LEVELS: normalization factor for values: adjust value, so that is 88 * more similar to the results from the other input-modules 89 * @constant 90 * @memberOf WebkitAudioInput# */ 91 var MIC_NORMALIZATION_FACTOR = 3.5;//adjust value, so that is more similar to the results from the other input-modules 92 /** MIC-LEVELS: time interval / pauses between calculating level changes 93 * @constant 94 * @memberOf WebkitAudioInput# */ 95 var MIC_QUERY_INTERVALL = 128; 96 /** MIC-LEVELS: threshold for calculating level changes 97 * @constant 98 * @memberOf WebkitAudioInput# */ 99 var LEVEL_CHANGED_THRESHOLD = 1.5; 100 /** 101 * MIC-LEVELS: Name for the event that is emitted, when the input-mircophone's level change. 102 * 103 * @private 104 * @constant 105 * @memberOf WebkitAudioInput# 106 */ 107 var MIC_CHANGED_EVT_NAME = 'miclevelchanged'; 108 109 /** 110 * HELPER normalize the levels-changed value to MIC_MAX_NORM_VAL 111 * @deprecated currently un-used 112 * @memberOf WebkitAudioInput# 113 */ 114 var normalize = function (v){ 115 return MIC_MAX_NORM_VAL * v / MIC_MAX_VAL; 116 }; 117 /** 118 * HELPER calculate the RMS value for list of audio values 119 * @deprecated currently un-used 120 * @memberOf WebkitAudioInput# 121 */ 122 var getRms = function (buffer, size){ 123 if(!buffer || size === 0){ 124 return 0; 125 } 126 127 var sum = 0, i = 0; 128 for(; i < size; ++i){ 129 sum += buffer[i]; 130 } 131 var avg = sum / size; 132 133 var meansq = 0; 134 for(i=0; i < size; ++i){ 135 meansq += Math.pow(buffer[i] - avg, 2); 136 } 137 138 var avgMeansq = meansq / size; 139 140 return Math.sqrt(avgMeansq); 141 }; 142 /** 143 * HELPER determine if a value has change in comparison with a previous value 144 * (taking the LEVEL_CHANGED_THRESHOLD into account) 145 * @memberOf WebkitAudioInput# 146 */ 147 var hasChanged = function(value, previousValue){ 148 var res = typeof previousValue === 'undefined' || Math.abs(value - previousValue) > LEVEL_CHANGED_THRESHOLD; 149 
return res; 150 }; 151 /** 152 * @type LocalMediaStream 153 * @memberOf WebkitAudioInput# 154 * @see https://developer.mozilla.org/en-US/docs/Web/API/MediaStream_API#LocalMediaStream 155 */ 156 var _currentInputStream; 157 /** 158 * @type AnalyserNode 159 * @memberOf WebkitAudioInput# 160 * @see https://developer.mozilla.org/en-US/docs/Web/API/AnalyserNode 161 */ 162 var _audioAnalyzer; 163 164 /** 165 * HELPER callback for getUserMedia: creates the microphone-levels-changed "analyzer" 166 * and fires mic-levels-changed events for registered listeners 167 * @param {LocalMediaStream} inputstream 168 * @memberOf WebkitAudioInput# 169 */ 170 function _startUserMedia(inputstream){ 171 console.log('webkitAudioInput: start analysing audio input...'); 172 var buffer = 0; 173 var prevDb; 174 175 //we only need one analysis: if there is one active from a previous start 176 // -> do stop it, before storing the new inputstream in _currentInputStream 177 if(_currentInputStream){ 178 _stopAudioAnalysis(); 179 } 180 181 _currentInputStream = inputstream; 182 183 if(_isAnalysisCanceled === true){ 184 //ASR was stopped, before the audio-stream for the analysis became available: 185 // -> stop analysis now, since ASR is not active (and close the audio stream without doing anything) 186 _stopAudioAnalysis(); 187 return;//////////////// EARLY EXIT ////////////////////// 188 } 189 190 var inputNode = _audioContext.createMediaStreamSource(_currentInputStream); 191 192 ///////////////////// VIZ /////////////////// 193 // recorder = recorderInstance; 194 195 _audioAnalyzer = _audioContext.createAnalyser(); 196 _audioAnalyzer.fftSize = 2048; 197 // _audioAnalyzer.smoothingTimeConstant = 0.9;//NOTE: value 1 will smooth everything *completely* -> do not use 1 198 inputNode.connect(_audioAnalyzer); 199 200 // audioRecorder = new Recorder( _currentInputStream ); 201 // recorder = new Recorder(_currentInputStream, {workerPath: recorderWorkerPath}); 202 203 // updateAnalysers(); 204 205 var 
updateAnalysis = function(){ 206 if(!_currentInputStream){ 207 return; 208 } 209 210 var size = _audioAnalyzer.fftSize;//.frequencyBinCount;// 211 var data = new Uint8Array(size);//new Float32Array(size);// 212 _audioAnalyzer.getByteTimeDomainData(data);//.getFloatFrequencyData(data);//.getByteFrequencyData(data);//.getFloatTimeDomainData(data);// 213 214 var min = 32768; 215 var max = -32768; 216 var total = 0; 217 for(var i=0; i < size; ++i){ 218 var datum = Math.abs(data[i]); 219 if (datum < min) 220 min = datum; 221 if (datum > max) 222 max = datum; 223 224 total += datum; 225 } 226 var avg = total / size; 227 // console.info('audio ['+min+', '+max+'], avg '+avg); 228 229 // var rms = getRms(data, size); 230 // var db = 20 * Math.log(rms);// / 0.0002); 231 232 // console.info('audio rms '+rms+', db '+db); 233 234 /* RMS stands for Root Mean Square, basically the root square of the 235 * average of the square of each value. */ 236 var rms = 0, val; 237 for (var i = 0; i < data.length; i++) { 238 val = data[i] - avg; 239 rms += val * val; 240 } 241 rms /= data.length; 242 rms = Math.sqrt(rms); 243 244 var db = rms; 245 // console.info('audio rms '+rms); 246 247 //actually fire the change-event on all registered listeners: 248 if(hasChanged(db, prevDb)){ 249 // console.info('audio rms changed: '+prevDb+' -> '+db); 250 prevDb = db; 251 252 //adjust value 253 db *= MIC_NORMALIZATION_FACTOR; 254 255 mediaManager._fireEvent(MIC_CHANGED_EVT_NAME, [db]); 256 } 257 258 259 if(_isAnalysisActive && _currentInputStream){ 260 setTimeout(updateAnalysis, MIC_QUERY_INTERVALL); 261 } 262 }; 263 updateAnalysis(); 264 ///////////////////// VIZ /////////////////// 265 266 } 267 268 269 /** internal flag: is/should mic-levels analysis be active? 270 * @memberOf WebkitAudioInput# 271 */ 272 var _isAnalysisActive = false; 273 /** internal flag: is/should mic-levels analysis be active? 
274 * @memberOf WebkitAudioInput# 275 */ 276 var _isAnalysisCanceled = false; 277 /** HELPER start-up mic-levels analysis (and fire events for registered listeners) 278 * @memberOf WebkitAudioInput# 279 */ 280 function _startAudioAnalysis(){ 281 if(_isAnalysisActive === true){ 282 return; 283 } 284 _isAnalysisCanceled = false; 285 _isAnalysisActive = true; 286 html5Navigator.__getUserMedia({audio: true}, _startUserMedia, function(e) { 287 console.error("webkitAudioInput: failed _startAudioAnalysis, error for getUserMedia ", e); 288 _isAnalysisActive = false; 289 }); 290 } 291 292 /** HELPER stop mic-levels analysis 293 * @memberOf WebkitAudioInput# 294 */ 295 function _stopAudioAnalysis(){ 296 if(_currentInputStream){ 297 var stream = _currentInputStream; 298 _currentInputStream = void(0); 299 //DISABLED: MediaStream.stop() is deprecated -> instead: stop all tracks individually 300 // stream.stop(); 301 try{ 302 if(stream.active){ 303 var list = stream.getTracks(), track; 304 for(var i=list.length-1; i >= 0; --i){ 305 track = list[i]; 306 if(track.readyState !== 'ended'){ 307 track.stop(); 308 } 309 } 310 } 311 } catch (err){ 312 console.log('webkitAudioInput: a problem occured while stopping audio input analysis: '+err); 313 } 314 _isAnalysisCanceled = false; 315 _isAnalysisActive = false; 316 317 console.log('webkitAudioInput: stopped analysing audio input!'); 318 } 319 else if(_isAnalysisActive === true){ 320 console.warn('webkitAudioInput: stopped analysing audio input process, but no valid audio stream present!'); 321 _isAnalysisCanceled = true; 322 _isAnalysisActive = false; 323 } 324 } 325 326 /** HELPER determine whether to start/stop audio-analysis based on 327 * listeners getting added/removed on the MediaManager 328 * @memberOf WebkitAudioInput# 329 */ 330 function _updateMicLevelAnalysis(actionType, handler){ 331 332 //start analysis now, if necessary 333 if( actionType === 'added' && 334 recording === true && 335 _isAnalysisActive === false && 336 
isMicLevelsEnabled === true 337 ){ 338 _startAudioAnalysis(); 339 } 340 //stop analysis, if there is no listener anymore 341 else if(actionType === 'removed' && 342 _isAnalysisActive === true && 343 mediaManager.hasListeners(MIC_CHANGED_EVT_NAME) === false 344 ){ 345 _stopAudioAnalysis(); 346 } 347 } 348 //observe changes on listener-list for mic-levels-changed-event 349 mediaManager._addListenerObserver(MIC_CHANGED_EVT_NAME, _updateMicLevelAnalysis); 350 351 352 ////////////////// START MIC-LEVELS: analyzer for microphone-levels-changed & listener mechanism //////////// 353 354 /** @memberOf WebkitAudioInput# */ 355 var _pluginName = 'webkitAudioInput'; 356 /** 357 * @type mmir.LanguageManager 358 * @memberOf WebkitAudioInput# 359 */ 360 var languageManager = require('languageManager'); 361 362 //detect feature avaibility: 363 if(typeof webkitSpeechRecognition === 'undefined'){ 364 365 //... browser does NOT support this speech-input-module: create warning message and dummy functions for the MediaManager 366 367 console.warn('Could not load webkitAudioInput plugin: API webkitSpeechRecognition is not available!'); 368 369 //FIXME this error message is a quick an dirty hack -- there should be a more general way for defining the error message... 370 var msg = 'Unfortunately, your internet browser' 371 +'\ndoes not support speech input.' 372 +'\n\nPlease use Google Chrome,' 373 +'\nif you want to use speech input.' 
374 +'\n\nhttp://www.google.com/chrome'; 375 376 //invoke the passed-in initializer-callback and export the public functions: 377 callBack( { 378 /** 379 * @public 380 * @memberOf WebkitAudioInput.dummy.prototype 381 * @see mmir.MediaManager#startRecord 382 */ 383 startRecord: function(successCallback, failureCallback){ 384 alert(msg); 385 if(failureCallback) 386 failureCallback(); 387 } 388 /** 389 * @public 390 * @memberOf WebkitAudioInput.dummy.prototype 391 * @see mmir.MediaManager#startRecord 392 */ 393 , stopRecord: function(successCallback,failureCallback){ 394 alert(msg); 395 if(failureCallback) 396 failureCallback(); 397 } 398 /** 399 * @public 400 * @memberOf WebkitAudioInput.dummy.prototype 401 * @see mmir.MediaManager#startRecord 402 */ 403 , recognize: function(successCallback,failureCallback){ 404 alert(msg); 405 if(failureCallback) 406 failureCallback(); 407 } 408 /** 409 * @public 410 * @memberOf WebkitAudioInput.dummy.prototype 411 * @see mmir.MediaManager#startRecord 412 */ 413 , cancelRecognition: function(successCallBack,failureCallBack){ 414 alert(msg); 415 if(failureCallback) 416 failureCallback(); 417 } 418 }); 419 return;////////////////////// EARLY EXIT /////////////////////////// 420 } 421 422 /** 423 * @constant 424 * @memberOf WebkitAudioInput# */ 425 var EVENT_RESULT_FIELD = "transcript"; 426 /** 427 * @constant 428 * @memberOf WebkitAudioInput# */ 429 var EVENT_SCORE_FIELD = "confidence"; 430 /** 431 * @constant 432 * @memberOf WebkitAudioInput# */ 433 var UNSTABLE_LIMIT = 0.01; 434 435 /** 436 * Result types (returned by the native/Cordova plugin) 437 * 438 * @type Enum 439 * @constant 440 * @memberOf WebkitAudioInput# 441 */ 442 var RESULT_TYPES = { 443 "FINAL": "FINAL", 444 "INTERIM": "INTERIM", 445 "INTERMEDIATE": "INTERMEDIATE", 446 "RECOGNITION_ERROR": "RECOGNITION_ERROR", 447 "RECORDING_BEGIN": "RECORDING_BEGIN", 448 "RECORDING_DONE": "RECORDING_DONE" 449 }; 450 451 /** @type webkitSpeechRecognition 452 * @memberOf 
WebkitAudioInput# */ 453 var recognition = new webkitSpeechRecognition(); 454 /** @type Function 455 * @memberOf WebkitAudioInput# */ 456 var currentSuccessCallback; 457 /** @type Function 458 * @memberOf WebkitAudioInput# */ 459 var currentFailureCallback; 460 /** @memberOf WebkitAudioInput# */ 461 var final_recognition_result = ""; 462 /** @type Function 463 * @memberOf WebkitAudioInput# */ 464 var default_error_function; 465 /** @type Function 466 * @memberOf WebkitAudioInput# */ 467 var helper_error_handler; 468 469 470 // flags 471 472 /** @memberOf WebkitAudioInput# */ 473 var recording = false; 474 /** @memberOf WebkitAudioInput# */ 475 var active = false; 476 /** @memberOf WebkitAudioInput# */ 477 var aborted = false; 478 // var restart_counter = 0; 479 /** @memberOf WebkitAudioInput# */ 480 var intermediate_results = false; 481 482 483 /** @memberOf WebkitAudioInput# */ 484 // loglevel - shows: 485 // 0 - errors 486 // 1 - warning, errors 487 // 2 - info, warning, errors 488 // 3 - logs, info, warning, errors 489 // 4 - debugs, logs, info, warning, errors 490 var loglevel = 4;//FIXME logvalue | 0; 491 492 /** 493 * field for storing the previous (main) recontion result 494 * (this is used for calculating "unstable" parts, see {@link #helper_extract_results}) 495 * @type String 496 * @memberOf WebkitAudioInput# 497 */ 498 var _prevResult; 499 /** 500 * create callback-arguments for ASR-result callback: 501 * 502 * @returns Array with 503 * [ String result, 504 * Number score, 505 * String type ["INTERIM" | "FINAL" ], 506 * Array<Results> alternatives, //OPTIONAL 507 * String unstable //OPTIONAL (NOTE: not supported by this plugin, i.e. 
webkitSpeechInput) 508 * ] 509 * 510 * @memberOf WebkitAudioInput# 511 */ 512 var helper_extract_results = function(eventResultsObject){ 513 var res = []; 514 var size = eventResultsObject.length; 515 516 if(size < 1){ 517 return res; 518 } 519 520 //ASSERT size >= 1 521 522 var result = eventResultsObject[0][EVENT_RESULT_FIELD]; 523 // [0]: main result 524 res.push(result); 525 // [1]: main confidence score 526 res.push(eventResultsObject[0][EVENT_SCORE_FIELD]); 527 528 // [2]: result type 529 if(eventResultsObject.isFinal){ 530 res.push(recording? RESULT_TYPES.INTERMEDIATE : RESULT_TYPES.FINAL); 531 } 532 else { 533 res.push(RESULT_TYPES.INTERIM); 534 } 535 536 // [3]: array with alternative results 537 if(size > 1){ 538 var altRes = []; 539 for(var i=1; i < size; ++i){ 540 altRes.push({ 541 result: eventResultsObject[i][EVENT_RESULT_FIELD], 542 score: eventResultsObject[i][EVENT_SCORE_FIELD] 543 }); 544 } 545 res.push(altRes); 546 } 547 else { 548 549 //if no alternative results: add undefined-entry: 550 res.push(void(0)); 551 } 552 553 // [4]: UNSTABLE part for main result 554 555 //NOTE "unstable" part of ASR result is not supported by webkitSpeechInput... 556 //HACK: detect unstable for non-final results: 557 // * set to unstable if confidence is lower than UNSTABLE_LIMIT 558 // * otherwise (ie. result is basically STABLE), try 559 // to detect an UNSTABLE part using the previous result 560 // (if previous result contained more than the current stable one...) 561 if( ! 
eventResultsObject.isFinal){ 562 563 //set to unstable, if result has a LOW score 564 if(res[1] <= UNSTABLE_LIMIT){ 565 //add result as "unstable": 566 res.push(result); 567 //set main-result to empty 568 res[0] = ""; 569 } 570 //try to recover unstable part: 571 else if(res[1] > UNSTABLE_LIMIT && _prevResult && _prevResult.length > length){ 572 573 //try to detect stable part: detect matching prefix with previous result 574 var prefixIndex = 0; 575 var size = result.length; 576 var ch = result.charAt(prefixIndex).toLowerCase(); 577 while(size > prefixIndex && ch === _prevResult.charAt(prefixIndex).toLowerCase()){ 578 ch = result.charAt(++prefixIndex).toLowerCase(); 579 } 580 581 //-> use REST from matching prefix as UNSTABLE text 582 //NOTE: use simplification (i.e. simpler code) ignore matches <= 1, ie. prefixIndex > 0 583 if(prefixIndex > 0 && prefixIndex + 1 < _prevResult.length){ 584 585 //add REST to detected PREFIX as "unstable": 586 res.push(_prevResult.substring(prefixIndex+1)); 587 588 console.info('found unstable ASR part: "'+_prevResult.substring(prefixIndex+1)+'"'); 589 } 590 else { 591 // -> we have relatively stable result, that has no unstable postfix -> reset _prevResult; 592 _prevResult = void(0); 593 } 594 } 595 596 //remember current (main) result STRING, if it "adds information": 597 if(!_prevResult || result.length >= _prevResult.length){ 598 _prevResult = result; 599 } 600 601 } 602 else { 603 //if FINAL, reset field for previous-result 604 _prevResult = void(0); 605 } 606 607 608 return res; 609 }; 610 611 /** 612 * Counter for error-in-a-row: 613 * each time an error is encountered, this counter is increased. 614 * On starting/canceling, or on an internal success/result callback, 615 * the counter is reset. 616 * 617 * Thus, this counter keeps track how many times in a row 618 * the (internal) error-callback was triggered. 
619 * 620 * NOTE: this is currently used, to try restarting <code>max_error_retry</code> 621 * times the ASR, even on "critical" errors (during repeat-mode). 622 * 623 * @see #max_error_retry 624 * 625 * @memberOf AndroidAudioInput# 626 */ 627 var error_counter = 0; 628 629 /** 630 * Maximal number of errors-in-a-row for trying to restart 631 * recognition in repeat-mode. 632 * 633 * @see #error_counter 634 * 635 * @memberOf AndroidAudioInput# 636 * @default 5 637 */ 638 var max_error_retry = 5; 639 640 /** 641 * default helper for error-events: 642 * 643 * determines, if RESTART is allowed/possible (in case of RECORDing mode), 644 * AND otherwise triggers the current failure-callbacks. 645 * 646 * SIDE-EFFECTS: sets private field aborted:=true if RESTART is NOT possible. 647 * 648 * @returns {Boolean} true, if the function could process the error 649 * (i.e. return false for unknown errors; these should be handled by 650 * the invoking code of this helper function) 651 * 652 * @memberOf WebkitAudioInput# 653 */ 654 helper_error_handler = function(event) { 655 656 var type = event.error; 657 658 switch(type){ 659 case "no-speech": 660 if (loglevel >= 1){ 661 console.info("[webkitAudioInput.Warn] event " + type); 662 } 663 // no errorcallback, just restart (if in RECORD mode)... 664 return true; 665 666 //////////////// 667 // "serious" errors: cannot not automatically restart... 668 669 // Audio capture failed. 670 case "audio-capture": 671 // if analysing-audio for microphone levels (via getUserMedia) 672 // is enabled, the error may have been caused by the browser/device 673 // due to the fact, that it does not allow multiple/parallel access 674 // to the microphone resource... 675 // -> try once again, but with disabled analysing-audio feature: 676 if(isMicLevelsEnabled === true){ 677 isMicLevelsEnabled = false; 678 return true; 679 } 680 681 // ...otherwise: do not restart! 
682 683 // Some network communication that was required to complete the recognition failed. 684 case "network": 685 // do not restart! 686 687 //for "serious errors": if errors-in-a-row-counter is under the limit, DO try to restart 688 if(error_counter < max_error_retry){ 689 return true; 690 } 691 692 // Speech input was aborted somehow, maybe by some user-agent-specific behavior such as UI that lets the user cancel speech input. 693 case "aborted": 694 // do not restart! 695 696 // The user agent is not allowing any speech input to occur for reasons of security, privacy or user preference. 697 case "not-allowed": 698 // user denied access -> do not automatically restart! 699 700 // The user agent is not allowing the web application requested speech service, but would allow some speech service, to be used either because the user agent doesn't support the selected one or because of reasons of security, privacy or user preference. 701 case "service-not-allowed": 702 // user agent denied access -> do not automatically restart! 703 704 // There was an error in the speech recognition grammar or semantic tags, or the grammar format or semantic tag format is unsupported. 705 case "bad-grammar": 706 // do not automatically restart! 707 708 // The language was not supported. 709 case "language-not-supported": 710 // do not automatically restart!, change the language 711 aborted = true; 712 if (loglevel >= 1){ 713 console.warn("[webkitAudioInput.Warn] event " + type); 714 } 715 currentFailureCallback && currentFailureCallback(event.error); 716 return true; 717 718 default: 719 //for unknown errors: return false 720 break; 721 722 }//END: switch 723 724 return false; 725 }; //END: helper_error_handler(event){... 726 727 /** @memberOf WebkitAudioInput# */ 728 default_error_function = function(event){ 729 730 ++error_counter; 731 732 // if (helper_error_handler.hasOwnProperty(event.error)){ 733 // helper_error_handler[event.error](event); 734 // } else { 735 if( ! 
helper_error_handler(event) ){ 736 737 if (currentFailureCallback){ 738 currentFailureCallback(event.error); 739 } else { 740 console.error("[webkitAudioInput.Error] event " + event.error); 741 } 742 } 743 }; 744 745 // set remaining event-handler functions 746 747 /** 748 * Side-Effects: 749 * 750 * sets recognition-status to "active" 751 * 752 * starts audio-analysis (if listeners are registered for mic-levels-changed event) 753 * 754 * @memberOf WebkitAudioInput.recognition# 755 */ 756 recognition.onaudiostart = function(event){ 757 active = true; 758 // if audio can start, then we have been successful in starting the voice recognition 759 // so: reset counter 760 // TODO: check if this is really correct 761 // restart_counter=0; 762 if (loglevel >= 4){ 763 console.debug("[webkitAudioInput.Debug] Audio START"); 764 console.debug("[webkitAudioInput.Debug] active: " + active); 765 } 766 767 if(isMicLevelsEnabled === true){ 768 _startAudioAnalysis(); 769 } 770 }; 771 /** @memberOf WebkitAudioInput.recognition# */ 772 recognition.onspeechstart = function(event){ 773 if (loglevel >= 4){ 774 console.debug("[webkitAudioInput.Debug] Speech START"); 775 } 776 }; 777 /** @memberOf WebkitAudioInput.recognition# */ 778 recognition.onsoundstart = function(event){ 779 if (loglevel >= 4){ 780 console.debug("[webkitAudioInput.Debug] Sound START"); 781 } 782 }; 783 /** @memberOf WebkitAudioInput.recognition# */ 784 recognition.onaudioend = function(event){ 785 active = false; 786 if (loglevel >= 4){ 787 console.debug("[webkitAudioInput.Debug] Audio END"); 788 } 789 790 // _stopAudioAnalysis(); MOVED to onend: in some cases, onaudioend will not be triggered, but onend will always get triggered 791 }; 792 /** @memberOf WebkitAudioInput.recognition# */ 793 recognition.onspeechend = function(event){ 794 if (loglevel >= 4){ 795 console.debug("[webkitAudioInput.Debug] Speech END"); 796 } 797 }; 798 /** @memberOf WebkitAudioInput.recognition# */ 799 recognition.onsoundend = 
function(event){ 800 if (loglevel >= 4){ 801 console.debug("[webkitAudioInput.Debug] Sound END"); 802 } 803 }; 804 /** @memberOf WebkitAudioInput.recognition# */ 805 recognition.onstart = function(event){ 806 if (loglevel >= 4){ 807 console.debug("[webkitAudioInput.Debug] asr START"); 808 } 809 }; 810 /** 811 * Side-Effects: 812 * 813 * sets recognition-status to "inactive" 814 * 815 * re-starts recognition if in "recoring" mode OR calls stopped-callback 816 * 817 * @memberOf WebkitAudioInput.recognition# 818 */ 819 recognition.onend = function(event){ 820 active = false; 821 if (loglevel >= 4){ 822 console.debug("[webkitAudioInput.Debug] asr END"); 823 console.debug("[webkitAudioInput.Debug] active: " + active); 824 } 825 826 //NOTE there may be no analysis open, but stopping here (and not e.g. in onaudioen) 827 // will ensure that we _always_ remove analysis, if it is present: 828 _stopAudioAnalysis(); 829 830 // TODO: check if it is alright if we stop restarting the asr when reset_counter is greater than 3 831 // --> this would mean, we can never start the asr again in this instance... bad choice 832 if ((aborted === false) && (recording === true)){ 833 // restart_counter++; 834 recognition.start(); 835 } 836 //FIXME this is a HACK for the stopRecord function ... 837 else if(recognition._stopRecordCallback){ 838 var theCallback = recognition._stopRecordCallback; 839 //this is a "1-time callback" -> remove it... 840 delete recognition._stopRecordCallback; 841 //... and trigger the callback: 842 theCallback.call(recording, event); 843 } 844 }; 845 846 /** 847 * @type function 848 * @memberOf WebkitAudioInput.recognition# 849 */ 850 recognition.onerror = default_error_function; 851 852 853 /** 854 * set maximum number of SpeechRecognitionAlternatives per result. 
855 * 856 * TODO make this configurable 857 * 858 * @type Number 859 * @memberOf WebkitAudioInput.recognition# 860 */ 861 recognition.maxAlternatives = 1; 862 863 //invoke the passed-in initializer-callback and export the public functions: 864 callBack ({ 865 /** 866 * @public 867 * @memberOf WebkitAudioInput.prototype 868 * @see mmir.MediaManager#startRecord 869 */ 870 startRecord: function(successCallback, failureCallback, intermediateResults){ 871 872 // TODO: failureCallback parameter 873 var errMsg; 874 if (active == true){ 875 876 errMsg = "[webkitAudioInput.Warn] Voice recognition already running."; 877 878 if(failureCallback){ 879 880 failureCallback(errMsg); 881 882 if (loglevel >= 1){ 883 console.warn(errMsg); 884 } 885 } 886 else { 887 console.warn(errMsg); 888 } 889 return; 890 } 891 892 aborted = false; 893 recording = true; 894 error_counter = 0; 895 896 _prevResult = void(0); 897 898 // flush any old results 899 final_recognition_result = ""; 900 901 // set intermediate_results - for access by stopRecord 902 intermediate_results = intermediateResults; 903 904 // set recognition language 905 var langStr = languageManager.getLanguageConfig(_pluginName); 906 if(!langStr){ 907 //default: 908 langStr = "en-US"; 909 } 910 recognition.lang = langStr; 911 912 // do not stop recognition on silence 913 recognition.continuous = true; 914 915 // get results continuously 916 recognition.interimResults = (loglevel >= 4) ? 
true : false; // NOTE(review): continuation of an assignment started above — presumably
              // `recognition.interimResults = (...) ? true : false;` (cf. the symmetric
              // code in recognize() below) — verify against the lines preceding this chunk

currentFailureCallback = failureCallback;
currentSuccessCallback = successCallback;

recognition.onerror = default_error_function;

var self = this;

// Result handler while recording is active:
// - final sub-results are accumulated into final_recognition_result
// - depending on intermediate_results, callbacks fire per-result or only once at the end
// - see https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#speechreco-event
recognition.onresult = function (event) {
	var finalResult = '';

	// a (partial) result arrived -> reset the consecutive-error counter
	error_counter = 0;

	var evtResults = event.results[event.resultIndex];
	if (loglevel >= 4){
		// console.debug("[webkitAudioInput.Debug] " + "interim: " + event.results[event.resultIndex][0][EVENT_RESULT_FIELD] + " ("+event.results[event.resultIndex][0].confidence+")");

		console.debug("[webkitAudioInput.Debug] interim: " + JSON.stringify(event.results));
	}

	// if event.results[event.resultIndex].isFinal is true, then there is a pause.
	if (evtResults.isFinal) {
		if (loglevel >= 4){
			console.debug("[webkitAudioInput.Debug] final");
		}

		finalResult = evtResults[0][EVENT_RESULT_FIELD];

		if (intermediate_results == true){

			//INTERMEDIATE results mode: only message last ASR to callback:

			// call callback method with result
			// final_recognition_result += " " + finalResult;
			final_recognition_result += finalResult;
			// currentSuccessCallback && currentSuccessCallback(finalResult);
			currentSuccessCallback && currentSuccessCallback.apply(self, helper_extract_results(evtResults) );

		} else {

			//FINAL results mode: message collected ASR results to callback:

			// final_recognition_result += " " + finalResult;
			final_recognition_result += finalResult;

			//audio-input already closed --> this is the last invocation of the callback, so send final result
			if (recording == false){
				currentSuccessCallback && currentSuccessCallback.call(self,final_recognition_result);
			}
		}

	}
	//for intermediate result (only if we have a callback):
	else if (intermediate_results == true && currentSuccessCallback){
		currentSuccessCallback.apply(self, helper_extract_results(evtResults) );
	}
};

// start the recognition
try{

	recognition.start();

} catch (exc){

	// NOTE(review): no `var` here — errMsg is presumably declared earlier in this
	//               function (outside the visible chunk); otherwise this would create
	//               an implicit global. TODO confirm the declaration exists above.
	errMsg = "[webkitAudioInput.Error] Could not start voice recognition: "+ exc;

	if(failureCallback){

		failureCallback(errMsg,exc);

		if (loglevel >= 1){
			console.error(errMsg, exc);
		}
	}
	else {
		console.error(errMsg, exc);
	}
}
},
/**
 * Stop the current recording/recognition session and deliver the collected
 * ASR result via successCallback (or via the "internal" _stopRecordCallback
 * hack if no further onresult event arrives).
 *
 * @public
 * @memberOf WebkitAudioInput.prototype
 * @see mmir.MediaManager#stopRecord
 */
stopRecord: function(successCallback,failureCallback){
	// TODO: at end of recording return whole recognized stuff in successcallback
	recording = false;

	// guard so that successCallback is invoked at most once (either from
	// onresult below or from the _stopRecordCallback hack further down)
	var isSuccessTriggered = false;

	var self = this;

	// recognize (recognition.continuous == true) or stopRecord (recognition.continuous == false)
	if (recognition.continuous == false){

		recognition.onresult = function (event) {
			var finalResult = '';

			if (loglevel >= 4){
				console.debug("[webkitAudioInput.Debug] interim: " + event.results[event.resultIndex][0][EVENT_RESULT_FIELD]);
			}

			var evtResults = event.results[event.resultIndex];
			// if event.results[event.resultIndex].isFinal is true, then there is a pause.
			if (evtResults.isFinal) {
				if (loglevel >= 4){
					console.debug("[webkitAudioInput.Debug] final");
				}

				finalResult = evtResults[0][EVENT_RESULT_FIELD];

				// is it called for the last time (recording == false)
				if (recording == false){
					final_recognition_result += finalResult;

					if (intermediate_results == true){
						// currentSuccessCallback && currentSuccessCallback(finalResult);
						currentSuccessCallback && currentSuccessCallback.apply(self, helper_extract_results(evtResults) );
					} else {
						currentSuccessCallback && currentSuccessCallback.call(self, final_recognition_result);
					}

					if(successCallback){
						if(isSuccessTriggered){
							console.warn('stopRecord: success callback was already triggered!');//FIXME debug
						}
						isSuccessTriggered = true;
						successCallback.call(self, final_recognition_result);
					}
				} else {
					// still recording: accumulate and (optionally) notify with the partial final result
					// final_recognition_result += " " + finalResult;
					final_recognition_result += finalResult;
					if (intermediate_results == true){
						currentSuccessCallback && currentSuccessCallback.call(self, finalResult);
					}
				}

			}
			else {
				// non-final (interim) result: forward as-is
				currentSuccessCallback && currentSuccessCallback.apply(self, helper_extract_results(evtResults) );
			}
		};
	}
	// TODO: recognition.onstop = function(){successCallback}

	//HACK: set an "internal" callback, that will be checked in the onend-listener (see above)
	//      (NOTE: the onstop()-listener does not seem to get called ...)
	recognition._stopRecordCallback = function(evt){
		if(successCallback && !isSuccessTriggered){
			// console.debug('stopRecord: calling success callback onstop (without last ASR result)');//FIXME debug
			isSuccessTriggered = true;
			// empty result, score -1, type 'RECORDING_DONE': signals "stopped without a last ASR result"
			successCallback.call(self,'', -1, 'RECORDING_DONE');
		}
	};

	try{

		recognition.stop();

	} catch (exc){

		var errMsg = "[webkitAudioInput.Error] Could not stop voice recognition: "+ exc;

		if(failureCallback){

			// NOTE(review): unlike the start-error handling above, the exception object
			//               (exc) is NOT passed along here — possibly intentional, verify
			failureCallback(errMsg);

			if (loglevel >= 1){
				console.error(errMsg, exc);
			}
		}
		else {
			console.error(errMsg, exc);
		}
	}
},


/**
 * One-shot recognition: starts the recognizer in non-continuous mode and
 * delivers a single (final) result, then cancels the recognizer.
 *
 * <p>
 * NOTE: doesn't require interimResult - because it stops after first pause; would make no sense
 *
 * <p>
 * NOTE: no end event, if recognize() is stopped via stopRecord()
 *
 * @public
 * @memberOf WebkitAudioInput.prototype
 * @see mmir.MediaManager#recognize
 */
recognize: function(successCallback,failureCallback){

	console.warn("DO NOT USE AT THE MOMENT\nUnexpected behavior: if recognition is stopped (via 'stopRecord()'), the 'end' is not thrown. The recognizer is still active, but not usable.");

	var errMsg;
	// do not start a second session while one is already active
	if (active == true){

		errMsg = "[webkitAudioInput.Warn] Voice recognition already running.";

		if(failureCallback){

			failureCallback(errMsg);

			if (loglevel >= 1){
				console.warn(errMsg);
			}
		}
		else {
			console.warn(errMsg);
		}
		return;
	}

	aborted = false;
	recording = true;
	error_counter = 0;

	_prevResult = void(0);

	// flush any old results
	final_recognition_result = "";

	// recognition.lang = "en-US";
	var langStr = languageManager.getLanguageConfig(_pluginName);
	if(!langStr){
		//default:
		langStr = "en-US";
	}
	recognition.lang = langStr;

	// stop recognition on silence
	recognition.continuous = false;

	// not needed for recognize
	// // set intermediate_results - for access by stopRecord
	// interim results are only requested for verbose debug-logging (loglevel >= 4)
	recognition.interimResults = (loglevel >= 4) ? true : false;

	currentFailureCallback = failureCallback;
	currentSuccessCallback = successCallback;

	recognition.onerror = default_error_function;

	var self = this;
	// - see https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#speechreco-event
	recognition.onresult = function (event) {
		// var finalResult = '';

		if (loglevel >= 4){
			console.debug("[webkitAudioInput.Debug] " + "interim: " + event.results[event.resultIndex][0][EVENT_RESULT_FIELD]);
		}

		// if event.results[event.resultIndex].isFinal is true, then there is a pause.
		if (event.results[event.resultIndex].isFinal) {
			if (loglevel >= 4){
				console.debug("[webkitAudioInput.Debug] " + "final");
			}

			//stop recording - finish after one sentence!
			//NOTE do this before calling helper_extract_results(), in order to make the result type FINAL
			recording = false;

			var returnArgs = helper_extract_results(event.results[event.resultIndex]);

			// TODO: dirty hack - somehow it does not throw end event after recognition if recognize is used
			self.cancelRecognition();
			currentSuccessCallback && currentSuccessCallback.apply(self, returnArgs);//finalResult);
		}
	};

	// start the recognition
	try{

		recognition.start();

	} catch (exc){

		errMsg = "[webkitAudioInput.Error] Could not start voice recognition: "+ exc;

		if(failureCallback){

			failureCallback(errMsg, exc);

			if (loglevel >= 1){
				console.error(errMsg, exc);
			}
		}
		else {
			console.error(errMsg, exc);
		}
	}
},
/**
 * Abort the current recognition session; successCallback is invoked from the
 * onerror handler once the expected "aborted" error event arrives.
 *
 * @public
 * @memberOf WebkitAudioInput.prototype
 * @see mmir.MediaManager#cancelRecognition
 */
cancelRecognition: function(successCallback,failureCallback){
	recording = false;
	aborted = true;
	error_counter = 0;

	var self = this;
	// callback used if an error occurred - includes abort
	// gets event as argument - see https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#speechreco-error
	// * if aborted - call successCallback

	recognition.onerror = function(event){
		if ((event.error == "aborted") && (aborted == true)){
			// this is the abort we triggered ourselves: restore the default
			// error handler and report success
			aborted = false;
			recognition.onerror = default_error_function;
			successCallback && successCallback.call(self,event.error);
		} else {
			// any other error: delegate to the default error handling
			// currentFailureCallback(event.error);
			default_error_function.call(self,event.error);
		}
	};

	recognition.abort();
},
/**
 * for debugging - NOTE use with caution, may be removed in the future
 * @private
 * @memberOf WebkitAudioInput.prototype
 */
getLoglevel: function(){
	return loglevel;
},
/**
 * for debugging - NOTE use with caution, may be removed in the future
 * (non-numeric values are coerced to 0 by the bitwise-OR below)
 * @default 0: set loglevel to 0
 * @private
 * @memberOf WebkitAudioInput.prototype
 */
setLoglevel: function(logvalue){
	loglevel = logvalue | 0;
	return loglevel;
}
}); // NOTE(review): presumably closes the callBack(...)/export invocation that
    //               publishes these plugin functions — opened before this chunk


} // END: initialize()

};