Source: env/media/webspeechAudioInput.js

define(['mmirf/mediaManager', 'mmirf/configurationManager', 'mmirf/languageManager', 'mmirf/logger'], function(mediaManager, config, lang, Logger){

/**
 * @class WebspeechAudioInput
 * @memberOf mmir.env.media
 * @hideconstructor
 */
return {
	/** @memberOf mmir.env.media.WebspeechAudioInput# */
	initialize: function(callBack, __mediaManager, ctxId, moduleConfig){

		/** @memberOf mmir.env.media.WebspeechAudioInput#
		 * @readonly
		 * @protected
		 * @default "webspeechAudioInput"
		 */
		var _pluginName = 'webspeechAudioInput';

		/**
		 * @type mmir.tools.Logger
		 * @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private
		 */
		var logger = Logger.create(_pluginName);

		/** @type SpeechRecognition
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput# */
		var SpeechRecognitionImpl;

		//detect feature availability:
		if(typeof webkitSpeechRecognition !== 'undefined'){
			SpeechRecognitionImpl = webkitSpeechRecognition;
		} else if(typeof SpeechRecognition !== 'undefined'){
			SpeechRecognitionImpl = SpeechRecognition;
		}
		if(!SpeechRecognitionImpl){
			//... browser does NOT support this speech-input-module: create warning message and dummy functions for the MediaManager
			logger.error('Could not load '+_pluginName+' plugin: API SpeechRecognition is not available!');

			//FIXME this error message is a quick and dirty hack -- there should be a more general way for defining the error message...
			var msg = 'Unfortunately, your internet browser'
					+'\ndoes not support Web Speech Recognition.'
					+'\n\nPlease use Google Chrome,'
					+'\nif you want to use speech input.'
					+'\n\nhttps://www.google.com/chrome';

			//invoke the passed-in initializer-callback and export the public functions:
			callBack(
				/** @class mmir.env.media.WebspeechAudioInput.DisabledWebspeechAudioInput */
				{
				/**
				 * create & show error dialog
				 * @private
				 * @memberOf mmir.env.media.WebspeechAudioInput.DisabledWebspeechAudioInput.prototype
				 */
				__triggerError: function(options, successCallback, failureCallback){
					if(typeof options === 'function'){
						failureCallback = successCallback;
						successCallback = options;
						options = void(0);
					}
					if(options){
						successCallback = successCallback? successCallback : options.success;
						failureCallback = failureCallback? failureCallback : options.error;
					}
					alert(msg);
					if(failureCallback)
						failureCallback(msg);
				}
				/**
				 * will show error dialog
				 * @public
				 * @memberOf mmir.env.media.WebspeechAudioInput.DisabledWebspeechAudioInput.prototype
				 * @see mmir.MediaManager#startRecord
				 */
				, startRecord: function(options, successCallback, failureCallback){
					this.__triggerError(options, successCallback, failureCallback);
				}
				/**
				 * will show error dialog
				 * @public
				 * @memberOf mmir.env.media.WebspeechAudioInput.DisabledWebspeechAudioInput.prototype
				 * @see mmir.MediaManager#stopRecord
				 */
				, stopRecord: function(options, successCallback, failureCallback){
					this.__triggerError(options, successCallback, failureCallback);
				}
				/**
				 * will show error dialog
				 * @public
				 * @memberOf mmir.env.media.WebspeechAudioInput.DisabledWebspeechAudioInput.prototype
				 * @see mmir.MediaManager#recognize
				 */
				, recognize: function(options, successCallback, failureCallback){
					this.__triggerError(options, successCallback, failureCallback);
				}
				/**
				 * will show error dialog
				 * @public
				 * @memberOf mmir.env.media.WebspeechAudioInput.DisabledWebspeechAudioInput.prototype
				 * @see mmir.MediaManager#cancelRecognition
				 */
				, cancelRecognition: function(successCallback, failureCallback){
					this.__triggerError(successCallback, failureCallback);
				}
			});

			return;////////////////////// EARLY EXIT ///////////////////////////
		}
		/**
		 * @constant
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput# */
		var EVENT_RESULT_FIELD = "transcript";
		/**
		 * @constant
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput# */
		var EVENT_SCORE_FIELD = "confidence";
		/**
		 * @constant
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput# */
		var UNSTABLE_LIMIT = 0.01;

		/** @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private */
		var DEFAULT_LANGUAGE = 'en-US';

		/** @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private */
		var DEFAULT_ALTERNATIVE_RESULTS = 1;

		/**
		 * Result types (same values as used by the native/Cordova plugin implementations)
		 *
		 * @type Enum
		 * @constant
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput#
		 */
		var RESULT_TYPES = {
			"FINAL":             "FINAL",
			"INTERIM":           "INTERIM",
			"INTERMEDIATE":      "INTERMEDIATE",
			"RECOGNITION_ERROR": "RECOGNITION_ERROR",
			"RECORDING_BEGIN":   "RECORDING_BEGIN",
			"RECORDING_DONE":    "RECORDING_DONE"
		};

		/** @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private */
		var micLevelsImplFile = 'webMicLevels';

		/** @type SpeechRecognition
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput# */
		var recognition = new SpeechRecognitionImpl();

		/** @type Function
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput# */
		var currentSuccessCallback;

		/** @type Function
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput# */
		var currentFailureCallback;

		/** @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private */
		var final_recognition_result = "";

		/** @type Function
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput# */
		var default_error_function;

		/** @type Function
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput# */
		var helper_error_handler;

		// flags

		/** @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private */
		var recording = false;

		/** @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private */
		var active = false;

		/** @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private */
		var aborted = false;

		// var restart_counter = 0;

		/** @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private */
		var intermediate_results = false;

		var loglevel = config.get([_pluginName, 'logLevel']);
		if(typeof loglevel !== 'undefined'){
			logger.setLevel(loglevel);
		}
		/**
		 * field for storing the previous (main) recognition result
		 * (this is used for calculating "unstable" parts, see {@link #helper_extract_results})
		 * @type String
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput#
		 */
		var _prevResult;
		/**
		 * create callback-arguments for ASR-result callback:
		 *
		 * @returns Array with
		 * <pre>
		 * [	String result,
		 * 	Number score,
		 * 	String type ["INTERIM" | "INTERMEDIATE" | "FINAL" ],
		 * 	Array<Results> alternatives,	//OPTIONAL
		 * 	String unstable			//OPTIONAL
		 * ]
		 * </pre>
		 *
		 * @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private
		 * @function
		 */
		var helper_extract_results = function(eventResultsObject){

			var res = [];
			var size = eventResultsObject.length;

			if(size < 1){
				return res;
			}

			//ASSERT size >= 1

			var result = eventResultsObject[0][EVENT_RESULT_FIELD];
			// [0]: main result
			res.push(result);
			// [1]: main confidence score
			res.push(eventResultsObject[0][EVENT_SCORE_FIELD]);

			// [2]: result type
			if(eventResultsObject.isFinal){
				res.push(recording? RESULT_TYPES.INTERMEDIATE : RESULT_TYPES.FINAL);
			}
			else {
				res.push(RESULT_TYPES.INTERIM);
			}

			// [3]: array with alternative results
			if(size > 1){
				var altRes = [];
				for(var i=1; i < size; ++i){
					altRes.push({
						result: eventResultsObject[i][EVENT_RESULT_FIELD],
						score:  eventResultsObject[i][EVENT_SCORE_FIELD]
					});
				}
				res.push(altRes);
			}
			else {
				//if no alternative results: add undefined-entry:
				res.push(void(0));
			}

			// [4]: UNSTABLE part for main result
			//NOTE the "unstable" part of an ASR result is not "natively" supported by webkitSpeechInput...
			//HACK: detect an unstable part for non-final results:
			//      * set the result to unstable if its confidence is lower than UNSTABLE_LIMIT
			//      * otherwise (i.e. the result is basically STABLE), try
			//        to detect an UNSTABLE part using the previous result
			//        (if the previous result contained more than the current stable one...)
			if( ! eventResultsObject.isFinal){

				//set to unstable, if result has a LOW score
				if(res[1] <= UNSTABLE_LIMIT){
					//add result as "unstable":
					res.push(result);
					//set main-result to empty
					res[0] = "";
				}
				//try to recover unstable part:
				else if(res[1] > UNSTABLE_LIMIT && _prevResult && _prevResult.length > result.length){

					//try to detect stable part: detect matching prefix with previous result
					var prefixIndex = 0;
					var resultLength = result.length;
					var ch = result.charAt(prefixIndex).toLowerCase();
					while(resultLength > prefixIndex && ch === _prevResult.charAt(prefixIndex).toLowerCase()){
						ch = result.charAt(++prefixIndex).toLowerCase();
					}

					//-> use the REST after the matching prefix as UNSTABLE text
					//NOTE simplification (i.e. simpler code): ignore matches of length <= 1, i.e. require prefixIndex > 0
					if(prefixIndex > 0 && prefixIndex + 1 < _prevResult.length){
						//add the REST after the detected PREFIX as "unstable":
						res.push(_prevResult.substring(prefixIndex+1));
						if(logger.isi()) logger.info('found unstable ASR part: "'+_prevResult.substring(prefixIndex+1)+'"');
					}
					else {
						//-> we have a relatively stable result that has no unstable postfix -> reset _prevResult
						_prevResult = void(0);
					}
				}

				//remember current (main) result STRING, if it "adds information":
				if(!_prevResult || result.length >= _prevResult.length){
					_prevResult = result;
				}
			}
			else {
				//if FINAL, reset field for previous-result
				_prevResult = void(0);
			}

			return res;
		};
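
		// A worked example (illustration added for this listing, not in the original source):
		// for a non-final SpeechRecognitionResult with two alternatives, e.g.
		//     [ {transcript: "hello world", confidence: 0.83},
		//       {transcript: "hallo word",  confidence: 0.41} ]   (isFinal: false)
		// helper_extract_results() would return
		//     [ "hello world", 0.83, "INTERIM",
		//       [{result: "hallo word", score: 0.41}],
		//       <the unstable part as String, if one was detected via _prevResult> ]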
		/**
		 * Counter for errors-in-a-row:
		 * each time an error is encountered, this counter is increased.
		 * On starting/canceling, or on an internal success/result callback,
		 * the counter is reset.
		 *
		 * Thus, this counter keeps track of how many times in a row
		 * the (internal) error-callback was triggered.
		 *
		 * NOTE: this is currently used for trying to restart the ASR up to
		 *       <code>max_error_retry</code> times, even on "critical" errors (during repeat-mode).
		 *
		 * @see #max_error_retry
		 *
		 * @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private
		 */
		var error_counter = 0;

		/**
		 * Maximal number of errors-in-a-row for trying to restart
		 * recognition in repeat-mode.
		 *
		 * @see #error_counter
		 *
		 * @memberOf mmir.env.media.WebspeechAudioInput#
		 * @default 5
		 * @private
		 */
		var max_error_retry = 5;

		/**
		 * default helper for error-events:
		 *
		 * determines if a RESTART is allowed/possible (in case of RECORDing mode),
		 * and otherwise triggers the current failure-callbacks.
		 *
		 * SIDE-EFFECTS: sets the private field aborted:=true if a RESTART is NOT possible.
		 *
		 * @returns {Boolean} true, if the function could process the error
		 * 		(i.e. returns false for unknown errors; these should be handled by
		 * 		the code invoking this helper function)
		 *
		 * @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private
		 * @function helper_error_handler
		 */
		helper_error_handler = function(event) {

			var type = event.error;
			switch(type){
				case "no-speech":
					if (logger.isi()) logger.info("event " + type);
					// no error-callback, just restart (if in RECORD mode)...
					return true;

				////////////////
				// "serious" errors: cannot automatically restart...

				// Audio capture failed.
				case "audio-capture":
					// if analysing audio for microphone levels (via getUserMedia)
					// is enabled, the error may have been caused by the browser/device
					// due to the fact that it does not allow multiple/parallel access
					// to the microphone resource...
					// -> try once again, but with disabled audio-analysis feature:
					if(mediaManager.micLevelsAnalysis.enabled()){
						mediaManager.micLevelsAnalysis.enabled(false);
						return true;
					}
					// ...otherwise: do not restart! (intentionally falls through)

				// Some network communication that was required to complete the recognition failed.
				case "network":
					// do not restart!
					//for "serious errors": if the errors-in-a-row counter is under the limit, DO try to restart
					if(error_counter < max_error_retry){
						return true;
					}
					// (intentionally falls through)

				// Speech input was aborted somehow, maybe by some user-agent-specific behavior such as UI that lets the user cancel speech input.
				case "aborted":
					// do not restart! (intentionally falls through)

				// The user agent is not allowing any speech input to occur for reasons of security, privacy or user preference.
				case "not-allowed":
					// user denied access -> do not automatically restart! (intentionally falls through)

				// The user agent is not allowing the web application requested speech service, but would allow some speech service, to be used either because the user agent doesn't support the selected one or because of reasons of security, privacy or user preference.
				case "service-not-allowed":
					// user agent denied access -> do not automatically restart! (intentionally falls through)

				// There was an error in the speech recognition grammar or semantic tags, or the grammar format or semantic tag format is unsupported.
				case "bad-grammar":
					// do not automatically restart! (intentionally falls through)

				// The language was not supported.
				case "language-not-supported":
					// do not automatically restart! change the language instead
					aborted = true;
					if (logger.isw()) logger.warn("event " + type);
					currentFailureCallback && currentFailureCallback(event.error);
					return true;

				default:
					//for unknown errors: return false
					break;

			}//END: switch

			return false;
		}; //END: helper_error_handler(event){...

		/** @memberOf mmir.env.media.WebspeechAudioInput#
		 * @private */
		default_error_function = function(event){
			++error_counter;
			// if (helper_error_handler.hasOwnProperty(event.error)){
			// 	helper_error_handler[event.error](event);
			// } else
			if( ! helper_error_handler(event) ){
				if (currentFailureCallback){
					currentFailureCallback(event.error);
				} else {
					logger.error("event " + event.error);
				}
			}
		};
		// set remaining event-handler functions

		/**
		 * Side-Effects:
		 *
		 * sets recognition-status to "active"
		 *
		 * starts audio-analysis (if listeners are registered for the mic-levels-changed event)
		 *
		 * @memberOf mmir.env.media.WebspeechAudioInput.recognition#
		 * @private
		 */
		recognition.onaudiostart = function(event){
			active = true;
			// if audio can start, then we have been successful in starting the voice recognition
			// so: reset counter
			// TODO: check if this is really correct
			// restart_counter=0;
			if (logger.isd()){
				logger.debug("Audio START");
				logger.debug("active: " + active);
			}
			mediaManager.micLevelsAnalysis.start();
		};

		/** @memberOf mmir.env.media.WebspeechAudioInput.recognition#
		 * @private */
		recognition.onspeechstart = function(event){
			if (logger.isd()) logger.debug("Speech START");
		};

		/** @memberOf mmir.env.media.WebspeechAudioInput.recognition#
		 * @private */
		recognition.onsoundstart = function(event){
			if (logger.isd()) logger.debug("Sound START");
		};

		/** @memberOf mmir.env.media.WebspeechAudioInput.recognition#
		 * @private */
		recognition.onaudioend = function(event){
			active = false;
			if (logger.isd()) logger.debug("Audio END");
			// mediaManager.micLevelsAnalysis.stop();// MOVED to onend: in some cases, onaudioend will not be triggered, but onend will always get triggered
		};

		/** @memberOf mmir.env.media.WebspeechAudioInput.recognition#
		 * @private */
		recognition.onspeechend = function(event){
			if (logger.isd()) logger.debug("Speech END");
		};

		/** @memberOf mmir.env.media.WebspeechAudioInput.recognition#
		 * @private */
		recognition.onsoundend = function(event){
			if (logger.isd()) logger.debug("Sound END");
		};

		/** @memberOf mmir.env.media.WebspeechAudioInput.recognition#
		 * @private */
		recognition.onstart = function(event){
			if (logger.isd()) logger.debug("Recognition START");
		};
		/**
		 * Side-Effects:
		 *
		 * sets recognition-status to "inactive"
		 *
		 * re-starts recognition if in "recording" mode OR calls the stopped-callback
		 *
		 * @memberOf mmir.env.media.WebspeechAudioInput.recognition#
		 * @private
		 */
		recognition.onend = function(event){
			active = false;
			if (logger.isd()) logger.debug("Recognition END (active: "+active+")");

			//NOTE there may be no analysis open, but stopping here (and not e.g. in onaudioend)
			//     will ensure that we _always_ remove the analysis, if it is present:
			mediaManager.micLevelsAnalysis.stop();

			// TODO: check if it is all right, if we stop restarting the ASR when restart_counter is greater than 3
			// --> this would mean, we could never start the ASR again in this instance... bad choice
			if ((aborted === false) && (recording === true)){
				// restart_counter++;
				recognition.start();
			}
			//FIXME this is a HACK for the stopRecord function ...
			else if(recognition._stopRecordCallback){
				var theCallback = recognition._stopRecordCallback;
				//this is a "1-time callback" -> remove it...
				delete recognition._stopRecordCallback;
				//... and trigger the callback:
				theCallback.call(recognition, event);
			}
		};
		/**
		 * @type function
		 * @memberOf mmir.env.media.WebspeechAudioInput.recognition#
		 * @private
		 */
		recognition.onerror = default_error_function;

		/**
		 * set maximum number of SpeechRecognitionAlternatives per result.
		 *
		 * @type Number
		 * @memberOf mmir.env.media.WebspeechAudioInput.recognition#
		 * @private
		 */
		recognition.maxAlternatives = DEFAULT_ALTERNATIVE_RESULTS;
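
		// For orientation, a minimal sketch (added for this listing, not part of the
		// original source) of the plain Web Speech API lifecycle that this plugin wraps:
		//
		//     var rec = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
		//     rec.lang = 'en-US';          // BCP-47 language tag
		//     rec.continuous = true;       // keep recognizing until stop()/abort()
		//     rec.interimResults = true;   // also emit non-final results
		//     rec.maxAlternatives = 1;     // SpeechRecognitionAlternatives per result
		//     rec.onresult = function(event){
		//         var res = event.results[event.resultIndex];
		//         console.log(res[0].transcript, res[0].confidence, res.isFinal);
		//     };
		//     rec.start();                 // later: rec.stop() or rec.abort()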
		/** @type webspeechAudioInput
		 * @private
		 * @memberOf mmir.env.media.WebspeechAudioInput# */
		var pluginExports = {
			/**
			 * Start speech recognition (without <em>end-of-speech</em> detection):
			 * after starting, the recognition continues until {@link #stopRecord} is called.
			 *
			 * @async
			 *
			 * @param {PlainObject} [options] OPTIONAL
			 * 		options for Automatic Speech Recognition:
			 * 		<pre>{
			 * 			  success: OPTIONAL Function, the status-callback (see arg statusCallback)
			 * 			, error: OPTIONAL Function, the error callback (see arg failureCallback)
			 * 			, language: OPTIONAL String, the language for recognition (if omitted, the current language setting is used)
			 * 			, intermediate: OPTIONAL Boolean, set true for receiving intermediate results (NOTE not all ASR engines may support intermediate results)
			 * 			, results: OPTIONAL Number, set how many recognition alternatives should be returned at most (NOTE not all ASR engines may support this option)
			 * 			, mode: OPTIONAL "search" | "dictation", set the recognition mode (NOTE not all ASR engines may support this option)
			 * 			, eosPause: OPTIONAL "short" | "long", length of pause after speech for end-of-speech detection (NOTE not all ASR engines may support this option)
			 * 			, disableImprovedFeedback: OPTIONAL Boolean, disable improved feedback when using intermediate results (NOTE not all ASR engines may support this option)
			 * 		}</pre>
			 *
			 * @param {Function} [statusCallback] OPTIONAL
			 * 		callback function that is triggered when recognition starts, text results become available, and recognition ends.
			 * 		The callback signature is:
			 * 		<pre>
			 * 		callback(
			 * 			text: String | "",
			 * 			confidence: Number | Void,
			 * 			status: "FINAL"|"INTERIM"|"INTERMEDIATE"|"RECORDING_BEGIN"|"RECORDING_DONE",
			 * 			alternatives: Array<{result: String, score: Number}> | Void,
			 * 			unstable: String | Void
			 * 		)
			 * 		</pre>
			 *
			 * 		Usually, for status <code>"FINAL" | "INTERIM" | "INTERMEDIATE"</code> text results are returned, where
			 * 		<pre>
			 * 		"INTERIM": an interim result, that might still change
			 * 		"INTERMEDIATE": a stable, intermediate result
			 * 		"FINAL": a (stable) final result, before the recognition stops
			 * 		</pre>
			 * 		If present, the <code>unstable</code> argument provides a preview for the currently processed / recognized text.
			 *
			 * 		<br>NOTE that when using <code>intermediate</code> mode, status-calls with <code>"INTERMEDIATE"</code> may
			 * 		contain "final intermediate" results, too.
			 *
			 * 		<br>NOTE: if used in combination with <code>options.success</code>, this argument will supersede the options
			 *
			 * @param {Function} [failureCallback] OPTIONAL
			 * 		callback function that is triggered when an error occurred.
			 * 		The callback signature is:
			 * 		<code>callback(error)</code>
			 *
			 * 		<br>NOTE: if used in combination with <code>options.error</code>, this argument will supersede the options
			 *
			 * @memberOf mmir.env.media.WebspeechAudioInput.prototype
			 * @see mmir.MediaManager#startRecord
			 */
			startRecord: function(options, statusCallback, failureCallback, intermediateResults){//argument intermediateResults is deprecated (use options.intermediate instead)

				if(typeof options === 'function'){
					intermediateResults = failureCallback;
					failureCallback = statusCallback;
					statusCallback = options;
					options = void(0);
				}

				if(!options){
					options = {};
				}
				options.success = statusCallback? statusCallback : options.success;
				options.error = failureCallback? failureCallback : options.error;
				options.intermediate = typeof intermediateResults === 'boolean'? intermediateResults : !!options.intermediate;
				options.language = options.language? options.language : lang.getLanguageConfig(_pluginName) || DEFAULT_LANGUAGE;
				options.results = options.results? options.results : DEFAULT_ALTERNATIVE_RESULTS;
				//TODO
				// options.disableImprovedFeedback =
				// options.mode =
				// options.eosPause =

				var errMsg;
				if (active == true){
					errMsg = "Voice recognition already running.";
					if(options.error){
						options.error('[WARN] '+_pluginName+': '+errMsg);
					} else {
						logger.warn(errMsg);
					}
					return;////////////////////////// EARLY EXIT //////////////////////////
				}
				aborted = false;
				recording = mediaManager.micLevelsAnalysis.active(true);
				error_counter = 0;
				_prevResult = void(0);

				// flush any old results
				final_recognition_result = "";

				// set recognition language
				recognition.lang = options.language;

				// set max. alternative results:
				recognition.maxAlternatives = options.results;

				// do not stop recognition on silence
				recognition.continuous = true;

				// set intermediate_results - for access by stopRecord
				intermediate_results = !!options.intermediate;

				// get results continuously
				recognition.interimResults = intermediate_results;

				currentFailureCallback = options.error;
				currentSuccessCallback = options.success;

				recognition.onerror = default_error_function;

				var self = this;
				// - see https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#speechreco-event
				recognition.onresult = function (event) {

					var finalResult = '';
					error_counter = 0;
					var evtResults = event.results[event.resultIndex];

					if (logger.isd()){
						// logger.debug("interim: " + event.results[event.resultIndex][0][EVENT_RESULT_FIELD] + " ("+event.results[event.resultIndex][0].confidence+")");
						logger.debug("interim: " + JSON.stringify(event.results));
					}

					// if event.results[event.resultIndex].isFinal is true, then there is a pause.
					if (evtResults.isFinal) {
						if (logger.isd()) logger.debug("final result");

						finalResult = evtResults[0][EVENT_RESULT_FIELD];

						if (intermediate_results == true){
							//INTERMEDIATE results mode: only post the last ASR result to the callback:
							// final_recognition_result += " " + finalResult;
							final_recognition_result += finalResult;
							currentSuccessCallback && currentSuccessCallback.apply(self, helper_extract_results(evtResults) );
						} else {
							//FINAL results mode: only pass the collected ASR results to the callback when recognition has finished:
							// final_recognition_result += " " + finalResult;
							final_recognition_result += finalResult;

							//audio-input already closed --> this is the last invocation of the callback, so send the final result
							if (recording == false){
								currentSuccessCallback && currentSuccessCallback.call(self, final_recognition_result);
							}
						}
					}
					//for intermediate results (only if we have a callback):
					else if (intermediate_results == true && currentSuccessCallback){
						currentSuccessCallback.apply(self, helper_extract_results(evtResults) );
					}
				};

				// start the recognition
				try{
					recognition.start();
				} catch (exc){
					errMsg = "Could not start voice recognition: ";
					if(options.error){
						options.error('[ERROR] '+_pluginName+': ' + errMsg + (exc && exc.stack? exc.stack : exc));
					} else {
						logger.error(errMsg + exc, exc);
					}
				}
			},
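
			// Usage sketch for startRecord() (added for this listing, not part of the original
			// source; assumes an initialized MediaManager instance, accessible as mmir.media):
			//
			//     mmir.media.startRecord({
			//         language: 'en-US',
			//         intermediate: true,
			//         success: function(text, confidence, status, alternatives, unstable){
			//             console.log(status + ': "' + text + '"' + (unstable? ' ...' + unstable : ''));
			//         },
			//         error: function(err){ console.error(err); }
			//     });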
			/**
			 * @public
			 * @memberOf mmir.env.media.WebspeechAudioInput.prototype
			 * @see mmir.MediaManager#stopRecord
			 */
			stopRecord: function(options, statusCallback, failureCallback){

				if(typeof options === 'function'){
					failureCallback = statusCallback;
					statusCallback = options;
					options = void(0);
				}
				if(options){
					statusCallback = statusCallback? statusCallback : options.success;
					failureCallback = failureCallback? failureCallback : options.error;
				}

				recording = mediaManager.micLevelsAnalysis.active(false);

				var isSuccessTriggered = false;
				var self = this;

				// recognize (recognition.continuous == true) or stopRecord (recognition.continuous == false)
				if (recognition.continuous == false){
					recognition.onresult = function (event) {
						var finalResult = '';

						if (logger.isd()) logger.debug("interim: " + event.results[event.resultIndex][0][EVENT_RESULT_FIELD]);

						var evtResults = event.results[event.resultIndex];
						// if event.results[event.resultIndex].isFinal is true, then there is a pause.
						if (evtResults.isFinal) {
							if (logger.isd()) logger.debug("final result");

							finalResult = evtResults[0][EVENT_RESULT_FIELD];

							// is it called for the last time (recording == false)
							if (recording == false){
								final_recognition_result += finalResult;

								if (intermediate_results == true){
									// currentSuccessCallback && currentSuccessCallback(finalResult);
									currentSuccessCallback && currentSuccessCallback.apply(self, helper_extract_results(evtResults) );
								} else {
									currentSuccessCallback && currentSuccessCallback.call(self, final_recognition_result);
								}

								if(statusCallback){
									if(isSuccessTriggered){
										logger.info('stopRecord: success callback was already triggered!');
									}
									isSuccessTriggered = true;
									statusCallback.call(self, final_recognition_result);
								}
							} else {
								// final_recognition_result += " " + finalResult;
								final_recognition_result += finalResult;
								if (intermediate_results == true){
									currentSuccessCallback && currentSuccessCallback.call(self, finalResult);
								}
							}
						}
						else {
							currentSuccessCallback && currentSuccessCallback.apply(self, helper_extract_results(evtResults) );
						}
					};
				}

				// TODO: recognition.onstop = function(){statusCallback}
				//HACK: set an "internal" callback, that will be checked in the onend-listener (see above)
				//      (NOTE: the onstop()-listener does not seem to get called ...)
				recognition._stopRecordCallback = function(evt){
					if(statusCallback && !isSuccessTriggered){
						// logger.debug('stopRecord: calling success callback onstop (without last ASR result)');//FIXME: debug
						isSuccessTriggered = true;
						statusCallback.call(self, '', -1, RESULT_TYPES.RECORDING_DONE);
					}
				};

				try{
					recognition.stop();
				} catch (exc){
					var errMsg = "Could not stop voice recognition: "+ exc;
					if(failureCallback){
						failureCallback(errMsg);
					}
					logger.error(errMsg, exc);
				}
			},
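
			// Usage sketch for stopRecord() (added for this listing, not part of the original
			// source): stops a session started via startRecord(); the status-callback receives
			// either the collected final result text, or ('', -1, 'RECORDING_DONE') if no
			// (further) result was recognized:
			//
			//     mmir.media.stopRecord({
			//         success: function(text, confidence, status){
			//             console.log('stopped: ' + text);
			//         },
			//         error: function(err){ console.error(err); }
			//     });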
			/**
			 * Start speech recognition with <em>end-of-speech</em> detection:
			 *
			 * the recognizer automatically tries to detect when speech has finished and
			 * triggers the status-callback accordingly with the results.
			 *
			 * <p>
			 * NOTE: no end event is triggered, if recognize() is stopped via stopRecord()
			 *
			 * @public
			 * @memberOf mmir.env.media.WebspeechAudioInput.prototype
			 * @see mmir.MediaManager#recognize
			 * @see #startRecord
			 */
			recognize: function(options, statusCallback, failureCallback, intermediateResults){//argument intermediateResults is deprecated (use options.intermediate instead)

				if(typeof options === 'function'){
					intermediateResults = failureCallback;
					failureCallback = statusCallback;
					statusCallback = options;
					options = void(0);
				}

				if(!options){
					options = {};
				}
				options.success = statusCallback? statusCallback : options.success;
				options.error = failureCallback? failureCallback : options.error;
				options.intermediate = typeof intermediateResults === 'boolean'? intermediateResults : !!options.intermediate;
				options.language = options.language? options.language : lang.getLanguageConfig(_pluginName) || DEFAULT_LANGUAGE;
				options.results = options.results? options.results : DEFAULT_ALTERNATIVE_RESULTS;
				//TODO
				// options.disableImprovedFeedback =
				// options.mode =
				// options.eosPause =

				var errMsg;
				if (active == true){
					errMsg = "Voice recognition already running.";
					if(failureCallback){
						failureCallback('[WARN] '+_pluginName+': '+errMsg);
					}
					logger.warn(errMsg);
					return;
				}
				aborted = false;
				recording = mediaManager.micLevelsAnalysis.active(true);
				error_counter = 0;
				_prevResult = void(0);

				// flush any old results
				final_recognition_result = "";

				// set recognition language
				recognition.lang = options.language;

				// set max. alternative results:
				recognition.maxAlternatives = options.results;

				// stop recognition on silence
				recognition.continuous = false;

				// set intermediate results:
				recognition.interimResults = options.intermediate;

				currentFailureCallback = options.error;
				currentSuccessCallback = options.success;

				recognition.onerror = default_error_function;

				var self = this;
				// - see https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#speechreco-event
				recognition.onresult = function (event) {
					// var finalResult = '';

					if (logger.isd()) logger.debug("interim: " + event.results[event.resultIndex][0][EVENT_RESULT_FIELD]);

					// if event.results[event.resultIndex].isFinal is true, then there is a pause.
					if (event.results[event.resultIndex].isFinal) {
						if (logger.isd()) logger.debug("final result");

						//stop recording - finish after one sentence!
						//NOTE do this before calling helper_extract_results(), in order to make the result type FINAL
						recording = mediaManager.micLevelsAnalysis.active(false);

						var returnArgs = helper_extract_results(event.results[event.resultIndex]);

						// TODO: dirty hack - somehow it does not throw an end event after recognition, if recognize is used
						self.cancelRecognition();
						currentSuccessCallback && currentSuccessCallback.apply(self, returnArgs);//finalResult);
					}
				};

				// start the recognition
				try{
					recognition.start();
				} catch (exc){
					errMsg = "Could not start voice recognition: ";
					if(options.error){
						options.error('[ERROR] '+_pluginName+': ' + errMsg + (exc && exc.stack? exc.stack : exc));
					} else {
						logger.error(errMsg + exc, exc);
					}
				}
			},
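
			// Usage sketch for recognize() (added for this listing, not part of the original
			// source): recognition stops automatically after the first final result:
			//
			//     mmir.media.recognize({
			//         language: 'en-US',
			//         results: 3,
			//         success: function(text, confidence, status, alternatives){
			//             console.log('recognized: ' + text, alternatives);
			//         },
			//         error: function(err){ console.error(err); }
			//     });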
			/**
			 * @public
			 * @memberOf mmir.env.media.WebspeechAudioInput.prototype
			 * @see mmir.MediaManager#cancelRecognition
			 */
			cancelRecognition: function(successCallback, failureCallback){

				recording = mediaManager.micLevelsAnalysis.active(false);
				aborted = true;
				error_counter = 0;

				currentFailureCallback = failureCallback;

				var self = this;
				// callback used if an error occurred - includes abort
				// gets the event as argument - see https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#speechreco-error
				// * if aborted: call successCallback
				recognition.onerror = function(event){
					if ((event.error == "aborted") && (aborted == true)){
						aborted = false;
						recognition.onerror = default_error_function;
						successCallback && successCallback.call(self, event.error);
					} else {
						// currentFailureCallback(event.error);
						default_error_function.call(self, event);
					}
				};
				recognition.abort();
			},
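
			// Usage sketch for cancelRecognition() (added for this listing, not part of the
			// original source): aborts the current session; the success callback is invoked
			// once the abort has been processed:
			//
			//     mmir.media.cancelRecognition(
			//         function(){ console.log('recognition canceled'); },
			//         function(err){ console.error(err); }
			//     );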
			/**
			 * for debugging -- NOTE: use with caution, may be removed in the future
			 * @private
			 * @see mmir.tools.Logger#getLevel
			 * @memberOf mmir.env.media.WebspeechAudioInput.prototype
			 */
			getLoglevel: function(){
				return logger.getLevel();
			},
			/**
			 * for debugging -- NOTE: use with caution, may be removed in the future
			 * @default 'warning' (see {@link mmir.tools.Logger#setLevel})
			 * @private
			 * @memberOf mmir.env.media.WebspeechAudioInput.prototype
			 */
			setLoglevel: function(logvalue){
				logger.setLevel(logvalue);
				return logger.getLevel();
			}
		};
		if(!mediaManager.micLevelsAnalysis){

			//load mic-levels-analysis before invoking the initializer-callback

			mediaManager.micLevelsAnalysis = true;//<- indicate that micLevelsAnalysis will be loaded (in case other plugins want to load it)

			//load mic-levels-analysis implementation into mediaManager's default context (i.e. omit 4th argument),
			//  since mic-levels-analysis should be used as a singleton:
			mediaManager.loadFile(micLevelsImplFile, function success(){

				logger.debug('initialized microphone-levels analysis for '+_pluginName);

				//invoke the passed-in initializer-callback and export the public functions:
				callBack(pluginExports);

			}, function error(err){

				logger.error('ERROR: using stub implementation for microphone-levels analysis, because loading the implementation file '+micLevelsImplFile+' failed: '+err);

				/**
				 * @class mmir.env.media.WebspeechAudioInput.MicLevelsAnalysisStub
				 * @extends mmir.env.media.MicLevelsAnalysis
				 * @hideconstructor
				 */
				mediaManager.micLevelsAnalysis = {
					_active: false,
					start: function(){
						logger.info('STUB::micLevelsAnalysis.start()');
					},
					stop: function(){
						logger.info('STUB::micLevelsAnalysis.stop()');
					},
					enabled: function(enable){
						logger.info('STUB::micLevelsAnalysis.enabled('+(typeof enable === 'undefined'? '': enable)+') -> false');
						return false;
					},
					active: function(active){
						this._active = typeof active === 'undefined'? this._active : active;
						logger.info('STUB::micLevelsAnalysis.active('+(typeof active === 'undefined'? '': active)+') -> ' + this._active);
						return this._active;
					}
				};

				//invoke the passed-in initializer-callback and export the public functions:
				callBack(pluginExports);
			});

		}
		else {
			//micLevelsAnalysis already loaded
			// -> immediately invoke the initializer-callback and export the public functions:
			callBack(pluginExports);
		}

	}//END: initialize()

};

});//END define