Source: semantic/semanticInterpreter.js

  1. define(['mmirf/resources', 'mmirf/grammarConverter', 'mmirf/logger', 'module', 'require'
  2. ],
  3. /**
  4. * @name SemanticInterpreter
  5. * @memberOf mmir
  6. * @static
  7. * @class
  8. * @hideconstructor
  9. *
  10. * @requires require
  11. */
  12. function (
  13. res, GrammarConverter, Logger, module, require
  14. ){
  15. /**
  16. * The instance for the singleton SemanticInterpreter
  17. *
  18. * @type SemanticInterpreter
  19. * @private
  20. *
  21. * @memberOf mmir.SemanticInterpreter#
  22. */
  23. var instance = null;
  24. /**
  25. * @private
  26. * @type mmir.tools.Logger
  27. * @memberOf mmir.SemanticInterpreter#
  28. */
  29. var logger = Logger.create(module);
  30. /**
  31. * The version number for the format of generated (JavaScript) grammars.
  32. *
  33. * This number is "written into" the generated grammars and then
  34. * used as argument, when the grammar adds itself via
  35. * <code>addGrammar(id, func, versionNumber)</code>.
  36. *
  37. * See generator function build_grammar() within createAndAddGrammar().
  38. *
  39. * NOTE: This version number must be increased, when the way changes, how
  40. * grammars are generated.
  41. * Or more precisely: when previously generated grammars cannot
  42. * be used anymore, after the generation mechanism has been changed.
  43. *
  44. * @constant
  45. * @private
  46. *
  47. * @memberOf mmir.SemanticInterpreter#
  48. */
  49. var GRAMMAR_FILE_FORMAT_VERSION = 7;
  50. /**
  51. * @constructs SemanticInterpreter
  52. * @memberOf mmir.SemanticInterpreter#
  53. * @private
  54. * @ignore
  55. */
  56. function constructor(){
  57. /**
  58. * "map" for grammar implementations (e.g. for different languages)
  59. *
  60. * @private
  61. *
  62. * @memberOf mmir.SemanticInterpreter#
  63. */
  64. var grammarImplMap = {};
  65. /**
  66. * list of IDs for grammar implementations (e.g. for different languages).
  67. *
  68. * This list contains the "keys" of all current entries in <tt>grammarImplMap</tt>.
  69. *
  70. * @private
  71. * @type Array<String>
  72. *
  73. * @memberOf mmir.SemanticInterpreter#
  74. */
  75. var grammarImplList = [];
  76. /**
  77. * id (i.e. the <em>key</em> for map <tt>grammarImplMap</tt>) for currently used
  78. * grammar.
  79. *
  80. * If for invocations of interpret(..) etc. function the ID/languageCode
  81. * argument is missing/omitted, then this id will be used.
  82. *
  83. * NOTE: if not <tt>NULL</tt>, the grammar must be available, either
  84. * as compiled JS file (which must be already loaded, i.e. already present in <tt>grammarImplMap</tt>), or
  85. * as JSON grammar file (which must be available at <tt>/config/languages/[ID]/grammar.json</tt>
  86. *
  87. * @type String
  88. * @private
  89. *
  90. * @memberOf mmir.SemanticInterpreter#
  91. */
  92. var currentGrammarId = null;
  93. /**
  94. * @type String
  95. * @private
  96. * @memberOf mmir.SemanticInterpreter#
  97. */
  98. var currentGrammarEningeId = null;
  99. /**
  100. * If true, the async versions of the grammar engines are loaded,
  101. * i.e. compilation of grammar parsers will be asynchronously done in a WebWorker
  102. *
  103. * @type Boolean
  104. * @private
  105. * @default false
  106. * @memberOf mmir.SemanticInterpreter#
  107. */
  108. var _isAsyncCompileMode = false;
  109. /**
  110. * If true, strict JavaScript mode will be disabled when generating grammars
  111. *
  112. * @type Boolean
  113. * @private
  114. * @default false
  115. * @memberOf mmir.SemanticInterpreter#
  116. */
  117. var _disableStrictMode = false;
  118. /**
  119. * If true, pre-processing input-phrase (before running interpretion) will
  120. * include meta-data for changed positions (due to pre-processing) in input-string
  121. *
  122. * E.g. can be used to map semantic-results (matched tokens/utterances where
  123. * e.g. stopwords would be removed) on the raw input-string.
  124. *
  125. * @type Boolean
  126. * @private
  127. * @default true
  128. * @memberOf mmir.SemanticInterpreter#
  129. */
  130. var _isCalcProcPos = true;
  131. /**
  132. * @type String
  133. * @constant
  134. * @private
  135. * @memberOf mmir.SemanticInterpreter#
  136. */
  137. var DEFAULT_GRAMMAR_ENGINE = 'jscc';
  138. /**
  139. * @type String
  140. * @constant
  141. * @private
  142. * @memberOf mmir.SemanticInterpreter#
  143. */
  144. var GRAMMAR_MODULE_ID_PREFIX = 'mmirf/';
  145. /**
  146. * @type String
  147. * @constant
  148. * @private
  149. * @memberOf mmir.SemanticInterpreter#
  150. */
  151. var GRAMMAR_MODULE_ID_POSTFIX = 'Gen';
  152. /**
  153. * @type String
  154. * @constant
  155. * @private
  156. * @memberOf mmir.SemanticInterpreter#
  157. */
  158. var GRAMMAR_ASYNC_MODULE_MODIFIER = 'Async';
  159. /**
  160. * @private
  161. * @memberOf mmir.SemanticInterpreter#
  162. */
  var doSetGrammarEngine = function(id, asyncCompileMode, disableStrictMode){
    // The engine ID is taken over as-is; a falsy value makes
    // doGetGrammarEngine() fall back to DEFAULT_GRAMMAR_ENGINE.
    currentGrammarEningeId = id;

    // Compile mode: untouched when the argument is omitted, otherwise coerced to a boolean.
    _isAsyncCompileMode = typeof asyncCompileMode === 'undefined'? _isAsyncCompileMode : Boolean(asyncCompileMode);

    // Strict mode: only genuine boolean values are accepted, everything else is ignored.
    _disableStrictMode = typeof disableStrictMode === 'boolean'? disableStrictMode : _disableStrictMode;
  };
  172. /**
  173. * @private
  174. * @memberOf mmir.SemanticInterpreter#
  175. */
  var doGetGrammarEngine = function(){
    // A falsy engine ID (null, undefined, empty string) means that no engine
    // was selected explicitly -> use the default engine.
    return currentGrammarEningeId || DEFAULT_GRAMMAR_ENGINE;
  };
  182. /**
  183. * Flag for enabling/disabling processing of SemanticInterpreter.
  184. *
  185. * If disabled, interpret(), applyPreProcessing() will return <tt>null</tt> values.
  186. *
  187. * NOTE: if no grammar for any language is available, the SemanticInterpreter should be disabled.
  188. *
  189. * Setting a language automatically enables the SemanticInterpreter.
  190. *
  191. * @type Boolean
  192. * @private
  193. * @memberOf mmir.SemanticInterpreter#
  194. */
  195. var _isEnabled = false;
  196. /**
  197. * @private
  198. * @memberOf mmir.SemanticInterpreter#
  199. */
  var doSetEnabled = function(isEnabled){
    // Coerce to a real boolean, so that the flag (and thus the value returned by
    // doCheckIsEnabled()) always has its documented type, even if a caller
    // passes a truthy/falsy non-boolean value.
    _isEnabled = !!isEnabled;
  };
  /**
   * Query the enabled-flag of the SemanticInterpreter.
   *
   * @returns {Boolean} the current value of the enabled-flag
   *
   * @private
   * @memberOf mmir.SemanticInterpreter#
   */
  var doCheckIsEnabled = function(){
    return _isEnabled;
  };
  210. /**
  211. * Add/register grammar for use with {@link #interpret}
  212. *
  213. * NOTE: if no other grammar is available yet, <tt>currentGrammarId</tt> will be set to <tt>id</tt>.
  214. *
  215. * NOTE: if currently disabled, calling this function automatically enables ( setEnabled(TRUE) ),
  216. * the semantic interpreter.
  217. *
  218. * @function
  219. * @param id {String} ID for the grammar (e.g. an ISO-639 language code)
  220. * @param grammarImpl {mmir.grammar.GrammarConverter|Function} the executable JavaScript grammar implementation
  221. * IF {mmir.grammar.GrammarConverter}: the impl. with valid member {Function} {@link mmir.grammar.GrammarConverter.executeGrammar()}
  222. * IF {Function}: the {Function} {@link mmir.grammar.GrammarConverter#executeGrammar()} -
  223. * In this case, if no GrammarConverter instance for <tt>id</tt> is present, a new one will be created;
  224. * The stopwords must already be set, be part of the options-argument
  225. * (see doc for <code>fileFormatNo</code>), or must additionally be set for the GrammarConverter
  226. * instance (e.g. using {@link mmir.SemanticInterpreter.setStopwords})
  227. * @param {Number|PlainObject} [fileFormatNo] OPTIONAL
  228. * If Number and the number given does not match {@link #GRAMMAR_FILE_FORMAT_VERSION}
  229. * the file format is assumed to be out-dated and an Error will be thrown.
  230. *
  231. * If PlainObject, i.e. an options object, the following properties are evaluated
  232. * (all properties are optional):
  233. * <pre>fileFormat: NUMBER, default: undefined</pre>
  234. * (desc. see above)
  235. * <pre>execMode: 'sync' | 'async', default: 'sync'</pre>
  236. * if 'async' then the grammar is executed asynchronously, i.e. interpret()
  237. * must be invoked with a callback function in order to retrieve the result
  238. * <pre>stopwords: Array<string>, default: null</pre>
  239. * if given, the grammar (GrammarConverter) will be set with this stopword list, i.e. <code>grammar.setStopwords(stopwords)</code>
  240. *
  241. * @throws Error if <code>fileFormatNo</code> is given, but does not match GRAMMAR_FILE_FORMAT_VERSION.
  242. *
  243. * @private
  244. * @memberOf mmir.SemanticInterpreter#
  245. */
  var doAddGrammar = function(id, grammarImpl, fileFormatNo){
    // Guard: without an ID, the lookup via doGetGrammar() below would fall back to the
    // *current* grammar and overwrite its grammar function, and the entry would be
    // registered under a bogus key (e.g. "undefined").
    if(!id){
      throw new Error('Could not add grammar: required grammar ID is missing');
    }

    var execMode = 'sync';
    var stopwords = null;
    if(fileFormatNo && typeof fileFormatNo === 'object'){
      // 3rd argument is an options object: extract its (optional) properties
      var options = fileFormatNo;
      if(options.execMode){
        // keep the default 'sync', if the options do not specify an execution mode
        execMode = options.execMode;
      }
      stopwords = options.stopwords;
      //lastly: overwrite fileFormatNo with the corresponding property:
      fileFormatNo = options.fileFormat;
    }

    //check if the added grammar has correct format
    // (loose comparison on purpose: a version given as numeric string is accepted, too)
    if(fileFormatNo && fileFormatNo != GRAMMAR_FILE_FORMAT_VERSION){
      //grammar has old / out-dated format:
      throw new Error('Grammar file has wrong format: need grammar file with format version '
        +GRAMMAR_FILE_FORMAT_VERSION+', but got: '+fileFormatNo
        + '. Please update generated grammar (delete '
        + res.getGeneratedGrammarsPath() +' and re-build grammars).'
      );
    }

    //the grammar function must be "wrapped" in a GrammarConverter instance
    // ... if not, do so now:
    if( ! (grammarImpl instanceof GrammarConverter)){
      //re-use the existing grammar-converter for this ID (= language code),
      // or create a new one, if none exists yet
      // (2nd argument TRUE: do not try to load/compile a missing grammar)
      var gc = doGetGrammar(id, true) || new GrammarConverter();
      gc.setGrammarFunction(grammarImpl, execMode === 'async');
      grammarImpl = gc;
    }

    var isAlreadyPresent = checkHasGrammar(id);
    grammarImplMap[id] = grammarImpl;
    if( ! isAlreadyPresent){
      //NOTE: the current grammar is NOT set automatically here, since this may produce
      //      side effects (it must be set explicitly using setCurrentGrammar(lang))
      grammarImplList.push(id);
    }

    if(stopwords){
      grammarImpl.setStopWords(stopwords);
    }

    //adding a grammar automatically enables the semantic interpreter
    doSetEnabled(true);
  };
  291. /**
  292. * @private
  293. * @memberOf mmir.SemanticInterpreter#
  294. */
  var doSetStopwords = function(id, stopwordArray){
    var grammarConverter = doGetGrammar(id);
    // If the grammar is not registered (yet), doGetGrammar() returns undefined:
    // fail with an explicit message (same style as doApplyPreproc()) instead of
    // an incidental TypeError from dereferencing undefined.
    if(!grammarConverter){
      throw new Error('No grammar for ID '+(id || currentGrammarId));
    }
    grammarConverter.setStopWords(stopwordArray);
  };
  298. /**
  299. * HELPER retrieve the executable grammar:
  300. * if already loaded, return the grammar instance, otherwise load & compile.
  301. *
  302. * @param {String} id
  303. * the ID (e.g. language code) for the grammar
  304. * @param {Boolean} [doNotResolve] OPTIONAL
  305. * if <code>true</code> AND the requested grammar is not loaded yet,
  306. * then the grammar will NOT be loaded (if omitted or <code>false</code>
  307. * missing grammars will automatically be loaded and compiled)
  308. * @param {Function} [callback] OPTIONAL
  309. * if grammar has to be loaded (and compiled), the provided callback
  310. * will be called, after completion with the corresponding GrammarConverter instance:
  311. * <code>callback(newGrammarConverter)</code>.
  312. *
  313. * @return {mmir.grammar.GrammarConverter}
  314. * the registered grammar instance (which wraps the executable grammar), if the grammar is
  315. * already loaded (if the grammar has to be loaded and compiled, you need to
  316. * wait for the callback-call and then re-invoke doGetGrammar()).
  317. *
  318. * @private
  319. * @memberOf mmir.SemanticInterpreter#
  320. */
  var doGetGrammar = function(id, doNotResolve, callback){//NOTE: this should stay private
    // no explicit ID: fall back to the currently selected grammar (if there is one)
    if(!id){
      if(!currentGrammarId){
        throw new Error('Could not retrieve grammar: required grammar ID is missing');
      }
      id = currentGrammarId;
    }

    // support the short call signature doGetGrammar(id, callback)
    if(typeof doNotResolve === 'function' && !callback){
      callback = doNotResolve;
      doNotResolve = false;
    }

    // if the caller did not supply a callback, and info-logging is active,
    // use a "debug-info callback" that reports the completed grammar creation
    var isDefaultCallback = !!(!callback && logger.isInfo());
    if(isDefaultCallback){
      callback = function(){
        if(logger.isInfo()) logger.info('created executable grammar for "'+id+'" from source '+instance.get_json_grammar_url(id));
      };
    }

    var needsCompilation = !doNotResolve && ! checkHasGrammar(id);
    if(needsCompilation){
      // NOTE: the JSON grammar is compiled directly, without first checking for a
      //       generated (but not yet loaded) executable grammar: that check was
      //       disabled, because it would pull in too many dependencies.
      createAndAddGrammar(instance.get_json_grammar_url(id), id, callback);
    }
    else if(callback && !isDefaultCallback){
      // nothing to load: notify the caller immediately
      callback(grammarImplMap[id]);
    }

    // NOTE: undefined, if the grammar is not registered (yet)
    return grammarImplMap[id];
  };
  369. /**
  370. * Check if a grammar is registered
  371. *
  372. * @param {string} id the grammar ID
  373. * @return {Boolean} true, if grammar with ID is registered
  374. *
  375. * @private
  376. * @memberOf mmir.SemanticInterpreter#
  377. */
  var checkHasGrammar = function(id){
    // Only consider own entries of the map: grammarImplMap is a plain object, so a
    // simple property lookup would also "find" inherited members like "toString"
    // or "constructor" and report them as registered grammars.
    return Object.prototype.hasOwnProperty.call(grammarImplMap, id) && typeof grammarImplMap[id] !== 'undefined';
  };
  381. /**
  382. * Remove a registered grammar
  383. *
  384. * @param {string} id the grammar ID to remove
  385. *
  386. * @private
  387. * @memberOf mmir.SemanticInterpreter#
  388. */
  var doRemoveGrammar = function(id){
    // nothing to do for unknown grammars
    if( ! checkHasGrammar(id)){
      return;
    }

    // drop the implementation ...
    delete grammarImplMap[id];

    // ... and its (first) entry in the ID-list
    var pos = 0;
    var total = grammarImplList.length;
    while(pos < total && grammarImplList[pos] != id){
      ++pos;
    }
    if(pos < total){
      grammarImplList.splice(pos, 1);
    }
  };
  402. //TODO move create/build into GrammarConverter
  403. /**
  404. * @param {String|JSONObject} doRecompile
  405. * IF {String}: the String will be used as URL for loading the JSON grammar definition
  406. * IF {Object}: the Object will be used as JSON representation for the grammar
  407. *
  408. * @param {String} [generatedParserLanguageCode] OPTIONAL
  409. * if param doRecompile is used, this String specifies the
  410. * language for the generated grammatic-parser. If omitted, the default "de" (German) will be used.
  411. * NOTE: this must be a valid ISO language code!
  412. *
  413. * @param {Function} [callback] OPTIONAL
  414. * a callback that is invoked after the grammar was created and added to the SemanticInterpreter.
  415. * The callback-function will be invoked with corrsponding GrammarConverter instance, i.e. <code>callback(newGrammarConverter);</code>
  416. * @function
  417. *
  418. * @private
  419. * @memberOf mmir.SemanticInterpreter#
  420. */
  421. function createAndAddGrammar(doRecompile, generatedParserLanguageCode, callback){
  422. var gc = new GrammarConverter();
  423. //callback that will be used after the JSON file for the grammar was loaded:
  424. function build_grammar(theConverterInstance){//<- argument is the GrammarConverter instance
  425. var genId = doGetGrammarEngine();//one of ['jscc' | 'pegjs' | 'jison'];
  426. var genName = GRAMMAR_MODULE_ID_PREFIX + genId + (_isAsyncCompileMode? GRAMMAR_ASYNC_MODULE_MODIFIER : '') + GRAMMAR_MODULE_ID_POSTFIX;
  427. var compileOptions = {
  428. fileVersion: GRAMMAR_FILE_FORMAT_VERSION,
  429. strict: !_disableStrictMode
  430. };
  431. var onModuleLoaded = function onLoad(gen){
  432. //initialize the generator (initialization may be async -> need callback/Promise)
  433. // (-> if already initialized, the then-callback will be invoked immediately)
  434. gen.init().then(function onInit(){
  435. //actually start compilation of the grammar definition:
  436. // usually this involves 2 steps:
  437. // (1) converting the JSON grammar into a specific ParserParser syntax (e.g. JS/CC syntax)
  438. // (2) compiling this syntax using the corresponding Parser-Generator
  439. // -> the resulting parser-function will then be registered on the SemanticInterpreter instance
  440. // (using its addGrammar() function) along with the stopword definition (using the setStopwords() function)
  441. gen.compileGrammar(theConverterInstance, generatedParserLanguageCode, compileOptions, function onCompiled(convertedInstance){
  442. //add the grammar-parser-text and grammar-definition-text to the newly registered Grammar-instance
  443. // (-> registering is done within the compileGrammar() function!)
  444. var registeredGrammarInstance = doGetGrammar(generatedParserLanguageCode, true);
  445. if(registeredGrammarInstance){
  446. registeredGrammarInstance.setGrammarSource(convertedInstance.getGrammarSource());
  447. registeredGrammarInstance.setGrammarDef(convertedInstance.getGrammarDef());
  448. }
  449. else {
  450. logger.error('A problem occured during generation of grammar for "'+generatedParserLanguageCode+'"');
  451. }
  452. //invoke callback if present:
  453. if(callback){
  454. callback(registeredGrammarInstance);
  455. }
  456. });
  457. });
  458. };//END: onModuleLoaded([jsccGen])
  459. //FIXME webpack emits a warning, if normal require() is used -> TODO find other way than using mmir.require() for getting rid of the warning (i.e. avoid adding dependency for mmirf/core!)
  460. var req = typeof WEBPACK_BUILD !== 'undefined' && WEBPACK_BUILD? require('mmirf/core').require : require;
  461. req([genName], onModuleLoaded, function(_err){
  462. //if async-module could not be loaded, try sync-module
  463. if(_isAsyncCompileMode){
  464. logger.warn('Cannot use asynchronous compilation for '+genId+
  465. ': no async module available, using sync compilation instead...'
  466. );
  467. genName = GRAMMAR_MODULE_ID_PREFIX + genId + GRAMMAR_MODULE_ID_POSTFIX;
  468. req([genName], onModuleLoaded);
  469. }
  470. });
  471. }//END function build_grammar
  472. if(typeof doRecompile === 'string'){// arg. is URL for JSON grammar definition
  473. //interpret STRING as URL for the JSON grammar:
  474. gc.loadGrammar(build_grammar, function(err){
  475. var errMsg = err;
  476. if(err){
  477. if(err.stack){
  478. errMsg = err.stack;
  479. } else {
  480. try{
  481. errMsg = JSON.stringify(err);
  482. } catch(e){}
  483. }
  484. }
  485. throw new Error('Could not find JSON grammar file at "'+doRecompile+'": '+errMsg);
  486. }, doRecompile, true
  487. );
  488. } else if(typeof doRecompile === 'object'){// arg. is JSONObject (ie. JSON grammar definition)
  489. //ASSERT if doRecompile === null => throws error!
  490. gc.json_grammar_definition = doRecompile;
  491. build_grammar(gc);
  492. } else {
  493. logger.error('__createAndAddGrammar(): could not build grammar due to missing argumens');
  494. }
  495. }
  496. /**
  497. * @private
  498. * @memberOf mmir.SemanticInterpreter#
  499. */
  var process_asr_semantic = function(phrase, langCode, callback){

    if(!doCheckIsEnabled()){
      logger.warn('interpret(): currently disabled!');
      return null;
    }

    //shift arguments, if language code was omitted: interpret(phrase, callback|options)
    if(langCode && (typeof langCode === 'function' || typeof langCode === 'object')){
      callback = langCode;
      langCode = void(0);
    }

    //3rd argument may be an options object (with optional callback property)
    var options;
    if(callback && typeof callback === 'object'){
      options = callback;
      callback = options.callback;
    } else {
      options = {};
    }

    //by default, debug/trace output follows the logger's log-level
    if(typeof options.debug === 'undefined'){
      options.debug = logger.isDebug();
    }
    if(typeof options.trace === 'undefined'){
      options.trace = logger.isVerbose();
    }

    /**
     * HELPER pre-process the phrase, execute the grammar, and post-process the result:
     * with callback, the result is passed into the callback; without, it is returned.
     */
    var execGrammar = function(grammarConverter, phrase, langCode, parseOptions, callback){

      //pre-process phrase (e.g. mask umlauts, remove stopwords)
      var positions = _isCalcProcPos? {} : void(0);//<- for storing modification information during pre-processing
      var strPreparedPhrase = grammarConverter.preproc( phrase.toLowerCase(), positions );

      if(logger.isDebug()) logger.debug('process_asr_semantic('+langCode+'): removed stopwords, now parsing phrase "'+strPreparedPhrase+'"');//debug

      if(callback){

        grammarConverter.executeGrammar( strPreparedPhrase, parseOptions, function(result){

          //post-process result (e.g. unmask umlauts etc)
          result = grammarConverter.postproc(result, positions);
          result.preproc = positions;

          callback(result);//TODO return copy instead of original instance?
        });

      } else {

        var result = grammarConverter.executeGrammar( strPreparedPhrase, parseOptions );

        //post-process result (e.g. unmask umlauts etc)
        result = grammarConverter.postproc(result, positions);
        result.preproc = positions;

        return result;//TODO return copy instead of original instance?
      }
    };//END OF: var execGrammar = function...

    var grammarReadyCallback;
    if(callback){

      grammarReadyCallback = function(){

        //NOTE: query with doNotResolve = TRUE: this function is invoked after loading/compiling
        //      the grammar has finished; if the grammar is still missing at this point, the
        //      generation has failed, and resolving again would only re-trigger the compilation
        //      (and then fail when dereferencing the missing grammar)
        var readyConverter = doGetGrammar(langCode, true);
        if(!readyConverter){
          //there is no error-channel in the callback signature: report via logger
          logger.error('interpret(): no grammar available for '+(langCode || currentGrammarId)+', cannot process phrase');
          return;
        }

        if(readyConverter.isAsyncExec()){
          execGrammar(readyConverter, phrase, langCode, options, callback);
        } else {
          callback(execGrammar(readyConverter, phrase, langCode, options));
        }
      };
    }

    var grammarConverter = doGetGrammar(langCode, grammarReadyCallback);

    if(!grammarConverter && ! grammarReadyCallback){
      throw new Error('no grammar available for '+(langCode || currentGrammarId)+' (and no callback provided for asnyc invocation)');
    }

    //without callback: synchronous invocation, i.e. return the result directly
    // (with callback, the result is delivered via grammarReadyCallback)
    if(!grammarReadyCallback){
      return execGrammar(grammarConverter, phrase, langCode, options);
    }
  };
  561. /**
  562. * @private
  563. * @memberOf mmir.SemanticInterpreter#
  564. */
  var doApplyPreproc = function(thePhrase, lang, processingSteps){
    if(doCheckIsEnabled()){
      var converter = doGetGrammar(lang);
      if(converter){
        // no position-information is collected here (2nd argument is null)
        return converter.preproc(thePhrase, null, processingSteps);
      }
      throw new Error('No grammar for ID '+lang);
    }

    // interpreter is switched off: signal "no result"
    logger.warn('doProcessStopwords(): currently disabled!');
    return null;
  };
  576. /** @lends mmir.SemanticInterpreter.prototype */
  577. var _tmpInstance = { // public members
  578. /**
  579. * @param {String} phrase
  580. * the phrase that will be parsed
  581. * @param {String} langCode
  582. * the language code (identifier) for the parser/grammar
  583. * @param {Function|ParseOptions} [callback] OPTIONAL
  584. * parsing-options or a callback:
  585. * options.callback: FUNCTION the callback function (see below)
  586. * options.debug: BOOLEAN enabling debug output
  587. * (by default the logger's log-level <= 'debug' is used)
  588. * options.trace: BOOLEAN enabling verbose/tracing output;
  589. * may not be supported by all grammar engines
  590. * (by default the logger's log-level <= 'verbose' is used)
  591. * NOTE: some grammar engines may support additional parsing options
  592. * If a callback function: receives the return value
  593. * (instead of receiving the result as return value from
  594. * this function directly).
  595. * The signature for the callback is: <code>callback(result: Object)</code>
  596. * (i.e. the result that would be returned by this function itself is
  597. * passed as argument into the callback function; see also documentation
  598. * for <em>returns</em>).
  599. * NOTE: in case, the grammar for the requested <code>langCode</code>
  600. * is not compiled yet (i.e. not present as executable JavaScript),
  601. * the corresponding JSON definition of the grammar needs to be
  602. * compiled first, before processing the ASR's semantics is possible.
  603. * In this case, a <code>callback</code> function <strong>MUST</strong>
  604. * be supplied in order to receive a result (since compilation of the
  605. * grammar may be <em>asynchronous</em>).
  606. *
  607. * @returns {Object}
  608. * the parsing result (as processed by the parser / grammar;
  609. * usually a JSON-like object).
  610. * WARNING: if a <code>callback</code> function was provided, then
  611. * there is no return object.
  612. *
  613. * @public
  614. * @memberOf mmir.SemanticInterpreter.prototype
  615. */
  616. interpret: function(phrase, langCode, callback){
  617. return process_asr_semantic(phrase, langCode, callback);
  618. },
  619. /**
  620. * Removes stopwords using the stopword-list from the parser/grammar
  621. * for <code>lang</code>.
  622. *
  623. *
  624. * @deprecated use {@link #applyPreProcessing} instead
  625. *
  626. * @param {String} thePhrase
  627. * the Phrase for which stopwords should be removed
  628. * @param {String} lang
  629. * the language code (identifier) for the parser/grammar
  630. *
  631. * @public
  632. * @see #applyPreProcessing
  633. */
  634. removeStopwords: function(thePhrase, lang, processingSteps){
  635. logger.warn('using deprecated function removeStopwords(): should use applyPreProcessing() instead.');
  636. return this.applyPreProcessing(thePhrase, lang, processingSteps);
  637. },
  638. /**
  639. * Applies pre-processing for the corresponding parser/grammar
  640. * of <code>lang</code> (e.g. removes stopwords using the stopword-list etc).
  641. *
  642. * NOTE: <code>{@link #interpret}</code> automatically applies pre-processing
  643. * i.e. there is no need to manually do this when using <code>{@link #interpret}</code>).
  644. *
  645. * IMPORTANT: this helper function actually invokes {@link mmir.grammar.GrammarConverter#preproc}
  646. * which by default removes stopwords; if the corresponding GrammarConverter instance
  647. * has been set with a non-default pre-processing chain, results may be differ
  648. * (i.e. may not remove stopwords).
  649. *
  650. * @param {String} thePhrase
  651. * the Phrase for which stopwords should be removed
  652. * @param {String} [lang]
  653. * the language code (identifier) for the parser/grammar
  654. * (if omitted the currently set grammar is used)
  655. * @param {Array<ProcessingStep>} [processingSteps] OPTIONAL
  656. * if given, use <code>processingSteps</code> instead of the
  657. * GrammarConverter's configured pre-processing chain.
  658. * NOTE positional argument (i.e. must specify <code>lang</code> too)
  659. *
  660. * @public
  661. * @see mmir.grammar.GrammarConverter#preproc
  662. */
  663. applyPreProcessing: function(thePhrase, lang, processingSteps){
  664. return doApplyPreproc(thePhrase, lang, processingSteps);
  665. },
  666. /** NOTE: the grammar must be compiled/registered first
  667. * @param {String} id
  668. * the ID (identifier) / language code for grammar
  669. * @public
  670. * @see mmir.grammar.GrammarConverter#getGrammarDef
  671. */
  672. getGrammarDefinitionText: function(id){
  673. return doGetGrammar(id).getGrammarDef();
  674. },
  675. /** NOTE: the grammar must be compiled/registered first
  676. * @param {String} id
  677. * the ID (identifier) / language code for grammar
  678. * @public
  679. * @see mmir.grammar.GrammarConverter#getGrammarSource
  680. */
  681. getGrammarParserText: function(id){
  682. return doGetGrammar(id).getGrammarSource();
  683. },
  684. /**
  685. * Get the grammar converter instance (of registered grammar)
  686. * @public
  687. * @param {String} [id]
  688. * the ID (identifier) / language code for grammar
  689. * if omitted: the currently active grammar
  690. * @returns {mmir.grammar.GrammarConverter} the grammar converter
  691. *
  692. * @see #addGrammar
  693. * @see #setCurrentGrammar
  694. */
  695. getGrammarConverter: function(id){
  696. return doGetGrammar(id, true);//<- if no grammar is loaded for this ID, do NOT try to load it!
  697. },
  698. /**
  699. * @copydoc #createAndAddGrammar
  700. * @public
  701. * @param {String|JSONObject} rawGrammarSrc
  702. * @param {String} id
  703. * @param {Function} [callback]
  704. * @returns {SemanticInterpreter.prototype}
  705. */
  706. createGrammar: function(rawGrammarSrc, id, callback){
  707. if(!id){
  708. throw new Error('missing ID for generated grammar');//TODO
  709. }
  710. createAndAddGrammar(rawGrammarSrc, id, callback);
  711. return this;
  712. },
  713. /**
  714. * @copydoc #doAddGrammar
  715. * @public
  716. * @function
  717. */
  718. addGrammar: doAddGrammar,
  719. /**
  720. * @copydoc #doSetStopwords
  721. * @public
  722. * @function
  723. */
  724. setStopwords: doSetStopwords,
  725. // getGrammar: doGetGrammar, <- set to private
  726. /**
  727. * @copydoc #checkHasGrammar
  728. * @public
  729. * @function
  730. */
  731. hasGrammar: checkHasGrammar,
  732. /**
  733. * @copydoc #doRemoveGrammar
  734. * @public
  735. * @function
  736. */
  737. removeGrammar: doRemoveGrammar,
  738. /**
  739. * Shortcut for {@link mmir.GrammarConverter#addProc}:
  740. * add pre-/post-processing step for running before/after {@link #interpret}
  741. *
  742. * @param {String} langCode the language code, for which to add the (pre- and/or post-) processing step
  743. * @param {ProcessingStep} proc the processing step:
  744. * <pre>
  745. * {
  746. * //the name of the processing step
  747. * name: string,
  748. * //OPTIONAL pre-processing function: pre(input: string | Positions, isCalcPos: boolean)
  749. * pre: Function,
  750. * //OPTIONAL post-processing function: post(result: any, pos: Positions)
  751. * post: Function
  752. * }
  753. * </pre>
  754. * @param {Boolean|Number} [isPrepend] OPTIONAL
  755. * if omitted (or FALSY): appended <code>proc</code> to processing steps
  756. * if number: insert <code>proc</code> at this index into the processing steps-list
  757. * if TRUE: prepend <code>proc</code> to processing steps
  758. * @param {Function} [callback] OPTIONAL
  759. * callback, in case of asynchronous initialization, i.e. if
  760. * grammar is not loaded/compiled yet, and grammar.json is available.
  761. * If omitted, an error is thrown, if the grammar has not been loaded/compiled yet.
  762. *
  763. * @see mmir.GrammarConverter#addProc
  764. * @example
  765. * //positionUtils:
  766. * var posUtil = mmir.require('mmirf/positionUtils');
  767. * //stemming function
  768. * var stemFunc = ...;
  769. * //add stemming function for pre-processing for "de" as first step
  770. * mmir.semantic.addProcessing('de', {
  771. * name: 'stem',
  772. * pre: posUtil.createWordPosPreProc(stemFunc, this)
  773. * }, true);
  774. */
  775. addProcessing: function(langCode, processingStep, indexOrIsPrepend, callback){
  776. var cb = callback;
  777. var asyncCb = function(gc){
  778. gc.addProc(processingStep, indexOrIsPrepend);
  779. cb && cb(gc);
  780. };
  781. var gc = doGetGrammar(langCode, !cb, cb? asyncCb : void(0));//<- if no grammar is loaded for this ID, only try to load it, if a callback is provided
  782. if(!cb){
  783. asyncCb(gc);
  784. }
  785. },
  786. /**
  787. * Sets the current grammar.
  788. *
  789. * If in invocations of {@link #interpret} the grammar ID (e.g. language code) is missing,
  790. * then this grammar that is set here is used.
  791. *
  792. * The id must reference either a grammar that was compiled (i.e. generated JavaScript file)
  793. * for this id, or there must exist a JSON-grammar file for which the language-dir matches the id parameter,
  794. * e.g. <code>config/languages/[id]/grammar.json</code>.
  795. *
  796. * @param {String} id the ID for the grammar, e.g. an ISO language code
  797. *
  798. * @function
  799. * @public
  800. */
  801. setCurrentGrammar: function(id){
  802. currentGrammarId = id;
  803. //set semantic-interpreter to enabled
  804. // (this ensures, that JSON-grammars are automatically loaded,
  805. // if no corresponding compiled JS-grammar is available yet)
  806. doSetEnabled(true);
  807. },
  808. /**
  809. * @copydoc #currentGrammarId
  810. * @public
  811. */
  812. getCurrentGrammar: function(){
  813. return currentGrammarId;
  814. },
  815. /**
  816. * @see #isEnabled
  817. * @public
  818. */
  819. setEnabled: function(isEnabled){
  820. doSetEnabled(isEnabled);
  821. },
  822. /**
  823. * @copydoc #_isEnabled
  824. * @public
  825. */
  826. isEnabled: function(){
  827. return doCheckIsEnabled();
  828. },
  829. /**
  830. * Get the ID of the current grammar engine / compiler.
  831. *
  832. * @default "jscc"
  833. * @returns {String}
  834. * the ID of the current grammar engine
  835. * @public
  836. */
  837. getGrammarEngine: function(){
  838. return doGetGrammarEngine();
  839. },
  840. /**
  841. * Set the grammar engine, i.e. the
  842. * compiler engine for the JSON grammar
  843. *
  844. * NOTE: implementations of the grammar engines are located at env/grammar/
  845. * The file-name for an implementation should follow the convention: ID+"Generator.js"
  846. * and should be registered with requirejs with the module-ID: ID+"Gen"
  847. *
  848. * @param {String} engineId
  849. * the ID for the engine.
  850. * Possible values: "jscc", "jison", "pegjs"
  851. *
  852. * @param {Boolean} [asyncCompileMode] OPTIONAL
  853. * sets the compile mode (synchronous or asynchronous) when generating new parsers
  854. * with the grammar-engine.
  855. * DEFAULT: VOID (i.e. leave current set compile-mode setting unchanged)
  856. *
  857. * @param {Boolean} [disableStrictMode] OPTIONAL
  858. * disable JavaScript strict mode when generating grammar code
  859. * <br>NOTE: this argument is positional, i.e. <code>asyncCompileMode</code> must also be given when using this argument
  860. *
  861. * @public
  862. */
  863. setGrammarEngine: function(engineId, asyncCompileMode, disableStrictMode){
  864. doSetGrammarEngine(engineId, asyncCompileMode, disableStrictMode);
  865. },
  866. /**
  867. * Set compile-mode (synchronous or asynchronous) for the grammar engine, i.e. if the
  868. * compiler engine for the JSON grammar should run synchronously or asynchronously.
  869. *
  870. * NOTE: if there is no asynchronous implementation available for the grammar engine,
  871. * the sync-impl. is used by default.
  872. *
  873. * NOTE: asynchronous compile mode requires WebWorkers
  874. *
  875. * @param {Boolean} asyncCompileMode
  876. * sets the compile mode (synchronous or asynchronous) when generating new parsers
  877. * with the grammar-engine.
  878. *
  879. * @param {Boolean} [disableStrictMode] OPTIONAL
  880. * disable JavaScript strict mode when generating grammar code
  881. *
  882. * @public
  883. * @default false (i.e. synchronous compile mode)
  884. * @require WebWorker (if async mode)
  885. */
  886. setEngineCompileMode: function(asyncCompileMode, disableStrictMode){
  887. _isAsyncCompileMode = !!asyncCompileMode;
  888. if(typeof disableStrictMode === 'boolean'){
  889. _disableStrictMode = disableStrictMode;
  890. }
  891. },
  892. /**
  893. * Get compile-mode (synchronous or asynchronous) for the grammar engine, i.e. if the
  894. * compiler engine for the JSON grammar should run synchronously or asynchronously.
  895. *
  896. * @return {Boolean} the compile mode (synchronous or asynchronous) when generating new parsers
  897. * with the grammar-engine.
  898. * @public
  899. */
  900. getEngineCompileMode: function(){
  901. return _isAsyncCompileMode;
  902. },
  903. /**
  904. * Get JavaScript strict mode compile-setting for the grammar engine, i.e. if the
  905. * compiler engine should generate code with strict-mode setting.
  906. *
  907. * @return {Boolean} the strict mode setting
  908. * @public
  909. */
  910. getEngineCompileStrictMode: function(){
  911. return !_disableStrictMode;
  912. },
  913. /**
  914. * @copydoc #GRAMMAR_FILE_FORMAT_VERSION
  915. * @returns {Number} the current version number that this SemanticInterpreter
  916. * instance supports, for the file format of compiled grammars.
  917. */
  918. getFileVersion: function(){
  919. return GRAMMAR_FILE_FORMAT_VERSION;
  920. },
  921. /**
  922. * Enable / disable calculation of modified positions during pre-processing
  923. *
  924. * @param {Boolean} isEnabled if calculation of modified positions during pre-processing should be enabled
  925. *
  926. * @public
  927. * @see #isPreProcessPositionsEnabled
  928. * @see #_isCalcProcPos
  929. */
  930. setPreProcessPositionsEnabled: function(isEnabled){
  931. _isCalcProcPos = isEnabled;
  932. },
  933. /**
  934. * If true, pre-processing input-phrase (before running interpretation) will
  935. * include meta-data for changed positions (due to pre-processing) in input-string
  936. *
  937. * E.g. can be used to map semantic-results (matched tokens/utterances where
  938. * e.g. stopwords would be removed) on the raw input-string.
  939. *
  940. * The meta-information will be included in field <code>preproc</code> of the
  941. * interpretation result.
  942. *
  943. * @return {Boolean} if calculation of modified positions during pre-processing is enabled
  944. *
  945. * @public
  946. * @see #setPreProcessPositionsEnabled
  947. * @see #_isCalcProcPos
  948. */
  949. isPreProcessPositionsEnabled: function(){
  950. return _isCalcProcPos;
  951. },
  952. //FIXME rename/move functions
  953. get_json_grammar_url: function(id){
  954. return res.getGrammarFileUrl(id);
  955. }//,
  956. // exists_gen_grammar: function(id){
  957. // var lang = require('mmirf/languageManager');
  958. // return lang.existsGrammar(id, 'bin');
  959. // }
  960. };//END: var _tmpInstance = {...
  961. return _tmpInstance;
  962. }
  963. instance = new constructor();
  964. return instance;
  965. });//END: define(..., function(){