define(['jison', 'constants', 'configurationManager', 'grammarConverter', 'jquery', 'logger', 'module'],
/**
 * Generator for executable language-grammars (i.e. converted JSON grammars).
 *
 * <p>
 * This generator uses Jison for compiling the JSON grammar.
 *
 * <p>
 * The generator for compiling the JSON grammar definitions in <code>www/config/languages/&lt;language code&gt;/grammar.json</code>
 * can be configured in <code>www/config/configuration.json</code>:<br>
 * <pre>
 * {
 *   ...
 *   "grammarCompiler": "jison",
 *   ...
 * }</pre>
 *
 * <p>
 * jison supports grammar generation for:
 * LALR(1), LR(0), SLR(1), LL(1)
 * [and experimental support for LR(1)]
 *
 * see <a href="http://zaach.github.io/jison/docs/#parsing-algorithms">jison documentation</a>
 *
 * @see <a href="https://github.com/zaach/jison">https://github.com/zaach/jison</a>
 *
 * @class
 * @constant
 * @public
 * @name JisonGenerator
 * @memberOf mmir.env.grammar
 *
 * @requires Jison
 * @requires jQuery.Deferred
 * @requires jQuery.extend
 * @requires jQuery.makeArray
 */
function(jison, constants, configManager, GrammarConverter, $, Logger, module){

	/**
	 * Deferred object that will be returned - for async-initialization:
	 * the deferred object will be resolved, when this module has been initialized.
	 *
	 * @private
	 * @type Deferred
	 * @memberOf JisonGenerator#
	 */
	var deferred = $.Deferred();
	//no async initialization necessary for the jison generator -> resolve immediately
	deferred.resolve();

	/**
	 * The Logger for the jison generator.
	 *
	 * @private
	 * @type Logger
	 * @memberOf JisonGenerator#
	 *
	 * @see mmir.Logging
	 */
	var logger = Logger.create(module);

	/**
	 * The argument name when generating the grammar function:
	 * the argument holds the raw text that will be parsed by the generated grammar.
	 *
	 * NOTE: this argument/variable name must not collide with any code that is generated for the grammar.
	 *
	 * @constant
	 * @private
	 * @memberOf JisonGenerator#
	 */
	var INPUT_FIELD_NAME = 'asr_recognized_text';

	/**
	 * The default options for the jison compiler.
	 *
	 * To overwrite the default options, configure the following property in <code>www/config/configuration.json</code>:<br>
	 * <pre>
	 * {
	 *   ...
	 *   "grammar": {
	 *     ...
	 *     "jison": {
	 *       "type": "your configuration setting!"
	 *     }
	 *     ...
	 *   },
	 *   ...
	 * }</pre>
	 *
	 * Valid settings are:
	 * <code>type = 'lr0' | 'slr' | 'lr' | 'll' | 'lalr'</code>
	 * <code>execMode = 'sync' | 'async'</code>
	 * <code>genSourceUrl = true | STRING | FALSY</code>
	 *
	 *
	 * genSourceUrl: if TRUTHY, the sourceUrl for eval'ed parser-module is set
	 *               (i.e. eval'ed code will appear at the URL in debugger, if browser supports sourceURL setting)
	 *               if true: the sourceUrl will be generated using the grammar's ID
	 *               if STRING: the string will be used as sourceUrl; if "&lt;id&gt;" is contained, it will be replaced by the grammar's ID
	 *
	 * @constant
	 * @private
	 * @default type := 'lalr', execMode := sync, genSourceUrl := FALSY
	 * @memberOf JisonGenerator#
	 */
	var DEFAULT_OPTIONS = {
		type: 'lalr',//'lr0' | 'slr' | 'lr' | 'll' | default: lalr
		execMode: 'sync',//'sync' | 'async' | default: sync
		genSourceUrl: ''// true | STRING: the sourceURL for eval'ed parser-module | default: FALSY
	};

	/**
	 * Name for this plugin/grammar-generator (e.g. used for looking up configuration values in configuration.json).
	 * @constant
	 * @private
	 * @memberOf JisonGenerator#
	 */
	var pluginName = 'grammar.jison';

	/**
	 * Exported (public) functions for the jison grammar-engine.
	 * @public
	 * @type GrammarGenerator
	 * @memberOf JisonGenerator#
	 */
	var jisonGen = {
		/** @scope JisonGenerator.prototype */

		/**
		 * The name/ID for the compile engine for the jison compiler
		 *
		 * @memberOf JisonGenerator.prototype
		 */
		engineId: 'jison',
		/**
		 * @param {Function} [callback] OPTIONAL
		 * 			the callback that is triggered, when the engine is initialized
		 * @returns {Deferred}
		 * 			a promise that is resolved, when the engine is initialized
		 * 			(NOTE: if you use the same function for the <code>callback</code> AND the promise,
		 * 			       then the function will be invoked twice!)
		 *
		 * @memberOf JisonGenerator.prototype
		 */
		init: function(callback){
			if(callback){
				deferred.always(callback);
			}
			return deferred;
		},
		/** @returns {Boolean} if this engine compilation works asynchronously. The current implementation works synchronously (returns FALSE) */
		isAsyncCompilation: function(){ return false; },
		/**
		 * The function for compiling a JSON grammar:
		 * converts the JSON definition into jison syntax, generates the parser source,
		 * wraps it into a module that registers itself with the semanticInterpreter, and eval's that module.
		 *
		 * @param {GrammarConverter} theConverterInstance
		 * 			the converter; it gets extended (in place) with the jison specific conversion functions
		 * @param {String} instanceId
		 * 			the ID for the compiled grammar (usually this is a language code)
		 * @param {Number} fileFormatVersion
		 * 			the version of the file format (this is a constant within {@link mmir.SemanticInterpreter})
		 * @param {Function} [callback]
		 * 			invoked with <code>theConverterInstance</code> after the parser-module has been processed
		 * @returns {GrammarConverter}
		 * 			the grammar instance (i.e. <code>theConverterInstance</code>) with the generated
		 * 			grammar source attached
		 */
		compileGrammar: function(theConverterInstance, instanceId, fileFormatVersion, callback){

			//attach functions for jison conversion/generation to the converter-instance:
			$.extend(theConverterInstance, JisonGrammarConverterExt);

			//start conversion: create grammar in jison syntax (from the JSON definition):
			theConverterInstance.init();
			this._preparePrintError();
			theConverterInstance.convertJSONGrammar();
			var grammarDefinition = theConverterInstance.getGrammarDef();

			//load options from configuration:
			var config = configManager.get(pluginName, true, {});
			//combine with default options:
			var options = $.extend({id: instanceId}, DEFAULT_OPTIONS, config);

			//the ID is embedded into generated JavaScript code as a string literal
			// -> JSON-encode it, so that quotes/backslashes in the ID cannot break the generated code
			//    (for plain IDs this yields exactly "<id>", i.e. the same code as simple quoting)
			var quotedInstanceId = JSON.stringify(String(instanceId));

			//HELPER function for generating the parser-module (after parser was generated)
			var compileParserModule = function(grammarParser, hasError){

				var addGrammarParserExec =
					'(function(){\n var semanticInterpreter = require("semanticInterpreter");\n'
					+ 'var options = {fileFormat:'+fileFormatVersion+',execMode:'+JSON.stringify(options.execMode)+'};\n'
					+ 'var module = {};\n'
					+ grammarParser
					+ ';\nvar grammarFunc = function(){\n'
					+ ' var result; try {\n'
					+ ' result = parser.parse.apply(parser, arguments);\n'
					+ ' } catch (err){\n'
					+ ' console.error(err.stack?err.stack:err); result = {};\n'//TODO warning/error messaging? -> need to handle encoded chars, if error message should be meaningful
					+ ' }\n'
					+ ' return result;\n'
					+ '};\n'
					+ 'semanticInterpreter.addGrammar('
					+ quotedInstanceId
					+ ', grammarFunc, options);\n\n'
					+ 'semanticInterpreter.setStopwords('
					+ quotedInstanceId + ','

					//store stopwords with their Unicode representation (only for non-ASCII chars)
					+ JSON.stringify(
						theConverterInstance.getEncodedStopwords()
					).replace(/\\\\u/gm,'\\u')//<- revert JSON.stringify encoding for the Unicodes
					+ ');\n'
					+ 'return grammarFunc;\n'
					+ '})();';

				if(options.genSourceUrl){

					var sourceUrlStr;
					if(options.genSourceUrl === true){
						sourceUrlStr = 'gen/grammar/_compiled_grammar_'+instanceId;
					} else {
						sourceUrlStr = options.genSourceUrl.toString().replace(/<id>/g,instanceId);
					}

					//for Chrome / FireFox debugging: provide an URL for eval'ed code
					addGrammarParserExec += '//@ sourceURL='+sourceUrlStr+'\n'
											+'//# sourceURL='+sourceUrlStr+'\n';
				}

				theConverterInstance.setGrammarSource(addGrammarParserExec);

				try{

					//NOTE(security): this evaluates code that was generated from the grammar definition;
					//                grammar definitions must therefore come from a trusted source.
					eval(addGrammarParserExec);

				} catch (err) {

					//TODO russa: generate meaningful error message with details about error location
					//			  eg. use esprima (http://esprima.org) ...?
					//			... as optional dependency (see deferred initialization above?)

					var evalMsg = 'Error during eval() for "'+ instanceId +'": ' + err;

					if(jison.printError){
						jison.printError(evalMsg);
					}
					else {
						logger.error('jison', 'evalCompiled', evalMsg, err);
					}

					//if the parser generation itself already failed, the generated (dummy) parser reports the error
					// -> only attach a dummy grammar function, if this is the first error
					if(! hasError){
						evalMsg = '[INVALID GRAMMAR JavaScript CODE] ' + evalMsg;
						var parseDummyFunc = (function(msg, error){
							return function(){ console.error(msg); console.error(error); throw msg;};
						})(evalMsg, err);

						parseDummyFunc.hasErrors = true;

						theConverterInstance.setGrammarFunction(parseDummyFunc);
					}

				}

				//invoke callback if present:
				if(callback){
					callback(theConverterInstance);
				}
			};

			var isPreventDefault = this._afterCompileParser(compileParserModule, callback);
			var result = this._compileParser(grammarDefinition, options, isPreventDefault);

			if(!isPreventDefault){
				var hasError = result.hasError;
				compileParserModule(result.def, hasError);
			}

			return theConverterInstance;
		},
		/**
		 * Generates the parser source code from the grammar definition (in jison syntax).
		 *
		 * If generating the parser fails, the error is printed and the source code for a dummy parser
		 * is returned, which throws the error message when its <code>parse()</code> function is invoked.
		 *
		 * NOTE(review): <code>bnf</code> and <code>Jison</code> are not module dependencies of this file
		 *               (the injected dependency is named <code>jison</code>) - presumably they are globals
		 *               that get exported by jison's browser build; confirm against the jison build in use.
		 *
		 * @param {String} grammarDefinition
		 * 			the grammar definition in jison syntax
		 * @param {Object} options
		 * 			the compiler options (see DEFAULT_OPTIONS); field <code>id</code> is used in error messages
		 * @param {TRUTHY|VOID} afterCompileParserResult
		 * 			the result of {@link #_afterCompileParser} (not used by the default implementation)
		 * @returns {Object}
		 * 			<code>{def: STRING, hasError: BOOLEAN}</code> where <code>def</code> is the parser's source code
		 *
		 * @protected
		 */
		_compileParser: function(grammarDefinition, options, afterCompileParserResult){

			var hasError = false;
			var grammarParser;
			try{
				var cfg = bnf.parse(grammarDefinition);
				var parser = Jison.Generator(cfg, options);
				grammarParser = parser.generate();
			} catch(error) {
				//example for a SyntaxError object:
				// {
				//   "message": "Expected \"=\" or string but \"_\" found.",
				//   "expected": [
				//     {"type": "literal", "value": "=", "description": "\"=\""},
				//     {"type": "other", "description": "string"}
				//   ],
				//   "found": "_",
				//   "offset": 4104,
				//   "line": 40,
				//   "column": 6,
				//   "name": "SyntaxError"
				// }
				var msg = ' while compiling grammar "' + options.id + '": ';
				if(error && error.name === 'SyntaxError'){
					msg = 'SyntaxError' + msg + error.message;
				}
				else {
					msg = 'Error' + msg + (error && error.stack? error.stack : error);
				}

				//guard against falsy thrown values: accessing fields on them would throw within this error handler
				if(error){

					if(typeof error.lineNumber !== 'undefined'){
						msg += ' at line '+error.lineNumber;
					}

					if(typeof error.column !== 'undefined'){
						msg += ':'+error.column;
					}

					if(typeof error.index !== 'undefined'){
						msg += ' (offset '+error.index+')';
					}
				}

				if(jison.printError){
					jison.printError(msg);
				}
				else {
					console.error(msg);
				}
				msg = '[INVALID GRAMMAR] ' + msg;
				grammarParser = 'var parser = { parse: function(){ var msg = '+JSON.stringify(msg)+'; console.error(msg); throw msg;} }';
				hasError = true;
			}

			return {def: grammarParser, hasError: hasError};
		},
		/**
		 * Installs the default error-print function as <code>jison.printError</code>,
		 * if no print function is set yet.
		 *
		 * @protected
		 */
		_preparePrintError: function(){

			//setup logger for compile errors (if not already set)
			if(! jison.printError){
				/**
				 * The default logging / error-print function for jison.
				 *
				 * @private
				 * @name printError
				 * @function
				 * @memberOf JisonGenerator.jison#
				 *
				 * @see mmir.Logging
				 */
				jison.printError = function(){
					var args = $.makeArray(arguments);
					//prepend "location-information" to logger-call:
					args.unshift('jison', 'compile');
					//output log-message:
					logger.error.apply(logger, args);
				};
			}
		},
		/**
		 * The default logging / error-print function for jison.
		 *
		 * @protected
		 *
		 * @see mmir.Logging
		 */
		printError: function(){
			//ensure that a print function exists, even if no grammar was compiled yet
			jisonGen._preparePrintError();
			jison.printError.apply(jison, arguments);
		},
		/**
		 * Optional hook for pre-processing the generated parser, after the parser is generated.
		 *
		 * By default, this function returns VOID, in which case the parser-module is created by default.
		 *
		 * If a TRUTHY value is returned instead, then the implementation must invoke <code>compileParserModuleFunc</code>:
		 * <code>compileParserModuleFunc(compiledParser : STRING, hasErrors : BOOLEAN)</code>
		 *
		 *
		 * @param {Function} compileParserModuleFunc
		 * 				the function that generates the parser-module:
		 * 				<code>compileParserModuleFunc(compiledParser : STRING, hasErrors : BOOLEAN)</code>
		 *
		 * @param {Function} compileCallbackFunc
		 * 				the callback function which will be invoked by compileParserModuleFunc, after it has finished.
		 * 				If compileParserModuleFunc() is prevented from execution then the callback MUST be invoked manually
		 * 				<code>compileCallbackFunc(theConverterInstance: GrammarConverter)</code>
		 *
		 * @returns {TRUTHY|VOID}
		 * 				FALSY for the default behavior.
		 * 				IF a TRUTHY value is returned, then the default action after compiling the parser
		 * 				is not executed:
		 * 				i.e. compileParserModuleFunc is not automatically called and in consequence the callback is not invoked
		 *
		 *
		 * NOTE: if not FALSY, then either compileParserModuleFunc() must be invoked, or the callback() must be invoked!
		 *
		 * @protected
		 */
		_afterCompileParser: function(compileParserModuleFunc, compileCallbackFunc){
			//default: return VOID
			return;
		}
	};


	////////////////////////////////////// Jison specific extensions to GrammarConverter ////////////////////////////////
	/**
	 * jison specific extension / implementation for {@link GrammarConverter} instances
	 *
	 * @type GrammarConverter
	 * @memberOf JisonGenerator#
	 */
	var JisonGrammarConverterExt = {
		/**
		 * (Re-)initializes the fields that collect the parts of the jison grammar definition.
		 *
		 * @memberOf JisonGrammarConverterExt
		 */
		init: function(){

			this.THE_INTERNAL_GRAMMAR_CONVERTER_INSTANCE_NAME = "theGrammarConverterInstance";
			this._PARTIAL_MATCH_PREFIX = "$";

			this.grammar_tokens = "/* --- Token definitions --- */\n\n/* Characters to be ignored */\n"
				+ "\\s+ /* skip whitespace */\n\n/* Non-associative tokens */\n";

			this.grammar_utterances = "";
			this.grammar_phrases = "phrases:\n ";
			this.token_variables = "%{\n var " + this.variable_prefix + "result = '';\n";
			this.tokens_array = [];

			this.grammar_special_tokens = "";
			this.grammar_special_tokens_no = 0;

		},
		/**
		 * Converts the JSON grammar definition (field <code>json_grammar_definition</code>) into a
		 * grammar definition in jison syntax, and stores the result in field <code>jscc_grammar_definition</code>.
		 */
		convertJSONGrammar: function(){

			this.json_grammar_definition = this.maskJSON(this.json_grammar_definition);

			//helper code that is embedded into the generated grammar:
			this.token_variables += " var semanticAnnotationResult = {};\n"
				+ " var _flatten = function(match){ if(!match.join){ return match;} for(var i=0, size = match.length; i < size; ++i){if(!match[i]){continue;}if(match[i].join){match[i] = _flatten(match[i])}} return match.join('') };\n"
				+ " var _tok = function(field, match){ match = _flatten(match); field[match] = match; return match;}\n"
			;

			this.parseTokens();
			this.parseUtterances();
			this.parseStopWords();

			this.jscc_grammar_definition = this.token_variables
					+ "%}\n\n"
					+ "/* lexical grammar */\n%lex\n\n"
					+ this.grammar_special_tokens
					+ "\n\n%%"
					+ this.grammar_tokens
					+ "\n<<EOF>> %{ return 'EOF'; %};\n\n/lex"
					+ "\n\n/* --- Grammar specification --- */\n%start utterance\n\n%% /* language grammar */\n\nutterance:\n phrases EOF %{ "

					//TODO use LOG LEVEL for activating / deactivating this:
					+ "console.log("
					+ this.variable_prefix + "result); "

					+ "semanticAnnotationResult.result = "
					+ this.variable_prefix + "result; return "+ this.variable_prefix +"result; %};\n\n" + this.grammar_utterances
					+ "\n" + this.grammar_phrases + ";\n\n"
			;

			this.json_grammar_definition = this.unmaskJSON(this.json_grammar_definition);
		},
		/**
		 * Creates the lexer rules (and the variables for storing the matches) for all tokens
		 * of the JSON grammar: appends to <code>token_variables</code>, <code>grammar_tokens</code>,
		 * and (for tokens that are regular expressions) to <code>grammar_special_tokens</code>.
		 */
		parseTokens: function(){
			var self = this;
			var json_tokens = this.json_grammar_definition.tokens;
			var pref = self.variable_prefix;

			//NOTE the loop variable must be declared (otherwise it would be an implicit global,
			//     or a ReferenceError in strict mode)
			for(var token_name in json_tokens){

				var words = json_tokens[token_name];

				self.token_variables += " var " + pref
						+ token_name.toLowerCase() + " = {};\n";

				var sb = [];

				var isNotRegExpr = true;
				for(var i=0, size = words.length; i < size ; ++i){

					//NOTE RegExpr need to be recoded -> need to check, if current word is RegExp!
					//  example (see also _convertRegExpr()):
					//	INPUT:   '[a-zA-Z_]+'
					//	RECODED: [a-zA-Z_]+
					isNotRegExpr = this._checkIfNotRegExpr(words[i]);

					if(isNotRegExpr){
						//add TOKEN string (quoted):
						sb.push("\"", words[i], "\"");
					}
					else {
						//add a named definition for the reg-expr and reference it from the token rule:
						var special_token_name = "regexpr" + (++ this.grammar_special_tokens_no);
						this.grammar_special_tokens += special_token_name + " " + this._convertRegExpr(words[i]) + "\n";
						sb.push("{" + special_token_name + "}");
					}

					//if there is another word following, add OR operator
					if(i < size-1){
						sb.push("|");
					}
				}

				//create JavaScript processing for token
				sb.push(
						" %{ _tok(" + pref + token_name.toLowerCase() + ", yytext); return '"+token_name+"'; %}\n"
				);

				self.grammar_tokens += sb.join("");
			}
		},
		/**
		 * Creates the grammar rules for all utterances of the JSON grammar
		 * (and adds each utterance as alternative to the <code>phrases</code> rule).
		 */
		parseUtterances: function(){
			var self = this;
			var utt_index = 0;
			var json_utterances = this.json_grammar_definition.utterances;

			for(var utterance_name in json_utterances){
				var utterance_def = json_utterances[utterance_name];
				if(utt_index > 0){
					self.grammar_phrases += "\n\t|";
				}
				utt_index++;
				self.doParseUtterance(utterance_name, utterance_def);
			}
		},
		/**
		 * Creates the grammar rule for one utterance: one alternative per phrase of the utterance.
		 *
		 * @param {String} utterance_name
		 * 			the name of the utterance
		 * @param {Object} utterance_def
		 * 			the utterance definition; fields <code>phrases</code> and <code>semantic</code> are read
		 */
		doParseUtterance: function(utterance_name, utterance_def){

			var self = this;

			self.token_variables += " var " + self.variable_prefix
					+ utterance_name.toLowerCase() + " = {};\n";

			var grammar_utterance = utterance_name + ":\n ";
			self.grammar_phrases += utterance_name + " " ;
			var phrases = utterance_def.phrases;
			var semantic = self.doCreateSemanticInterpretationForUtterance(utterance_name, utterance_def);

			for(var index=0,size=phrases.length; index < size; ++index){
				if(index > 0){
					grammar_utterance += "\n | ";
				}
				var phrase = phrases[index];
				var semantic_interpretation = self.doCreateSemanticInterpretationForPhrase(
						utterance_name.toLowerCase(), utterance_def, phrase, semantic
				);
				grammar_utterance += semantic_interpretation;
			}
			self.grammar_utterances += grammar_utterance + ";\n\n";
		},
		/**
		 * Creates the JavaScript code (as string) for the semantic interpretation of an utterance:
		 * the utterance's <code>semantic</code> definition is stringified, and the variable references
		 * within it (as matched by <code>variable_regexp</code>) are replaced by code that accesses the matched phrases.
		 *
		 * @param {String} utterance_name
		 * 			the name of the utterance
		 * @param {Object} utterance_def
		 * 			the utterance definition; field <code>semantic</code> is read
		 * @returns {String} the code for the semantic interpretation
		 * 			(<code>undefined</code>, if the semantic definition cannot be stringified)
		 */
		doCreateSemanticInterpretationForUtterance: function(utterance_name, utterance_def){
			var semantic = utterance_def.semantic,
			variable_index, variable_name;

			if(logger.isDebug()) logger.debug('doCreateSemanticInterpretationForUtterance: '+semantic);//debug

			var semantic_as_string = JSON.stringify(semantic);
			if( semantic_as_string != null){
				this.variable_regexp.lastIndex = 0;
				var variables = this.variable_regexp.exec(semantic_as_string);
				while (variables != null) {
					var variable = variables[1],
					remapped_variable_name = "";

					if(logger.isDebug()) logger.debug("variables " + variable, semantic_as_string);//debug

					//"_$NAME[INDEX]['FIELD']":  _$NAME  [ INDEX ]  [" FIELD "] | [' FIELD '] | .FIELD
					variable_index = /\[(\d+)\]/.exec(variable);
					variable_name = new RegExp('_\\$([a-zA-Z_][a-zA-Z0-9_\\-]*)').exec(variable)[1];
					if (variable_index == null) {
						remapped_variable_name = variable;
					} else {
						remapped_variable_name = variable.replace(
								'[' + variable_index[1] + ']'
								, "["
								+ utterance_name.toLowerCase() + "_temp['phrases']['"
								+ variable_name.toLowerCase() + "']["
								+ variable_index[1]
								+ "]."+this.entry_token_field+"]");
						//TODO replace try/catch with safe_acc function
						//     PROBLEM: currently, the format for variable-access is not well defined
						//              -> in case of accessing the "semantic" field for a variable reference of another Utterance
						//                 we would need another safe_acc call
						//                 ... i.e. need to parse expression for this, but since the format is not well defined
						//                 we cannot say, for what exactly we should parse...
						//                 NORMAL VAR EXPR:     _$a_normal_token[0]
						//                 ACCESS TO SEMANTICS: _$other_utterance[0]['semantic']
						//                                      but this could also be expressed e.g. as _$other_utterance[0].semantic
					}
					//wrap access in try/catch, so that missing matches evaluate to undefined (instead of throwing)
					semantic_as_string = semantic_as_string.replace(
							variables[0],
							" function(){try{return " + remapped_variable_name
							+ ";} catch(e){return void(0);}}() "
					);
					variables = this.variable_regexp.exec(semantic_as_string);
				}
			}
			return semantic_as_string;
		},
		/**
		 * Creates the grammar rule alternative for one phrase of an utterance, i.e.
		 * the sequence of tokens/utterances of the phrase followed by the action code
		 * that creates the result object for the match.
		 *
		 * @param {String} utterance_name
		 * 			the (lower case) name of the utterance
		 * @param {Object} utterance_def
		 * 			the utterance definition (not used by this implementation)
		 * @param {String} phrase
		 * 			the phrase: a whitespace separated list of token/utterance names
		 * @param {String} semantic_as_string
		 * 			the code for the semantic interpretation, see {@link #doCreateSemanticInterpretationForUtterance}
		 * @returns {String} the rule alternative (in jison syntax) for the phrase
		 */
		doCreateSemanticInterpretationForPhrase: function(utterance_name, utterance_def, phrase, semantic_as_string){
			var phraseList = phrase.split(/\s+/),
			length = phraseList.length,
			duplicate_helper = {};
			var hasOwn = Object.prototype.hasOwnProperty;

			var phraseStr = "";
			var i = 0;

			var pharseMatchResult = " $$ = ";

			var semanticProcResult = "var "+utterance_name+"_temp = {}; "+utterance_name+"_temp['phrases'] = {};";
			var num;
			for (i = 0; i < length; ++i) {

				num = i+1;

				//create STR for phrase-matching
				phraseStr += " " + phraseList[i];

				//create STR for concatenated match of all partial phrases
				pharseMatchResult += this._PARTIAL_MATCH_PREFIX + num;
				if(num < length){
					pharseMatchResult += " + ' ' + ";
				}

				//create STR for semantic processing of phrase
				//NOTE check for own property: names like "constructor" or "toString" must not be
				//     mistaken for duplicates because of inherited Object.prototype members
				if (! hasOwn.call(duplicate_helper, phraseList[i])) {
					duplicate_helper[phraseList[i]] = 0;
					semanticProcResult += utterance_name+"_temp['phrases']['"+phraseList[i].toLowerCase()+"'] = [];\n\t\t";
				} else {
					duplicate_helper[phraseList[i]] += 1;
				}
				semanticProcResult += utterance_name + "_temp['phrases']['"
						+ phraseList[i].toLowerCase() + "']["
						+ duplicate_helper[phraseList[i]] + "] = {"
							+ this.entry_token_field + ": " + this._PARTIAL_MATCH_PREFIX + num + ","
							+ this.entry_index_field + ": " + (num-1)
						+"};\n\t\t";
			}

			semanticProcResult += "var " + this.variable_prefix + "phrase = $$; "
					+ utterance_name + "_temp['phrase']=" + this.variable_prefix + "phrase; "
					+ utterance_name + "_temp['utterance']='" + utterance_name + "'; "
					+ utterance_name + "_temp['engine']='jison'; "//FIXME debug
					+ utterance_name + "_temp['semantic'] = " + semantic_as_string
					+ "; " + this.variable_prefix + utterance_name + "["
					+ this.variable_prefix + "phrase] = " + utterance_name + "_temp; "
					+ this.variable_prefix + "result = " + utterance_name + "_temp;";

			return phraseStr + " %{\n\t " + pharseMatchResult + "; " + semanticProcResult + "; \n\t%} ";
		},
		/**
		 * Heuristic check, if a token is a plain string (in contrast to a regular expression).
		 *
		 * @param {String} token
		 * 			the token definition
		 * @returns {Boolean} <code>false</code> if the token looks like a regular expression,
		 * 			i.e. contains a character-group, a grouping, or one of the characters <code>? | * + ^ \</code>
		 */
		_checkIfNotRegExpr: function(token){

			//test for character-group
			if( ! /([^\\]\[)|(^\[).*?[^\\]\]/.test(token)){

				//test for grouping
				if( ! /([^\\]\()|(^\().*?[^\\]\)/.test(token) ){

					//try for single-characters that occur in reg-expr FIXME this may produce false-positives!!!
					return ! /[\?|\*|\+|\^|\|\\]/.test(token); //excluded since these may be more common in natural text: . $
				}
			}

			return false;
		},
		/**
		 * Recodes a regular expression token for the jison lexer definition:
		 * literal character sequences get quoted, reg-expr operators and character-groups are kept as they are.
		 *
		 * Example: <code>cars?</code> is converted to <code> "car" "s" ?</code>
		 *
		 * @param {String} token
		 * 			the regular expression (as string)
		 * @returns {String} the recoded regular expression
		 */
		_convertRegExpr: function(token){
			var sb = [], ch, last = null, isString = false, isGroup = false, isEsc = false;
			for(var i=0, size = token.length; i < size; ++i){
				ch = token.charAt(i);
				switch(ch){
				case '(':
				case ')':
				case '[':
				case ']':
				case '+':
				case '*':
				case '?':
				case '$':
				case '^':
				case '.':
				case '|':
					if(last !== '\\'){

						//if changed from STRING -> non-STRING, then "close" string first:
						if(isString){

							//for "optional" expression: modify previous entry to be a single character-sequence
							// ...cars'? -> ...car' 's'?
							if(ch === '?' && sb.length > 0){//TODO also for '+', '*', ...???
								sb[ sb.length - 1 ] = '" "' + sb[ sb.length - 1 ];
							}

							sb.push("\" ");
							isString = false;
						}

						//insert reg-expr symbol
						sb.push(ch);

						//is character-group opening/closing?
						if(isGroup && ch === ']'){
							isGroup = false;
						}
						else if(!isGroup && ch === '['){
							isGroup = true;
						}

						break;
					}
					else {
						//escaped reg-expr symbol: treat as normal character, i.e.
						//INTENTIONAL FALL-THROUGH to the default case
						isEsc = true;
					}
				default:

					if(isEsc){
						sb.pop();//remove last element, i.e. the escape-character
						isEsc = false;
					}

					//if changed from non-STRING -> STRING, then "open" string now:
					if(!isGroup && !isString){
						sb.push(" \"");
						isString = true;
					}
					sb.push(ch);
				}

				last = ch;
			}

			//if last char was a STRING, "close" string now:
			if(isString){
				sb.push("\"");
			}
			return sb.join('');
		}
	};


	return jisonGen;

});