1 2 define(['pegjs', 'constants', 'configurationManager', 'grammarConverter', 'jquery', 'logger', 'module'], 3 /** 4 * Generator for executable language-grammars (i.e. converted JSON grammars). 5 * 6 * <p> 7 * This generator uses PEG.js for compiling the JSON grammar. 8 * 9 * <p> 10 * The generator for compiling the JSON grammar definitions in <code>www/config/languages/<language code>/grammar.json</code> 11 * can be configured in <code>www/config/configuration.json</code>:<br> 12 * <pre> 13 * { 14 * ... 15 * "grammarCompiler": "pegjs", 16 * ... 17 * }</pre> 18 * 19 * <p> 20 * PEGjs supports grammar generation for: 21 * PEG (Parsing Expression Grammar) 22 * 23 * NOTE: PEG is a different formalism than "classical" context-free grammar definitions/formalisms; 24 * see also <a href="http://en.wikipedia.org/wiki/Parsing_expression_grammar">explanation of PEG in Wikipedia</a> 25 * 26 * @see PEGjs homepage at <a href="http://pegjs.majda.cz/">http://pegjs.majda.cz/</a> 27 * 28 * @class 29 * @constant 30 * @public 31 * @name PegJsGenerator 32 * @memberOf mmir.env.grammar 33 * 34 * @requires PEG.js 35 * @requires jQuery.Deferred 36 * @requires jQuery.extend 37 * @requires jQuery.makeArray 38 */ 39 function(pegjs, constants, configManager, GrammarConverter, $, Logger, module){ 40 41 /** 42 * Deferred object that will be returned - for async-initialization: 43 * the deferred object will be resolved, when this module has been initialized. 44 * 45 * @private 46 * @type Deferred 47 * @memberOf PegJsGenerator# 48 */ 49 var deferred = $.Deferred(); 50 //no async initialization necessary for PEG.js generator -> resolve immediately 51 deferred.resolve(); 52 53 /** 54 * The Logger for the PEGjs generator. 55 * 56 * @private 57 * @type Logger 58 * @memberOf PegJsGenerator# 59 * 60 * @see mmir.Logging 61 */ 62 var logger = Logger.create(module); 63 64 /** 65 * The argument name when generating the grammar function: 66 * the argument holds the raw text that will be parsed by the generated grammar. 67 * 68 * NOTE: this argument/variable name must not collide with any code that is generated for the grammar. 69 * 70 * @constant 71 * @private 72 * @function 73 * @memberOf PegJsGenerator# 74 */ 75 var INPUT_FIELD_NAME = 'asr_recognized_text'; 76 77 /** 78 * The default options for the PEGjs compiler. 79 * 80 * To overwrite the default options, configure the following property in <code>www/config/configuration.json</code>:<br> 81 * <pre> 82 * { 83 * ... 84 * "grammar": { 85 * ... 86 * "pegjs": { 87 * "cache": [true | false], // "If true, makes the parser cache results, avoiding exponential parsing time in pathological cases but making the parser slower" - DEFAULT false 88 * "optimize": ["speed" | "size"], //optimizing the generated parser for speed or (code) size - DEFAULT "speed" 89 * "output": ["source" | "parser"], //should not be changed!!! whether to return TEXT or evaluated JavaScript - DEFAULT: "source" 90 * "allowedStartRules": RULE_NAMES //should not be changed!!! - DEFAULT: not set 91 * } 92 * ... 93 * }, 94 * ... 95 * }</pre> 96 * 97 * non-specific compiler options: 98 * <code>execMode = 'sync' | 'async'</code> 99 * <code>genSourceUrl = true | STRING | FALSY'</code> 100 * 101 * 102 * @constant 103 * @private 104 * @default cache := false, optimize := 'speed', output := 'source', allowedStartRules := undefined 105 * @memberOf PegJsGenerator# 106 */ 107 var DEFAULT_OPTIONS = { 108 cache: false, 109 optimize: "speed", 110 output: "source", 111 allowedStartRules: void(0), 112 execMode: 'sync',//'sync' | 'async' | default: sync 113 genSourceUrl: '',// true | STRING: the sourceURL for eval'ed parser-module | default: FALSY 114 }; 115 116 /** 117 * Name for this plugin/grammar-generator (e.g. used for looking up configuration values in configuration.json). 118 * @constant 119 * @private 120 * @memberOf PegJsGenerator# 121 */ 122 var pluginName = 'grammar.pegjs'; 123 124 /** 125 * Exported (public) functions for the PEGjs grammar-engine. 126 * @public 127 * @type GrammarGenerator 128 * @memberOf PegJsGenerator# 129 */ 130 var pegjsGen = { 131 /** @scope PegJsGenerator.prototype */ 132 133 /** 134 * The name/ID for the compile engine for the PEG.js compiler 135 * 136 * @memberOf mmir.env.grammar.PegJsGenerator.prototype 137 */ 138 engineId: 'pegjs', 139 /** 140 * @param {Function} [callback] OPTIONAL 141 * the callback that is triggered, when the engine is initialized 142 * @returns {Deferred} 143 * a promise that is resolved, when the engine is initialized 144 * (NOTE: if you use the same function for the <code>callback</code> AND the promise, 145 * then the function will be invoked twice!) 146 * 147 * @memberOf mmir.env.grammar.PegJsGenerator.prototype 148 */ 149 init: function(callback){ 150 if(callback){ 151 deferred.always(callback); 152 } 153 return deferred; 154 }, 155 /** @returns {Boolean} if this engine compilation works asynchronously. The current implementation works synchronously (returns FALSE) */ 156 isAsyncCompilation: function(){ return false; }, 157 /** 158 * The function for compiling a JSON grammar: 159 * 160 * 161 * @param {GrammarConverter} theConverterInstance 162 * @param {String} instanceId 163 * the ID for the compiled grammar (usually this is a language code) 164 * @param {Number} fileFormatVersion 165 * the version of the file format (this is a constant within {@link mmir.SemanticInterpreter} 166 * @param callback 167 * @returns {GrammarConverter} 168 * the grammar instance with attached with the compiled function for executing the 169 * grammar to the instance's {@link GrammarConvert#executeGrammar} property/function. 170 */ 171 compileGrammar: function(theConverterInstance, instanceId, fileFormatVersion, callback){ 172 173 //attach functions for PEG.js conversion/generation to the converter-instance: 174 $.extend(theConverterInstance, PegJsGrammarConverterExt); 175 176 //start conversion: create grammar in PEG.js syntax (from the JSON definition): 177 theConverterInstance.init(); 178 this._preparePrintError(); 179 theConverterInstance.convertJSONGrammar(); 180 var grammarDefinition = theConverterInstance.getGrammarDef(); 181 182 //load options from configuration: 183 var config = configManager.get(pluginName, true, {}); 184 //combine with default default options: 185 var options = $.extend({id: instanceId}, DEFAULT_OPTIONS, config); 186 187 var compileParserModule = function(grammarParser, hasError){ 188 189 var addGrammarParserExec = 190 '(function(){\n var semanticInterpreter = require("semanticInterpreter");\n'//FIXME 191 + 'var options = {fileFormat:'+fileFormatVersion+',execMode:'+JSON.stringify(options.execMode)+'};\n' 192 + 'var parser = ' 193 + grammarParser 194 // + ';\nvar grammarFunc = parser.parse;\n' 195 + ';\nvar grammarFunc = function(){\n' 196 + ' var result; try {\n' 197 + ' result = parser.parse.apply(this, arguments);\n' 198 + ' } catch (err){\n' 199 + ' console.error(err.stack?err.stack:err); result = {};\n'//TODO warning/error messaging? -> need to handle encoded chars, if error message should be meaningful 200 + ' }\n' 201 + ' return result;\n' 202 + '};\n' 203 + 'semanticInterpreter.addGrammar("' 204 +instanceId 205 +'", grammarFunc, options);\n\n' 206 + 'semanticInterpreter.setStopwords("' 207 +instanceId+'",' 208 209 //store stopwords with their Unicode representation (only for non-ASCII chars) 210 +JSON.stringify( 211 theConverterInstance.getEncodedStopwords() 212 ).replace(/\\\\u/gm,'\\u')//<- revert JSON.stringify encoding for the Unicodes 213 + ');\n' 214 + 'return grammarFunc;\n' 215 + '})();'; 216 217 if(options.genSourceUrl){ 218 219 var sourceUrlStr; 220 if(options.genSourceUrl === true){ 221 sourceUrlStr = 'gen/grammar/_compiled_grammar_'+instanceId; 222 } else { 223 sourceUrlStr = options.genSourceUrl.toString().replace(/<id>/g,instanceId); 224 } 225 226 //for Chrome / FireFox debugging: provide an URL for eval'ed code 227 addGrammarParserExec += '//@ sourceURL='+sourceUrlStr+'\n' 228 +'//# sourceURL='+sourceUrlStr+'\n'; 229 230 } 231 232 theConverterInstance.setGrammarSource(addGrammarParserExec); 233 234 try{ 235 236 eval(addGrammarParserExec); 237 238 } catch (err) { 239 240 //TODO russa: generate meaningful error message with details about error location 241 // eg. use esprima (http://esprima.org) ...? 242 // ... as optional dependency (see deferred initialization above?) 243 244 var evalMsg = 'Error during eval() for "'+ instanceId +'": ' + err; 245 246 if(pegjs.printError){ 247 pegjs.printError(evalMsg); 248 } 249 else { 250 logger.error('PEGjs', 'evalCompiled', evalMsg, err); 251 } 252 253 if(! hasError){ 254 evalMsg = '[INVALID GRAMMAR JavaScript CODE] ' + evalMsg; 255 var parseDummyFunc = (function(msg, error){ 256 return function(){ console.error(msg); console.error(error); throw msg;}; 257 })(evalMsg, err); 258 259 parseDummyFunc.hasErrors = true; 260 261 //theConverterInstance = doGetGrammar(instanceId); 262 theConverterInstance.setGrammarFunction(parseDummyFunc); 263 } 264 265 } 266 267 //invoke callback if present: 268 if(callback){ 269 callback(theConverterInstance); 270 } 271 }; 272 273 var isPreventDefault = this._afterCompileParser(compileParserModule, callback); 274 var result = this._compileParser(grammarDefinition, options, isPreventDefault); 275 276 if(!isPreventDefault){ 277 var hasError = result.hasError; 278 compileParserModule(result.def, hasError); 279 } 280 281 return theConverterInstance; 282 return theConverterInstance; 283 }, 284 /** 285 * @protected 286 */ 287 _compileParser: function(grammarDefinition, options, afterCompileParserResult){ 288 289 var hasError = false; 290 var grammarParser; 291 try{ 292 grammarParser = pegjs.buildParser(grammarDefinition, options); 293 } catch(error) { 294 // "{ 295 // "message": "Expected \"=\" or string but \"_\" found.", 296 // "expected": [ 297 // { 298 // "type": "literal", 299 // "value": "=", 300 // "description": "\"=\"" 301 // }, 302 // { 303 // "type": "other", 304 // "description": "string" 305 // } 306 // ], 307 // "found": "_", 308 // "offset": 4104, 309 // "line": 40, 310 // "column": 6, 311 // "name": "SyntaxError" 312 // }" 313 hasError = true; 314 var msg = ' while compiling grammar "' + options.id+ '": '; 315 if(error.name === 'SyntaxError'){ 316 msg= 'SyntaxError' + msg + error.message; 317 } 318 else { 319 msg = 'Error' + msg + (error && error.stack? error.stack : error); 320 } 321 322 if(typeof error.line !== 'undefined'){ 323 msg += ' at line '+error.line; 324 } 325 326 if(typeof error.column !== 'undefined'){ 327 msg += ':'+error.column; 328 } 329 330 if(typeof error.offset !== 'undefined'){ 331 msg += ' (offset '+error.offset+')'; 332 } 333 334 if(pegjs.printError){ 335 pegjs.printError(msg); 336 } 337 else { 338 console.error(msg); 339 } 340 msg = '[INVALID GRAMMAR] ' + msg; 341 grammarParser = '{ parse: function(){ var msg = '+JSON.stringify(msg)+'; console.error(msg); throw msg;} }'; 342 } 343 344 return {def: grammarParser, hasError: hasError}; 345 }, 346 /** 347 * @protected 348 */ 349 _preparePrintError: function(){ 350 //setup logger for compile errors, if not already set 351 if(! pegjs.printError){ 352 /** 353 * The default logging / error-print function for PEGjs. 354 * 355 * @private 356 * @name printError 357 * @function 358 * @memberOf PegJsGenerator.pegjs# 359 * 360 * @see mmir.Logging 361 */ 362 pegjs.printError = function(){ 363 var args = $.makeArray(arguments); 364 //prepend "location-information" to logger-call: 365 args.unshift('PEGjs', 'compile'); 366 //output log-message: 367 logger.error.apply(logger, args); 368 }; 369 } 370 }, 371 /** 372 * The default logging / error-print function for PEGjs. 373 * 374 * @protected 375 * 376 * @see mmir.Logging 377 */ 378 printError: function(){ 379 pegjs.printError.apply(pegjs, arguments); 380 }, 381 /** 382 * Optional hook for pre-processing the generated parser, after the parser is generated. 383 * 384 * By default, this function returns VOID, in which case the parser-module is created by default. 385 * 386 * If a function is returned instead, then it must invoke <code>compileParserModuleFunc</code>: 387 * <code>compileParserModuleFunc(compiledParser : STRING, hasErrors : BOOLEAN)</code> 388 * 389 * 390 * @param {Function} compileParserModuleFunc 391 * the function that generates the parser-module: 392 * <code>compileParserModuleFunc(compiledParser : STRING, hasErrors : BOOLEAN)</code> 393 * 394 * @param {Function} compileCallbackFunc 395 * the callback function which will be invoked by compileParserModuleFunc, after it has finished. 396 * If compileParserModuleFunc() is prevented from exectution then the callback MUST be invoked manually 397 * <code>compileCallbackFunc(theConverterInstance: GrammarConverter)</code> 398 * 399 * @returns {TRUTHY|VOID} 400 * FALSY for the default behavior. 401 * IF a TRUTHY value is returned, then the default action after compiling the parser 402 * is not executed: 403 * i.e. compileParserModuleFunc is not automatically called and in consequence the callback is not invoked 404 * 405 * 406 * NOTE: if not FALSY, then either compileParserModuleFunc() must be invoked, or the callback() must be invoked! 407 * 408 * @protected 409 */ 410 _afterCompileParser: function(compileParserModuleFunc, compileCallbackFunc){ 411 //default: return VOID 412 return; 413 } 414 }; 415 416 417 ////////////////////////////////////// PEG.js specific extensions to GrammarConverter //////////////////////////////// 418 /** 419 * PEGjs specific extension / implementation for {@link GrammarConverter} instances 420 * 421 * @type GrammarConverter 422 * @memberOf PegJsGenerator# 423 */ 424 var PegJsGrammarConverterExt = { 425 /** @memberOf PegJsGrammarConverterExt */ 426 init: function(){ 427 428 this.THE_INTERNAL_GRAMMAR_CONVERTER_INSTANCE_NAME = "theGrammarConverterInstance"; 429 this._WHITESPACE_TOKEN_NAME = "WS"; 430 this._PARTIAL_MATCH_PREFIX = "_r"; 431 this.grammar_tokens = "/* --- Token definitions --- */\n\n/* Characters to be ignored */\n" 432 + this._WHITESPACE_TOKEN_NAME +" = ' '/'\\t';\n\n/* Non-associative tokens */\n"; 433 434 this.grammar_utterances = ""; 435 this.grammar_phrases = "phrases\n = "; 436 this.token_variables = "{\n var " + this.variable_prefix + "result = '';\n"; 437 this.tokens_array = []; 438 439 }, 440 convertJSONGrammar: function(){ 441 442 this.json_grammar_definition = this.maskJSON(this.json_grammar_definition); 443 444 this.token_variables += " var semanticAnnotationResult = {};\n" 445 + " var _flatten = function(match){ if(!match.join){ return match;} for(var i=0, size = match.length; i < size; ++i){if(!match[i]){continue;}if(match[i].join){match[i] = _flatten(match[i])}} return match.join('') };\n" 446 + " var _tok = function(field, match){ match = _flatten(match); field[match] = match; return match;}\n" 447 ; 448 449 this.parseTokens(); 450 this.parseUtterances(); 451 this.parseStopWords(); 452 453 this.jscc_grammar_definition = this.token_variables 454 + "}\n\n" 455 + "\n\n/* --- Grammar specification --- */\n\nutterance\n = phrases { " 456 457 //TODO use LOG LEVEL for activating / deactivating this: 458 + "console.log(" 459 + this.variable_prefix + "result); " 460 461 + "semanticAnnotationResult.result = " 462 + this.variable_prefix + "result; return "+ this.variable_prefix +"result;} ;\n\n" + this.grammar_utterances 463 + "\n" + this.grammar_phrases + ";\n\n" 464 + this.grammar_tokens; 465 466 this.json_grammar_definition = this.unmaskJSON(this.json_grammar_definition); 467 }, 468 parseTokens: function(){ 469 var self = this; 470 var json_tokens = this.json_grammar_definition.tokens; 471 var pref = self.variable_prefix; 472 473 474 for(token_name in json_tokens){ 475 476 var words = json_tokens[token_name]; 477 478 self.token_variables += " var " + pref 479 + token_name.toLowerCase() + " = {};\n"; 480 481 var sb = [token_name, "\n = _m:("]; 482 483 var isNotRegExpr = true; 484 for(var i=0, size = words.length; i < size ; ++i){ 485 486 //NOTE RegExpr need to be recoded -> need to check, if current word is RegExp! 487 // example (see also _convertRegExpr()): 488 // INPUT: '[a-zA-Z_]+' 489 // RECODED: [a-zA-Z_]+ 490 isNotRegExpr = this._checkIfNotRegExpr(words[i]); 491 if( isNotRegExpr ){ 492 sb.push("'"); 493 } 494 495 //add TOKEN string: 496 sb.push( isNotRegExpr? words[i] : this._convertRegExpr(words[i])); 497 498 499 if( isNotRegExpr ){ 500 sb.push("'"); 501 } 502 503 //if there is another word following, add OR operator 504 if(i < size-1){ 505 sb.push("/"); 506 } 507 } 508 509 //close assignment for "= match:(" and create JavaScript processing for token 510 sb.push( 511 ") { return _tok(" + pref + token_name.toLowerCase() + ", _m); };\n" 512 ); 513 514 self.grammar_tokens += sb.join(""); 515 } 516 }, 517 parseUtterances: function(){ 518 var self = this; 519 var utt_index = 0; 520 var json_utterances = this.json_grammar_definition.utterances; 521 522 for(var utterance_name in json_utterances){ 523 var utterance_def = json_utterances[utterance_name]; 524 if(utt_index > 0){ 525 self.grammar_phrases += "\n\t/"; 526 } 527 utt_index++; 528 self.doParseUtterance(utterance_name, utterance_def); 529 } 530 }, 531 doParseUtterance: function(utterance_name, utterance_def){ 532 533 var self = this; 534 535 self.token_variables += " var " + self.variable_prefix 536 + utterance_name.toLowerCase() + " = {};\n"; 537 538 539 var grammar_utterance = utterance_name + "\n = "; 540 //self.grammar_phrases += utterance_name + " " + self.doCreateSemanticInterpretationForUtterance(utterance_name, utterance_def); 541 self.grammar_phrases += utterance_name + " " ; 542 var phrases = utterance_def.phrases; 543 var semantic = self.doCreateSemanticInterpretationForUtterance(utterance_name, utterance_def); 544 545 for(var index=0,size=phrases.length; index < size; ++index){ 546 if(index > 0){ 547 grammar_utterance += "\n / "; 548 } 549 var phrase = phrases[index]; 550 var semantic_interpretation = self.doCreateSemanticInterpretationForPhrase( 551 utterance_name.toLowerCase(), utterance_def, phrase, semantic 552 ); 553 grammar_utterance += /*phrase +*/ semantic_interpretation; 554 } 555 self.grammar_utterances += grammar_utterance + ";\n\n"; 556 }, 557 doCreateSemanticInterpretationForUtterance: function(utterance_name, utterance_def){ 558 var semantic = utterance_def.semantic, 559 variable_index, variable_name; 560 561 if(logger.isDebug()) logger.debug('doCreateSemanticInterpretationForUtterance: '+semantic);//debug 562 563 var semantic_as_string = JSON.stringify(semantic); 564 if( semantic_as_string != null){ 565 this.variable_regexp.lastIndex = 0; 566 var variables = this.variable_regexp.exec(semantic_as_string); 567 while (variables != null) { 568 var variable = variables[1], 569 remapped_variable_name = ""; 570 571 if(logger.isDebug()) logger.debug("variables " + variable, semantic_as_string);//debug 572 573 variable_index = /\[(\d+)\]/.exec(variable); 574 variable_name = new RegExp('_\\$([a-zA-Z_][a-zA-Z0-9_\\-]*)').exec(variable)[1]; 575 // variableObj = /_\$([a-zA-Z_][a-zA-Z0-9_\-]*)(\[(\d+)\])?(\["semantic"\]|\['semantic'\]|\.semantic)?/.exec(variable); 576 // variableObj = /_\$([a-zA-Z_][a-zA-Z0-9_\-]*)(\[(\d+)\])?((\[(("(.*?[^\\])")|('(.*?[^\\])'))\])|(\.(\w+)))?/.exec(variable); 577 //"_$NAME[INDEX]['FIELD']": _$NAME [ INDEX ] [" FIELD "] | [' FIELD '] | .FIELD 578 if (variable_index == null) { 579 remapped_variable_name = variable; 580 } else { 581 remapped_variable_name = variable.replace( 582 '[' + variable_index[1] + ']' 583 , "[" 584 + utterance_name.toLowerCase() + "_temp['phrases']['" 585 + variable_name.toLowerCase() + "'][" 586 + variable_index[1] 587 + "]."+this.entry_token_field+"]"); 588 //TODO replace try/catch with safe_acc function 589 // PROBLEM: currently, the format for variable-access is not well defined 590 // -> in case of accessing the "semantic" field for a variable reference of another Utterance 591 // we would need another safe_acc call 592 // ... i.e. need to parse expression for this, but since the format is not well defined 593 // we cannot say, for what exactly we should parse... 594 // NORMAL VAR EXPR: _$a_normal_token[0] 595 // ACCESS TO SEMANTICS: _$other_utterance[0]['semantic'] 596 // but this could also be expressed e.g. as _$other_utterance[0].semantic 597 // ... 598 // remapped_variable_name = variable.replace( 599 // '[' + variable_index[1] + ']' 600 // , "[safe_acc(" 601 // + utterance_name.toLowerCase() + "_temp, 'phrases', '" 602 // + variable_name.toLowerCase() + "', " 603 // + variable_index[1] 604 // + ")]" 605 // ); 606 } 607 semantic_as_string = semantic_as_string.replace( 608 variables[0], 609 " function(){try{return " + remapped_variable_name 610 + ";} catch(e){return void(0);}}() " 611 // "' + " + remapped_variable_name + " + '"//TODO replace try/catch with safe_acc function 612 ); 613 variables = this.variable_regexp.exec(semantic_as_string); 614 } 615 } 616 return semantic_as_string; 617 }, 618 doCreateSemanticInterpretationForPhrase: function(utterance_name, utterance_def, phrase, semantic_as_string){ 619 var phraseList = phrase.split(/\s+/), 620 length = phraseList.length, 621 duplicate_helper = {}; 622 623 var phraseStr = ""; 624 // var result = " { var _m = "; 625 var i = 0; 626 627 var pharseMatchResult = "var _m = "; 628 // for (; i < length; ++i){ 629 // pharseMatchResult += this._PARTIAL_MATCH_PREFIX + (i+1); 630 // if(i < length){ 631 // pharseMatchResult += " + ' ' + "; 632 // } 633 // } 634 635 // result += "; var "+utterance_name+"_temp = {}; "+utterance_name+"_temp['phrases'] = {};"; 636 637 var semanticProcResult = "var "+utterance_name+"_temp = {}; "+utterance_name+"_temp['phrases'] = {};"; 638 var num; 639 for (i = 0; i < length; ++i) { 640 641 num = i+1; 642 643 //create STRING for phrase-matching 644 if(i > 0){ 645 phraseStr += " " + this._WHITESPACE_TOKEN_NAME + " "; 646 } 647 phraseStr += this._PARTIAL_MATCH_PREFIX + num + ":" + phraseList[i]; 648 649 //create STRING for concatenated match of all partial phrases 650 pharseMatchResult += this._PARTIAL_MATCH_PREFIX + num; 651 if(num < length){ 652 pharseMatchResult += " + ' ' + "; 653 } 654 655 //create STRING for semantic processing of phrase 656 if (typeof(duplicate_helper[phraseList[i]]) == "undefined") { 657 duplicate_helper[phraseList[i]] = 0; 658 semanticProcResult += utterance_name+"_temp['phrases']['"+phraseList[i].toLowerCase()+"'] = [];\n\t\t"; 659 } else { 660 duplicate_helper[phraseList[i]] += 1; 661 } 662 semanticProcResult += utterance_name + "_temp['phrases']['" 663 + phraseList[i].toLowerCase() + "'][" 664 + duplicate_helper[phraseList[i]] + "] = {" 665 + this.entry_token_field + ": " + this._PARTIAL_MATCH_PREFIX + num + "," 666 + this.entry_index_field + ": " + (num-1) 667 +"};\n\t\t"; 668 } 669 670 semanticProcResult += "var " + this.variable_prefix + "phrase = _m; " 671 + utterance_name + "_temp['phrase']=" + this.variable_prefix + "phrase; " 672 + utterance_name + "_temp['utterance']='" + utterance_name + "'; " 673 + utterance_name + "_temp['engine']='pegjs'; "//FIXME debug 674 + utterance_name + "_temp['semantic'] = " + semantic_as_string 675 + "; " + this.variable_prefix + utterance_name + "[" 676 + this.variable_prefix + "phrase] = " + utterance_name + "_temp; " 677 + this.variable_prefix + "result = " + utterance_name + "_temp;"; 678 679 return phraseStr + " {\n\t " + pharseMatchResult + "; " + semanticProcResult + "; return _m; \n\t} "; 680 }, 681 _checkIfNotRegExpr: function(token){ 682 683 //test for character-group 684 if( ! /([^\\]\[)|(^\[).*?[^\\]\]/.test(token)){ 685 686 //test for grouping 687 if( ! /([^\\]\()|(^\().*?[^\\]\)/.test(token) ){ 688 689 //try for single-characters that occur in reg-expr FIXME this may procude false-positives!!! 690 return ! /[\?|\*|\+|\^|\|\\]/.test(token); //excluded since these may be more common in natural text: . $ 691 } 692 } 693 694 return false; 695 }, 696 _convertRegExpr: function(token){ 697 var sb = [], ch, last = null, isString = false, isGroup = false, isEsc = false, hasOr = false; 698 for(var i=0, size = token.length; i < size; ++i){ 699 ch = token.charAt(i); 700 switch(ch){ 701 case '(': 702 case ')': 703 case '[': 704 case ']': 705 case '+': 706 case '*': 707 case '?': 708 case '$': 709 case '^': 710 case '.': 711 case '|': 712 if(last !== '\\'){ 713 714 //if changed from STRING -> non-STRING, then "close" string first: 715 if(isString){ 716 717 //for "optional" expression: modify previous entry to be a single character-sequence 718 // ...cars'? -> ...car' 's'? 719 if(ch === '?' && sb.length > 0){//TODO also for '+', '*', ...??? 720 sb[ sb.length - 1 ] = '\' \'' + sb[ sb.length - 1 ]; 721 } 722 723 sb.push("' "); 724 isString = false; 725 } 726 727 //insert reg-expr symbol 728 if(ch !== '|'){ 729 sb.push(ch); 730 } 731 else { 732 sb.push(' / '); 733 hasOr = true; 734 } 735 736 //is character-group opening/closing? 737 if(isGroup && ch === ']'){ 738 isGroup = false; 739 } 740 else if(!isGroup && ch === '['){ 741 isGroup = true; 742 } 743 744 745 break; 746 } 747 else { 748 isEsc = true; 749 } 750 default: 751 752 if(isEsc){ 753 sb.splice(sb.length-1);//remove last element, i.e. the escape-character 754 isEsc = false; 755 } 756 757 //if changed from non-STRING -> STRING, then "open" string now: 758 if(!isGroup && !isString){ 759 sb.push(" '"); 760 isString = ! isGroup; 761 } 762 sb.push(ch); 763 } 764 765 last = ch; 766 } 767 768 //if last char was a STRING, "close" string now: 769 if(isString){ 770 sb.push("'"); 771 } 772 if(hasOr){ 773 sb.unshift('('); 774 sb.push(')'); 775 } 776 return sb.join(''); 777 } 778 }; 779 780 781 return pegjsGen; 782 783 });