1 
  2 define(['pegjs', 'constants', 'configurationManager', 'grammarConverter', 'jquery', 'logger', 'module'],
  3 /**
  4  * Generator for executable language-grammars (i.e. converted JSON grammars).
  5  * 
  6  * <p>
  7  * This generator uses PEG.js for compiling the JSON grammar.
  8  * 
  9  * <p>
 10  * The generator for compiling the JSON grammar definitions in <code>www/config/languages/<language code>/grammar.json</code>
 11  * can be configured in <code>www/config/configuration.json</code>:<br>
 12  * <pre>
 13  * {
 14  *   ...
 15  *   "grammarCompiler": "pegjs",
 16  *   ...
 17  * }</pre>
 18  * 
 19  * <p>
 20  * PEGjs supports grammar generation for:
 21  * PEG (Parsing Expression Grammar)
 22  * 
 23  * NOTE: PEG is a different formalism than "classical" context-free grammar definitions/formalisms; 
 24  * see also <a href="http://en.wikipedia.org/wiki/Parsing_expression_grammar">explanation of PEG in Wikipedia</a>
 25  * 
 26  * @see PEGjs homepage at <a href="http://pegjs.majda.cz/">http://pegjs.majda.cz/</a>
 27  * 
 28  * @class
 29  * @constant
 30  * @public
 31  * @name PegJsGenerator
 32  * @memberOf mmir.env.grammar
 33  * 
 34  * @requires PEG.js
 35  * @requires jQuery.Deferred
 36  * @requires jQuery.extend
 37  * @requires jQuery.makeArray
 38  */		
 39 function(pegjs, constants, configManager, GrammarConverter, $, Logger, module){
 40 
 41 /**
 42  * Deferred object that will be returned - for async-initialization:
 43  * the deferred object will be resolved, when this module has been initialized.
 44  * 
 45  * @private
 46  * @type Deferred
 47  * @memberOf PegJsGenerator#
 48  */
 49 var deferred = $.Deferred();
 50 //no async initialization necessary for PEG.js generator -> resolve immediately
 51 deferred.resolve();
 52 
 53 /**
 54  * The Logger for the PEGjs generator.
 55  * 
 56  * @private
 57  * @type Logger
 58  * @memberOf PegJsGenerator#
 59  * 
 60  * @see mmir.Logging
 61  */
 62 var logger = Logger.create(module);
 63 
 64 /**
 65  * The argument name when generating the grammar function:
 66  * the argument holds the raw text that will be parsed by the generated grammar.
 67  * 
 68  * NOTE: this argument/variable name must not collide with any code that is generated for the grammar.
 69  * 
 70  * @constant
 71  * @private
 72  * @function
 73  * @memberOf PegJsGenerator#
 74  */
 75 var INPUT_FIELD_NAME = 'asr_recognized_text';
 76 
 77 /**
 78  * The default options for the PEGjs compiler.
 79  * 
 80  * To overwrite the default options, configure the following property in <code>www/config/configuration.json</code>:<br>
 81  * <pre>
 82  * {
 83  *   ...
 84  *   "grammar": {
 85  *   	...
 86  *   	"pegjs": {
 87  *   		"cache": [true | false], 			// "If true, makes the parser cache results, avoiding exponential parsing time in pathological cases but making the parser slower" - DEFAULT false
 88  *   		"optimize": ["speed" | "size"], 	//optimizing the generated parser for speed or (code) size - DEFAULT "speed"
 89  *   		"output": ["source" | "parser"], 	//should not be changed!!! whether to return TEXT or evaluated JavaScript - DEFAULT: "source"
 90  *   		"allowedStartRules": RULE_NAMES 	//should not be changed!!! - DEFAULT: not set
 91  *   	}
 92  *   	...
 93  *   },
 94  *   ...
 95  * }</pre>
 96  * 
 97  * non-specific compiler options:
 98  * <code>execMode = 'sync' | 'async'</code>
 99  * <code>genSourceUrl = true | STRING | FALSY'</code>
100  * 
101  * 
102  * @constant
103  * @private
104  * @default cache := false, optimize := 'speed', output := 'source', allowedStartRules := undefined
105  * @memberOf PegJsGenerator#
106  */
107 var DEFAULT_OPTIONS = {
108 	cache:    false,
109 	optimize: "speed",
110 	output:   "source",
111 	allowedStartRules: void(0),
112 	execMode: 'sync',//'sync' | 'async' | default: sync
113 	genSourceUrl: '',// true | STRING: the sourceURL for eval'ed parser-module | default: FALSY 
114 };
115 
116 /**
117  * Name for this plugin/grammar-generator (e.g. used for looking up configuration values in configuration.json).
118  * @constant
119  * @private
120  * @memberOf PegJsGenerator#
121  */
122 var pluginName = 'grammar.pegjs';
123 
124 /**
125  * Exported (public) functions for the PEGjs grammar-engine.
126  * @public
127  * @type GrammarGenerator
128  * @memberOf PegJsGenerator#
129  */
130 var pegjsGen = {
131 	/** @scope PegJsGenerator.prototype */
132 	
133 	/**
134 	 * The name/ID for the compile engine for the PEG.js compiler
135 	 * 
136 	 * @memberOf mmir.env.grammar.PegJsGenerator.prototype
137 	 */
138 	engineId: 'pegjs',
139 	/**
140 	 * @param {Function} [callback] OPTIONAL
141 	 * 			the callback that is triggered, when the engine is initialized
142 	 * @returns {Deferred}
143 	 * 			a promise that is resolved, when the engine is initialized
144 	 * 			(NOTE: if you use the same function for the <code>callback</code> AND the promise,
145 	 * 			       then the function will be invoked twice!)
146 	 * 
147 	 * @memberOf mmir.env.grammar.PegJsGenerator.prototype
148 	 */
149 	init: function(callback){
150 		if(callback){
151 			deferred.always(callback);
152 		}
153 		return deferred;
154 	},
155 	/** @returns {Boolean} if this engine compilation works asynchronously. The current implementation works synchronously (returns FALSE) */
156 	isAsyncCompilation: function(){ return false; },
157 	/**
158 	 * The function for compiling a JSON grammar:
159 	 * 
160 	 * 
161 	 * @param {GrammarConverter} theConverterInstance
162 	 * @param {String} instanceId
163 	 * 				the ID for the compiled grammar (usually this is a language code)
164 	 * @param {Number} fileFormatVersion
165 	 * 				the version of the file format (this is a constant within {@link mmir.SemanticInterpreter}
166 	 * @param callback
167 	 * @returns {GrammarConverter}
168 	 * 			the grammar instance with attached with the compiled function for executing the
169 	 * 			grammar to the instance's {@link GrammarConvert#executeGrammar} property/function. 
170 	 */
171 	compileGrammar: function(theConverterInstance, instanceId, fileFormatVersion, callback){
172         
173 		//attach functions for PEG.js conversion/generation to the converter-instance: 
174 		$.extend(theConverterInstance, PegJsGrammarConverterExt);
175 		
176 		//start conversion: create grammar in PEG.js syntax (from the JSON definition):
177 		theConverterInstance.init();
178 		this._preparePrintError();	
179 		theConverterInstance.convertJSONGrammar();
180         var grammarDefinition = theConverterInstance.getGrammarDef();
181         
182         //load options from configuration:
183         var config = configManager.get(pluginName, true, {});
184         //combine with default default options:
185         var options = $.extend({id: instanceId}, DEFAULT_OPTIONS, config);
186         
187         var compileParserModule = function(grammarParser, hasError){
188         	
189 	        var addGrammarParserExec = 
190 	    	  '(function(){\n  var semanticInterpreter = require("semanticInterpreter");\n'//FIXME
191 	        	+ 'var options = {fileFormat:'+fileFormatVersion+',execMode:'+JSON.stringify(options.execMode)+'};\n'
192 	        	+ 'var parser = '
193 	        	+ grammarParser
194 	//        	+ ';\nvar grammarFunc = parser.parse;\n'
195 	        	+ ';\nvar grammarFunc = function(){\n'
196 	        	+ '  var result;  try {\n'
197 	        	+ '    result = parser.parse.apply(this, arguments);\n'
198 	        	+ '  } catch (err){\n'
199 	        	+ '    console.error(err.stack?err.stack:err); result = {};\n'//TODO warning/error messaging? -> need to handle encoded chars, if error message should be meaningful
200 	        	+ '  }\n'
201 	        	+ '  return result;\n'
202 	        	+ '};\n'
203 	        	+ 'semanticInterpreter.addGrammar("'
204 	        		+instanceId
205 	        		+'", grammarFunc, options);\n\n'
206 	        	+ 'semanticInterpreter.setStopwords("'
207 	        		+instanceId+'",'
208 	        		
209 	        		//store stopwords with their Unicode representation (only for non-ASCII chars)
210 	        		+JSON.stringify(
211 	        				theConverterInstance.getEncodedStopwords()
212 	        		).replace(/\\\\u/gm,'\\u')//<- revert JSON.stringify encoding for the Unicodes
213 	        	+ ');\n'
214 	        	+ 'return grammarFunc;\n'
215 	        	+ '})();';
216 	        
217 	        if(options.genSourceUrl){
218             	
219             	var sourceUrlStr;
220             	if(options.genSourceUrl === true){
221             		sourceUrlStr = 'gen/grammar/_compiled_grammar_'+instanceId;
222             	} else {
223             		sourceUrlStr = options.genSourceUrl.toString().replace(/<id>/g,instanceId);
224             	}
225             	
226             	//for Chrome / FireFox debugging: provide an URL for eval'ed code
227             	addGrammarParserExec += '//@ sourceURL='+sourceUrlStr+'\n'
228             							+'//# sourceURL='+sourceUrlStr+'\n';
229                     
230             }
231 	        
232 	        theConverterInstance.setGrammarSource(addGrammarParserExec);
233 	        
234 	        try{
235 	        	
236 	        	eval(addGrammarParserExec);
237 	        	
238 	        } catch (err) {
239 	        	
240 	        	//TODO russa: generate meaningful error message with details about error location
241 	        	//			  eg. use esprima (http://esprima.org) ...?
242 	        	//			... as optional dependency (see deferred initialization above?)
243 	        	
244 	        	var evalMsg = 'Error during eval() for "'+ instanceId +'": ' + err;
245 	        	
246 	        	if(pegjs.printError){
247 	        		pegjs.printError(evalMsg);
248 	        	}
249 	        	else {
250 	        		logger.error('PEGjs', 'evalCompiled', evalMsg, err);
251 	        	}
252 	        	
253 	        	if(! hasError){
254 	            	evalMsg = '[INVALID GRAMMAR JavaScript CODE] ' + evalMsg;
255 	            	var parseDummyFunc = (function(msg, error){ 
256 	            		return function(){ console.error(msg); console.error(error); throw msg;};
257 	            	})(evalMsg, err);
258 	            	
259 	            	parseDummyFunc.hasErrors = true;
260 	            	
261 	            	//theConverterInstance = doGetGrammar(instanceId);
262 	            	theConverterInstance.setGrammarFunction(parseDummyFunc);
263 	        	}
264 	        	
265 	        }
266 	        
267 	        //invoke callback if present:
268 	        if(callback){
269 	        	callback(theConverterInstance);
270 	        }
271 		};
272 
273         var isPreventDefault = this._afterCompileParser(compileParserModule, callback);
274         var result = this._compileParser(grammarDefinition, options, isPreventDefault);
275         
276         if(!isPreventDefault){
277         	var hasError = result.hasError;
278         	compileParserModule(result.def, hasError);
279         }
280         	
281         return theConverterInstance;
282         return theConverterInstance;
283 	},
284 	/**
285 	 * @protected 
286 	 */
287 	_compileParser: function(grammarDefinition, options, afterCompileParserResult){
288 		
289 		var hasError = false;
290         var grammarParser;
291         try{
292         	grammarParser = pegjs.buildParser(grammarDefinition, options);
293         } catch(error) {
294 //        	"{
295 //        	  "message": "Expected \"=\" or string but \"_\" found.",
296 //        	  "expected": [
297 //        	    {
298 //        	      "type": "literal",
299 //        	      "value": "=",
300 //        	      "description": "\"=\""
301 //        	    },
302 //        	    {
303 //        	      "type": "other",
304 //        	      "description": "string"
305 //        	    }
306 //        	  ],
307 //        	  "found": "_",
308 //        	  "offset": 4104,
309 //        	  "line": 40,
310 //        	  "column": 6,
311 //        	  "name": "SyntaxError"
312 //        	}"
313         	hasError = true;
314         	var msg = ' while compiling grammar "' + options.id+ '": ';
315         	if(error.name === 'SyntaxError'){
316         		msg= 'SyntaxError' + msg + error.message;
317         	}
318         	else {
319         		msg = 'Error' + msg + (error && error.stack? error.stack : error);
320         	}
321         	
322         	if(typeof error.line !== 'undefined'){
323         		msg += ' at line '+error.line;
324         	}
325 
326         	if(typeof error.column !== 'undefined'){
327         		msg += ':'+error.column;
328         	}
329         	
330         	if(typeof error.offset !== 'undefined'){
331         		msg += ' (offset '+error.offset+')';
332         	}
333         	
334         	if(pegjs.printError){
335         		pegjs.printError(msg);
336         	}
337         	else {
338         		console.error(msg);
339         	}
340         	msg = '[INVALID GRAMMAR] ' + msg;
341         	grammarParser = '{ parse: function(){ var msg = '+JSON.stringify(msg)+'; console.error(msg); throw msg;} }';
342         }
343         
344         return {def: grammarParser, hasError: hasError};
345 	},
346 	/**
347 	 * @protected 
348 	 */
349 	_preparePrintError: function(){
350 		//setup logger for compile errors, if not already set
351 		if(! pegjs.printError){
352 			/**
353 			 * The default logging / error-print function for PEGjs.
354 			 * 
355 			 * @private
356 			 * @name printError
357 			 * @function
358 			 * @memberOf PegJsGenerator.pegjs#
359 			 * 
360 			 * @see mmir.Logging
361 			 */
362 			pegjs.printError = function(){
363 				var args = $.makeArray(arguments);
364 				//prepend "location-information" to logger-call:
365 				args.unshift('PEGjs', 'compile');
366 				//output log-message:
367 				logger.error.apply(logger, args);
368 			};
369 		}
370 	},
371 	/**
372 	 * The default logging / error-print function for PEGjs.
373 	 * 
374 	 * @protected
375 	 * 
376 	 * @see mmir.Logging
377 	 */
378 	printError: function(){
379 		pegjs.printError.apply(pegjs, arguments);
380 	},
381 	/**
382 	 * Optional hook for pre-processing the generated parser, after the parser is generated.
383 	 * 
384 	 * By default, this function returns VOID, in which case the parser-module is created by default.
385 	 * 
386 	 * If a function is returned instead, then it must invoke <code>compileParserModuleFunc</code>:
387 	 * <code>compileParserModuleFunc(compiledParser : STRING, hasErrors : BOOLEAN)</code>
388 	 * 
389 	 * 
390 	 * @param {Function} compileParserModuleFunc
391 	 * 				the function that generates the parser-module:
392 	 * 				<code>compileParserModuleFunc(compiledParser : STRING, hasErrors : BOOLEAN)</code>
393 	 * 
394 	 * @param {Function} compileCallbackFunc
395 	 * 				the callback function which will be invoked by compileParserModuleFunc, after it has finished.
396 	 * 				If compileParserModuleFunc() is prevented from exectution then the callback MUST be invoked manually
397 	 * 				<code>compileCallbackFunc(theConverterInstance: GrammarConverter)</code>
398 	 * 
399 	 * @returns {TRUTHY|VOID}
400 	 * 				FALSY for the default behavior.
401 	 * 				IF a TRUTHY value is returned, then the default action after compiling the parser
402 	 * 				is not executed:
403 	 * 					i.e. compileParserModuleFunc is not automatically called and in consequence the callback is not invoked
404 	 * 					
405 	 * 				
406 	 * 				NOTE: if not FALSY, then either compileParserModuleFunc() must be invoked, or the callback() must be invoked!
407 	 * 
408 	 * @protected
409 	 */
410 	_afterCompileParser: function(compileParserModuleFunc, compileCallbackFunc){
411 		//default: return VOID
412 		return;
413 	}
414 };
415 
416 
417 ////////////////////////////////////// PEG.js specific extensions to GrammarConverter ////////////////////////////////
418 /**
419  * PEGjs specific extension / implementation for {@link GrammarConverter} instances
420  * 
421  * @type GrammarConverter
422  * @memberOf PegJsGenerator#
423  */
424 var PegJsGrammarConverterExt = {
425 	/** @memberOf PegJsGrammarConverterExt */
426 	init: function(){
427 		
428 		this.THE_INTERNAL_GRAMMAR_CONVERTER_INSTANCE_NAME = "theGrammarConverterInstance";
429 		this._WHITESPACE_TOKEN_NAME = "WS";
430 		this._PARTIAL_MATCH_PREFIX = "_r";
431 		this.grammar_tokens = "/* --- Token definitions --- */\n\n/* Characters to be ignored */\n"
432 			+ this._WHITESPACE_TOKEN_NAME +" = ' '/'\\t';\n\n/* Non-associative tokens */\n";
433 		
434 		this.grammar_utterances = "";
435 		this.grammar_phrases = "phrases\n    = ";
436 		this.token_variables = "{\n  var " + this.variable_prefix + "result = '';\n";
437 		this.tokens_array = [];
438 		
439 	},
440 	convertJSONGrammar: function(){
441 	
442 		this.json_grammar_definition = this.maskJSON(this.json_grammar_definition);
443 		
444 		this.token_variables += "  var semanticAnnotationResult = {};\n"
445 			+ "  var _flatten = function(match){ if(!match.join){ return match;} for(var i=0, size = match.length; i < size; ++i){if(!match[i]){continue;}if(match[i].join){match[i] = _flatten(match[i])}} return match.join('') };\n"
446 			+ "  var _tok = function(field, match){ match = _flatten(match); field[match] = match; return match;}\n"
447 		;
448 		
449 		this.parseTokens();
450 		this.parseUtterances();
451 		this.parseStopWords();
452 		
453 		this.jscc_grammar_definition = this.token_variables
454 				+ "}\n\n"
455 				+ "\n\n/* --- Grammar specification --- */\n\nutterance\n    = phrases    {  "
456 				
457 				//TODO use LOG LEVEL for activating / deactivating this:
458 				+ "console.log("
459 				+ this.variable_prefix + "result); "
460 				
461 				+ "semanticAnnotationResult.result = "
462 				+ this.variable_prefix + "result; return "+ this.variable_prefix +"result;} ;\n\n" + this.grammar_utterances
463 				+ "\n" + this.grammar_phrases + ";\n\n"
464 				+ this.grammar_tokens;
465 
466 		this.json_grammar_definition = this.unmaskJSON(this.json_grammar_definition);
467 	},
468 	parseTokens: function(){
469 		var self = this;
470 		var json_tokens =  this.json_grammar_definition.tokens;
471 		var pref = self.variable_prefix;
472 		
473 		
474 		for(token_name in json_tokens){
475 			
476 			var words = json_tokens[token_name];
477 			
478 			self.token_variables += "  var " + pref
479 					+ token_name.toLowerCase() + " = {};\n";
480 			
481 			var sb = [token_name, "\n    = _m:("];
482 			
483 			var isNotRegExpr = true;
484 			for(var i=0, size = words.length; i < size ; ++i){
485 				
486 				//NOTE RegExpr need to be recoded -> need to check, if current word is RegExp!
487 				//  example (see also _convertRegExpr()):
488 				//	INPUT:   '[a-zA-Z_]+'
489 				//	RECODED: [a-zA-Z_]+
490 				isNotRegExpr = this._checkIfNotRegExpr(words[i]);
491 				if( isNotRegExpr ){
492 					sb.push("'");
493 				}
494 				
495 				//add TOKEN string:
496 				sb.push( isNotRegExpr? words[i] : this._convertRegExpr(words[i]));
497 
498 				
499 				if( isNotRegExpr ){
500 					sb.push("'");
501 				}
502 				
503 				//if there is another word following, add OR operator
504 				if(i < size-1){
505 					sb.push("/");
506 				}
507 			}
508 			
509 			//close assignment for "= match:(" and create JavaScript processing for token
510 			sb.push(
511 				")  { return _tok(" + pref + token_name.toLowerCase() + ", _m); };\n"
512 			);
513 			
514 			self.grammar_tokens += sb.join("");
515 		}
516 	},
517 	parseUtterances: function(){
518 		var self = this;
519 		var utt_index = 0;
520 		var json_utterances =  this.json_grammar_definition.utterances;
521 
522 		for(var utterance_name in json_utterances){
523 			var utterance_def = json_utterances[utterance_name];
524 			if(utt_index > 0){
525 				self.grammar_phrases += "\n\t/";
526 			}
527 			utt_index++;
528 			self.doParseUtterance(utterance_name, utterance_def);
529 		}
530 	},
531 	doParseUtterance: function(utterance_name, utterance_def){
532 		
533 		var self = this; 
534 		
535 		self.token_variables += "  var " + self.variable_prefix
536 				+ utterance_name.toLowerCase() + " = {};\n";
537 		
538 
539 		var grammar_utterance = utterance_name + "\n   = ";
540 		//self.grammar_phrases += utterance_name + "  " +  self.doCreateSemanticInterpretationForUtterance(utterance_name, utterance_def);
541 		self.grammar_phrases += utterance_name + "  " ;
542 		var phrases = utterance_def.phrases;
543 		var semantic  = self.doCreateSemanticInterpretationForUtterance(utterance_name, utterance_def);
544 		
545 		for(var index=0,size=phrases.length; index < size; ++index){
546 			if(index > 0){
547 				grammar_utterance += "\n  / ";
548 			}
549 			var phrase = phrases[index];
550 			var semantic_interpretation = self.doCreateSemanticInterpretationForPhrase(
551 					utterance_name.toLowerCase(), utterance_def, phrase, semantic
552 			);
553 			grammar_utterance += /*phrase +*/ semantic_interpretation;
554 		}
555 		self.grammar_utterances += grammar_utterance + ";\n\n";
556 	},
557 	doCreateSemanticInterpretationForUtterance: function(utterance_name, utterance_def){
558 		var semantic = utterance_def.semantic,
559 		variable_index, variable_name;
560 		
561 		if(logger.isDebug()) logger.debug('doCreateSemanticInterpretationForUtterance: '+semantic);//debug
562 		
563 		var semantic_as_string = JSON.stringify(semantic);
564 		if( semantic_as_string != null){
565 		this.variable_regexp.lastIndex = 0;
566 		var variables = this.variable_regexp.exec(semantic_as_string);
567 		while (variables != null) {
568 			var variable = variables[1],
569 			remapped_variable_name = "";
570 			
571 			if(logger.isDebug()) logger.debug("variables " + variable, semantic_as_string);//debug
572 			
573 			variable_index = /\[(\d+)\]/.exec(variable);
574 			variable_name = new RegExp('_\\$([a-zA-Z_][a-zA-Z0-9_\\-]*)').exec(variable)[1];
575 //			variableObj = /_\$([a-zA-Z_][a-zA-Z0-9_\-]*)(\[(\d+)\])?(\["semantic"\]|\['semantic'\]|\.semantic)?/.exec(variable);
576 //			variableObj = /_\$([a-zA-Z_][a-zA-Z0-9_\-]*)(\[(\d+)\])?((\[(("(.*?[^\\])")|('(.*?[^\\])'))\])|(\.(\w+)))?/.exec(variable);
577 	//"_$NAME[INDEX]['FIELD']":  _$NAME                  [ INDEX ]        [" FIELD "]  | [' FIELD ']      |   .FIELD
578 			if (variable_index == null) {
579 				remapped_variable_name = variable;
580 			} else {
581 					remapped_variable_name = variable.replace(
582 							  '[' + variable_index[1] + ']'
583 							, "["
584 								+ utterance_name.toLowerCase() + "_temp['phrases']['"
585 								+ variable_name.toLowerCase() + "']["
586 								+ variable_index[1]
587 							+ "]."+this.entry_token_field+"]");
588 					//TODO replace try/catch with safe_acc function
589 					//     PROBLEM: currently, the format for variable-access is not well defined
590 					//              -> in case of accessing the "semantic" field for a variable reference of another Utterance
591 					//                 we would need another safe_acc call 
592 					//				   ... i.e. need to parse expression for this, but since the format is not well defined
593 					//				   we cannot say, for what exactly we should parse...
594 					//                 NORMAL VAR EXPR: 		_$a_normal_token[0]
595 					//                 ACCESS TO SEMANTICS: 	_$other_utterance[0]['semantic']
596 					//                                      but this could also be expressed e.g. as _$other_utterance[0].semantic
597 					//                                      ...
598 //					remapped_variable_name = variable.replace(
599 //							  '[' + variable_index[1] + ']'
600 //							, "[safe_acc("
601 //								+ utterance_name.toLowerCase() + "_temp, 'phrases', '"
602 //								+ variable_name.toLowerCase() + "', "
603 //								+ variable_index[1] 
604 //								+ ")]"
605 //							);
606 			}
607 			semantic_as_string = semantic_as_string.replace(
608 					variables[0],
609 					" function(){try{return " + remapped_variable_name
610 						+ ";} catch(e){return void(0);}}() "
611 //					"' + " + remapped_variable_name + " + '"//TODO replace try/catch with safe_acc function
612 			);
613 			variables =  this.variable_regexp.exec(semantic_as_string);
614 		}
615 		}
616 		return semantic_as_string;
617 	},
618 	doCreateSemanticInterpretationForPhrase: function(utterance_name, utterance_def, phrase, semantic_as_string){
619 		var phraseList = phrase.split(/\s+/),
620 		length = phraseList.length,
621 		duplicate_helper = {};
622 	
623 		var phraseStr = "";
624 	//	var result = " { var _m = ";
625 		var i = 0;
626 		
627 		var pharseMatchResult = "var _m = ";
628 	//	for (; i < length; ++i){
629 	//		pharseMatchResult += this._PARTIAL_MATCH_PREFIX + (i+1);
630 	//		if(i < length){
631 	//			pharseMatchResult += " + ' ' + ";
632 	//		}
633 	//	}
634 		
635 	//	result += "; var "+utterance_name+"_temp = {}; "+utterance_name+"_temp['phrases'] = {};";
636 		
637 		var semanticProcResult = "var "+utterance_name+"_temp = {}; "+utterance_name+"_temp['phrases'] = {};";
638 		var num;
639 		for (i = 0; i < length; ++i) {
640 			
641 			num = i+1;
642 			
643 			//create STRING for phrase-matching
644 			if(i > 0){
645 				phraseStr += " " + this._WHITESPACE_TOKEN_NAME + " ";
646 			}
647 			phraseStr += this._PARTIAL_MATCH_PREFIX + num + ":" + phraseList[i];
648 			
649 			//create STRING for concatenated match of all partial phrases
650 			pharseMatchResult += this._PARTIAL_MATCH_PREFIX + num;
651 			if(num < length){
652 				pharseMatchResult += " + ' ' + ";
653 			}
654 			
655 			//create STRING for semantic processing of phrase
656 			if (typeof(duplicate_helper[phraseList[i]]) == "undefined") {
657 				duplicate_helper[phraseList[i]] = 0;
658 				semanticProcResult += utterance_name+"_temp['phrases']['"+phraseList[i].toLowerCase()+"'] = [];\n\t\t";
659 			} else {
660 				duplicate_helper[phraseList[i]] += 1;
661 			}
662 			semanticProcResult += utterance_name + "_temp['phrases']['"
663 						+ phraseList[i].toLowerCase() + "']["
664 						+ duplicate_helper[phraseList[i]] + "] = {"
665 							+ this.entry_token_field + ": " + this._PARTIAL_MATCH_PREFIX + num + ","
666 							+ this.entry_index_field + ": " + (num-1)
667 						+"};\n\t\t";
668 		}
669 		
670 		semanticProcResult += "var " + this.variable_prefix + "phrase = _m; " 
671 				+ utterance_name + "_temp['phrase']=" + this.variable_prefix + "phrase; "
672 				+ utterance_name + "_temp['utterance']='" + utterance_name + "'; "
673 				+ utterance_name + "_temp['engine']='pegjs'; "//FIXME debug
674 				+ utterance_name + "_temp['semantic'] = " + semantic_as_string
675 				+ "; " + this.variable_prefix + utterance_name + "["
676 				+ this.variable_prefix + "phrase] = " + utterance_name + "_temp; "
677 				+ this.variable_prefix + "result = " + utterance_name + "_temp;";
678 		
679 		return phraseStr + " {\n\t   " + pharseMatchResult +  "; " + semanticProcResult + "; return _m; \n\t} ";
680 	},
681 	_checkIfNotRegExpr: function(token){
682 		
683 		//test for character-group
684 		if( ! /([^\\]\[)|(^\[).*?[^\\]\]/.test(token)){
685 			
686 			//test for grouping
687 			if( ! /([^\\]\()|(^\().*?[^\\]\)/.test(token) ){
688 			
689 				//try for single-characters that occur in reg-expr FIXME this may procude false-positives!!!
690 				return ! /[\?|\*|\+|\^|\|\\]/.test(token); //excluded since these may be more common in natural text: . $
691 			}
692 		}
693 		
694 		return false;
695 	},
	/**
	 * Converts a token-word that is a regular expression into an expression in PEG.js syntax.
	 * 
	 * The word is processed character by character:
	 * <ul>
	 *  <li>non-escaped reg-expr symbols <code>( ) [ ] + * ? $ ^ .</code> are inserted as they are</li>
	 *  <li>a non-escaped <code>|</code> is converted to the PEG choice operator <code>/</code>
	 *      (in this case the whole result is wrapped in parentheses)</li>
	 *  <li>all other characters outside of character-groups <code>[...]</code> are collected
	 *      into single-quoted string literals; characters within character-groups are inserted as they are</li>
	 *  <li>for escaped reg-expr symbols, the escape-character is removed and the symbol is
	 *      handled as normal character</li>
	 * </ul>
	 * 
	 * @param {String} token
	 * 				the word (regular expression) to convert
	 * @returns {String}
	 * 				the converted expression
	 */
	_convertRegExpr: function(token){
		//sb: the parts of the converted expression; last: the previously processed character;
		//isString: currently within a (generated) string literal; isGroup: currently within a character-group [...];
		//isEsc: the current character is an escaped reg-expr symbol; hasOr: the expression contains an OR operator
		var sb = [], ch, last = null, isString = false, isGroup = false, isEsc = false, hasOr = false;
		for(var i=0, size = token.length; i < size; ++i){
			ch = token.charAt(i);
			switch(ch){
			case '(':
			case ')':
			case '[':
			case ']':
			case '+':
			case '*':
			case '?':
			case '$':
			case '^':
			case '.':
			case '|':
				//reg-expr symbol: only handled as symbol, if it is not escaped
				if(last !== '\\'){

					//if changed from STRING -> non-STRING, then "close" string first:
					if(isString){
						
						//for "optional" expression: modify previous entry to be a single character-sequence
						// ...cars'?  -> ...car' 's'?
						if(ch === '?' && sb.length > 0){//TODO also for '+', '*', ...???
							sb[ sb.length - 1 ] = '\' \'' + sb[ sb.length - 1 ];
						}
						
						sb.push("' ");
						isString = false;
					}
					
					//insert reg-expr symbol
					if(ch !== '|'){
						sb.push(ch);
					}
					else {
						//the OR operator in PEG syntax
						sb.push(' / ');
						hasOr = true;
					}
					
					//is character-group opening/closing?
					if(isGroup && ch === ']'){
						isGroup = false;
					}
					else if(!isGroup && ch === '['){
						isGroup = true;
					}
					
					
					break;
				}
				else {
					isEsc = true;
				}
				//NOTE: intentional fall-through -> escaped symbols are handled as normal characters
			default:
				
				if(isEsc){
					sb.splice(sb.length-1);//remove last element, i.e. the escape-character
					isEsc = false;
				}
				
				//if changed from non-STRING -> STRING, then "open" string now:
				if(!isGroup && !isString){
					sb.push(" '");
					isString = ! isGroup;
				}
				sb.push(ch);
			}
			
			last = ch;
		}
		
		//if last char was a STRING, "close" string now:
		if(isString){
			sb.push("'");
		}
		//expressions with OR operator are wrapped in parentheses
		if(hasOr){
			sb.unshift('(');
			sb.push(')');
		}
		return sb.join('');
	}
778 };
779 
780 
781 return pegjsGen;
782 
783 });