1 /*
  2  * 	Copyright (C) 2012-2013 DFKI GmbH
  3  * 	Deutsches Forschungszentrum fuer Kuenstliche Intelligenz
  4  * 	German Research Center for Artificial Intelligence
  5  * 	http://www.dfki.de
  6  * 
  7  * 	Permission is hereby granted, free of charge, to any person obtaining a 
  8  * 	copy of this software and associated documentation files (the 
  9  * 	"Software"), to deal in the Software without restriction, including 
 10  * 	without limitation the rights to use, copy, modify, merge, publish, 
 11  * 	distribute, sublicense, and/or sell copies of the Software, and to 
 12  * 	permit persons to whom the Software is furnished to do so, subject to 
 13  * 	the following conditions:
 14  * 
 15  * 	The above copyright notice and this permission notice shall be included 
 16  * 	in all copies or substantial portions of the Software.
 17  * 
 18  * 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 19  * 	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
 20  * 	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
 21  * 	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
 22  * 	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
 23  * 	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
 24  * 	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 25  */
 26 
 27 
 28 /**
 29 * The GrammarConverter object initializes the grammar for processing
 30 * <em>natural language text</em>, e.g. from the voice recognition.
 31 * 
 32 * @class
 33 * @name GrammarConverter
 34 * 
 35 * @requires mmir.CommonUtils.isArray
 36 * @requires jQuery.ajax
 37 */
 38 define(['commonUtils', 'jquery'], function(commonUtils, $){
 39 
 40 
 41 
 42 /**
 43  * @ignore
 44  * 
 45  * @constructs GrammarConverter
 46  */
 47 function GrammarConverter(){
 48 	
 49 //	this.THE_INTERNAL_GRAMMAR_CONVERTER_INSTANCE_NAME = "theGrammarConverterInstance";
 50 //	this.grammar_tokens = "/~ --- Token definitions --- ~/\n\n/~ Characters to be ignored ~/\n!   ' |\\t' ;\n\n/~ Non-associative tokens ~/\n";
 51 //	this.grammar_utterances = "";
 52 //	this.grammar_phrases = "phrases:";
 53 //	this.token_variables = "[*\n  var " + this.variable_prefix
 54 //			+ "result = '';\n";
 55 //	this.tokens_array = new Array();
 56 	
 57 	this.variable_prefix = "_$";
 58 	this.variable_regexp = /"(_\$[^\"]*)"/igm;// /"_$([^\"]*)/igm;
 59 	
 60 	this.entry_token_field = "tok";//must consist of ASCI "word chars", i.e. not whitepaces, numbers etc.
 61 	this.entry_index_field = "i";//must consist of ASCI "word chars", i.e. not whitepaces, numbers etc.
 62 	
 63 	//regular expression for detecting encoded chars (see mask/unmask functions)
 64 	this.enc_regexp_str = "~~([0-9|A-F|a-f]{4})~~";
 65 
 66 	this.jscc_grammar_definition = "";
 67 	this.js_grammar_definition = "";
 68 	this.json_grammar_definition = null;
 69 	this.stop_words_regexp;
 70 	
 71 	//default setting for masking value Strings in JSON values (see maskJSON() / unmaskJSON)
 72 	this.maskValues = true;
 73 	//default setting for masking property-name Strings in JSON values (see maskJSON() / unmaskJSON)
 74 	// WARNING: this is actually EXPERIMENTAL; it should be set to false, since JS/CC may not be able to handle masked ID names...
 75 	this.maskNames = false;
 76 	//default setting for loading JSON files:
 77 	// if set to true, old-style umlauts encodings (e.g. __oe__) will converted after loading the file
 78 	// Enable this, if you need to use old-style encoded grammars ... still, the better option would
 79 	//  be to convert the old-style grammar (i.e. use un-encoded umlauts in the JSON grammar file).
 80 	this.convertOldFormat = false;
 81 	
 82 
 83 	//alternative reg-exp for stop-words (a different method for detecting/removing stopwords must be used!)
 84 	this.stop_words_regexp_alt;
 85 	
 86 	//if execution of the grammar is asynchronously done (i.e. result is delivered using a callback)
 87 	this.is_async = false;
 88 };
 89 
 90 GrammarConverter.prototype.loadGrammar = function(successCallback, errorCallback, grammarUrl, doLoadSynchronously){
 91 	var self = this;
 92 	var success = function(data, status, xhr){
 93 		
 94 		//DISABLED: old-style masking for umlauts:
 95 //		data = self.recodeJSON(data, self.encodeUmlauts);
 96 		
 97 		//if auto-upgrading is enabled:
 98 		//   decode old-style umlaut masking before continuing
 99 		if(self.convertOldFormat){
100 			data = self.recodeJSON(data, self.decodeUmlauts);
101 		}
102 		
103 		self.json_grammar_definition = data;
104 		
105 		if (typeof successCallback == "function") {
106 			successCallback.call(this, self, xhr);
107 		}
108 	};
109 	var error = function(xhr, status, data){
110 		alert("failed to load the grammar! error: "+ JSON.stringify(data));
111 		if (typeof errorCallback == "function") {
112 			errorCallback.call(this, self);
113 		}
114 	};
115 	this.loadResource(success, error, grammarUrl, doLoadSynchronously);
116 };
117 
/**
 * Loads a JSON resource via <code>$.ajax</code>.
 * 
 * If no URL is given, an error is logged and the <code>errorCallback</code> (if present) is
 * invoked as <code>errorCallback.call(grammarConverter, grammarConverter)</code>; no request is made.
 * 
 * @param {Function} successCallback
 * 				set as the <code>success</code> handler of the request
 * @param {Function} errorCallback
 * 				set as the <code>error</code> handler of the request
 * @param {String} resourceUrl
 * 				the URL of the resource
 * @param {Boolean} [doLoadSynchronously] OPTIONAL
 * 				the request is only made asynchronously, if this is exactly <code>false</code>;
 * 				in all other cases (including an omitted argument) the request is made synchronously
 */
GrammarConverter.prototype.loadResource = function(successCallback, errorCallback, resourceUrl, doLoadSynchronously){

	if(resourceUrl){
		
		$.ajax({
			//asynchronous loading must be requested explicitly with FALSE
			async: doLoadSynchronously === false,
			dataType: 'json',
			url: resourceUrl,
			success: successCallback,
			error: errorCallback
		});
		
		return;
	}
	
	//no URL: report the error without trying to load anything
	console.error('GrammarConverter.loadResource: missing URL!');
	if(errorCallback){
		errorCallback.call(this, this);
	}
};
142 
/**
 * Sets the stopword list of the JSON grammar and (re-)creates the RegExps for stopword removal.
 * 
 * If there is no JSON grammar definition yet, an empty one is created.
 * 
 * @param {Array<String>} stopWordArray
 * 				the list of stopwords
 */
GrammarConverter.prototype.setStopWords = function(stopWordArray){
	
	var grammar = this.json_grammar_definition;
	if(!grammar){
		grammar = {};
		this.json_grammar_definition = grammar;
	}
	
	//the RegExps are created from the masked stopwords ...
	grammar.stop_word = this.maskJSON(stopWordArray);
	
	this.parseStopWords();
	this.parseStopWords_alt();
	
	//... but the stopwords are stored unmasked:
	//use unmask-function in order to ensure masking/unmasking is reversible
	//  (or in case it is not: the error will be held in property stop_word)
	grammar.stop_word = this.unmaskJSON(grammar.stop_word);
};
158 
/**
 * Get the stopword list of the JSON grammar.
 * 
 * @returns {Array<String>} the value of property <code>stop_word</code> of the JSON grammar,
 * 							or <code>null</code> if there is no JSON grammar
 */
GrammarConverter.prototype.getStopWords = function(){
	var grammar = this.json_grammar_definition;
	return grammar? grammar.stop_word : null;
};
165 
/**
 * HELPER that returns a copy of the stopword list which was recoded with {@link #maskAsUnicode},
 *        i.e. with non-ASCII chars replaced by their Unicode representation
 *        (e.g. for safe storage of a stringified stopword list, even if the file-encoding
 *        does not support non-ASCII letters).
 * 
 * The stored stopword list itself is not modified.
 * 
 * @returns {Array<String>} the recoded copy of the stopword list from the current JSON grammar
 * 							(or an empty list, if there are no stopwords)
 */
GrammarConverter.prototype.getEncodedStopwords = function(){
	
	var stopwords = this.getStopWords();
	
	if(stopwords){
		//recoding works in-place
		// -> recode a (shallow) copy, so that the stored stopword list remains as it is
		return this.recodeJSON(stopwords.slice(), this.maskAsUnicode);
	}
	
	return [];
};
188 
/**
 * Creates the RegExps for stopword removal from the stopword list of the JSON grammar
 * (i.e. from its property <code>stop_word</code>).
 * This is the original / main implementation for creating the RegExp for stopword removal.
 * 
 * The results are stored in the properties
 * <ul>
 * 	<li><code>stop_words_regexp</code>: matches the "normal" stopwords at word-boundaries, including
 * 		one optional whitespace that follows the stopword.
 * 		If there are no "normal" stopwords, the RegExp only matches the empty String
 * 		(basically: removes nothing)</li>
 * 	<li><code>stop_words_regexp_enc</code>: matches the stopwords that start or end with an encoded char
 * 		(see <code>enc_regexp_str</code>), enclosed in whitespaces or START / END.
 * 		For replacement with this RegExp, a space-char must be used (not an empty String).
 * 		If there are no such stopwords, the property is reset to <code>undefined</code></li>
 * </ul>
 * 
 * A missing JSON grammar, or a missing stopword list, is treated like an empty stopword list.
 * 
 * NOTE the stopwords are inserted into the RegExps as they are (i.e. without escaping).
 */
GrammarConverter.prototype.parseStopWords = function(){

	var grammar = this.json_grammar_definition;
	var json_stop_words = (grammar && grammar.stop_word) || [];
	var size = json_stop_words.length;
	var stop_words = "";
	
	//FIX for encoded chars: if a word begins or ends with an encoded char, \b cannot detect the word's boundaries
	//	-> FIX if we encounter such words, create a separate RegExpr that uses 
	//         whitespaces & START-/END-expression for detecting word-boundaries, i.e. something like: (\s|^)(~~ ... words ... ~~)(\s|$)
	//
	//  NOTE: the word-boundaries expression \b seems to have no effect in case of non-ASCII chars in general
	//        (e.g. for Japanese characters / words)
	//        .... so we would need to use this alternative mechanism (e.g. using whitespaces & START-/END-expr.) 
	//        even if these characters were not encoded! 
	//
	//NOTE the testers must NOT be global (g): they are re-used for all stopwords, and a global RegExp
	//     continues at its lastIndex from the previous successful test(), i.e. it would miss matches
	//     at the start of the next stopword
	var encStartTester = new RegExp("^" + this.enc_regexp_str      ,"m");
	var encEndTester   = new RegExp(      this.enc_regexp_str + "$","m");
	var enc_stop_words = [];
	var isEncWord = function(str){
		return encStartTester.test(str) || encEndTester.test(str); 
	};
	
	for(var index=0; index < size ; ++index){
		var stop_word = json_stop_words[index];
		
		//special treatment for words that begin/end with encoded chars:
		if(isEncWord(stop_word)){
			enc_stop_words.push(stop_word);
			continue;
		}
		
		//... for "normal" stopwords:
		
		if (stop_words.length > 0){
			stop_words +=	"|";    //... if there is already a previous stopword-entry: do add OR-matching ...
		}

		stop_words +=	stop_word;  //... add the stopword "stop_word"
	}
	
	if(stop_words.length > 0){
		
		stop_words = 
				 "\\b("             //starting at a word-boundary (-> ignore within-word matches)
			   + stop_words 
			   + ")"
		       + "\\b"	            //... ending with a word-boundary -> avoid "cutting out" matching partial strings
	                                //    e.g. without \b: '(in)\s?' would match (and cut out all matches) within "winning" -> "wng"
		       + "\\s?";	        //... and optionally: one white-character that follows the stopword
	}
	else {
		//for empty stopword definition: match empty string
		//  (basically: remove nothing)
		stop_words = '^$';
	}
	this.stop_words_regexp = new RegExp(stop_words,"igm");	//RegExp options: 
															// ignore-case (i),
															// match globally i.e. all occurrences in the String (g), 
															// do not stop at line breaks (m)
	
	//only create RegExp for special stopwords, if we actually have at least 1 of those:
	//NOTE for replacement, we need to use a space-char (i.e. replace these with spaces, not empty strings: str.replace(..., ' '); )
	if(enc_stop_words.length > 0){
		this.stop_words_regexp_enc = new RegExp("(\\s|^)(" + enc_stop_words.join("|") + ")(\\s|$)","igm");
	}
	else {
		//reset, so that no RegExp from a previously parsed stopword list remains
		this.stop_words_regexp_enc = undefined;
	}
};
278 
/**
 * Creates the alternative version of the RegExp for stopwords from the stopword list of the
 * JSON grammar (property <code>stop_word</code>), and stores it in <code>stop_words_regexp_alt</code>.
 * 
 * For each stopword, the RegExp matches
 * (1) the stopword enclosed in spaces,
 * (2) the stopword at 'line end' preceded by a space,
 * (3) the stopword at 'line start' followed by a space.
 * 
 * For an empty stopword list, the RegExp only matches the empty String (basically: removes nothing).
 */
GrammarConverter.prototype.parseStopWords_alt = function(){
	
	var words = this.json_grammar_definition.stop_word;
	var count = words.length;
	var pattern;
	
	if(count > 0){
		
		var alternatives = [];
		for(var k = 0; k < count; ++k){
			var word = words[k];
			//match patterns (1), (2), and (3) for the stopword:
			alternatives.push(" " + word + " | " + word + "$|^" + word + " ");
		}
		
		pattern = "(" + alternatives.join("|") + ")";
	}
	else {
		pattern = '^$';
	}
	
	this.stop_words_regexp_alt = new RegExp(pattern, "igm");
};
308 
/**
 * Get the RegExp for removing ("normal") stopwords.
 * 
 * If the RegExp does not exist yet, it is created first by parsing the stopwords
 * (see {@link #parseStopWords}).
 * 
 * @returns {RegExp} the RegExp for stopwords
 */
GrammarConverter.prototype.getStopWordsRegExpr = function(){
	var regexp = this.stop_words_regexp;
	if(regexp){
		return regexp;
	}
	//not created yet: parse the stopwords now
	this.parseStopWords();
	return this.stop_words_regexp;
};
315 
/**
 * Get the RegExp for stopwords that start or end with encoded chars (i.e. masked non-ASCII chars).
 * 
 * If the stopwords were not parsed yet, they are parsed first (see {@link #parseStopWords}).
 * 
 * This RegExp may be NULL/undefined, if no stopwords exist that begin/end with encoded chars,
 * i.e. you need to check for NULL, before trying to use this RegExp.
 * 
 *  Usage:
 *  @example 
 *  
 *  //remove normal stopwords:
 *  var removedStopwordsStr  = someStr.replace( gc.getStopWordsRegExpr(), '');
 *  
 *  
 *  var removedStopwordsStr2 = removedStopwordsStr;
 *  if(gc.getStopWordsEncRegExpr()){
 *  	//NOTE replace stopwords with spaces (not with empty String as above, ie. with "normal" stopwords) 
 *  	removedStopwordsStr2 = removedStopwordsStr.replace( gc.getStopWordsEncRegExpr(), ' ');
 *  }
 * 
 * @returns {RegExp} the RegExp for stopwords that start/end with encoded chars (may be NULL/undefined)
 */
GrammarConverter.prototype.getStopWordsEncRegExpr = function(){
	//the main RegExp serves as indicator, whether the stopwords were already parsed
	var isParsed = !!this.stop_words_regexp;
	if(!isParsed){
		this.parseStopWords();
	}
	return this.stop_words_regexp_enc;
};
341 
/**
 * Get the alternative version of the RegExp for stopwords.
 * 
 * If the RegExp does not exist yet, it is created first (see {@link #parseStopWords_alt}).
 * 
 * @returns {RegExp} the alternative RegExp for stopwords
 */
GrammarConverter.prototype.getStopWordsRegExpr_alt = function(){
	var regexp = this.stop_words_regexp_alt;
	if(regexp){
		return regexp;
	}
	//not created yet: create it now
	this.parseStopWords_alt();
	return this.stop_words_regexp_alt;
};
349 
/**
 * Get the grammar definition text, i.e. the "source code" input for the
 * grammar compiler (syntax for jison, PEG.js or JS/CC).
 * 
 * The grammar definition text is generated from the JSON grammar.
 * 
 * @returns {String} the grammar definition in compiler-specific syntax
 * 
 * @see #setGrammarDef
 */
GrammarConverter.prototype.getGrammarDef = function(){
	var grammarDefinition = this.jscc_grammar_definition;
	return grammarDefinition;
};
363 
/**
 * Sets the grammar definition text (i.e. the text that is returned by {@link #getGrammarDef}).
 * 
 * This function should only be used during compilation of the JSON grammar
 * to the executable grammar.
 * 
 * NOTE: Setting this "manually" will have no effect on the executable grammar.
 * 
 * @see #getGrammarDef
 * @protected
 * 
 * @param {String} rawGrammarSyntax
 * 		the grammar definition in compiler-specific syntax
 */
GrammarConverter.prototype.setGrammarDef = function(rawGrammarSyntax){
	var grammarDefinition = rawGrammarSyntax;
	this.jscc_grammar_definition = grammarDefinition;
};
381 
/**
 * Get the compiled JavaScript grammar source code, i.e. the output of the
 * grammar compiler (with additional JavaScript "framing" in
 * SemanticInterpreter.createGrammar).
 * 
 * This needs to be eval'ed before it can be executed (eval() will add
 * the corresponding executable grammar to SemanticInterpreter).
 * 
 * @returns {String} the compiled, JavaScript grammar source code
 * 
 * @see #setGrammarSource
 */
GrammarConverter.prototype.getGrammarSource = function(){
	var grammarSource = this.js_grammar_definition;
	return grammarSource;
};
396 
/**
 * Sets the compiled JavaScript grammar source code
 * (i.e. the text that is returned by {@link #getGrammarSource}).
 * 
 * @param {String} src_code
 * 		the compiled, JavaScript grammar source code
 * 
 * @see #getGrammarSource
 */
GrammarConverter.prototype.setGrammarSource = function(src_code){
	var grammarSource = src_code;
	this.js_grammar_definition = grammarSource;
};
400 
/**
 * Set the executable grammar function: the function is set as
 * <code>executeGrammar</code> of this GrammarConverter.
 * 
 * The grammar function takes a String argument: the text that should be parsed,
 *                        and a Function argument: the callback for the result,
 *                            where the callback itself takes 1 argument for the result: <code>callback(result)</code>
 *                            
 * The returned result depends on the JSON definition of the grammar:
 * <code>func(inputText, resultCallback)</code>
 * 
 * 
 * @param {Function} func
 * 			the executable grammar function: <code>func(string, function(object)) : object</code>
 * @param {Boolean} [isAsync] OPTIONAL
 * 					set to TRUE, if execution is asynchronously done
 * 					(the value is converted to a Boolean).
 * 					DEFAULT: FALSE 
 * 
 * @see #executeGrammar
 * @see #isAsyncExec
 */
GrammarConverter.prototype.setGrammarFunction = function(func, isAsync){
	//normalize to Boolean (e.g. an omitted argument results in FALSE)
	this.is_async = isAsync? true : false;
	this.executeGrammar = func;
};
424 
/**
 * Check, if the execution of the grammar is asynchronously done
 * (i.e. if the result is delivered using a callback).
 * 
 * @returns {Boolean} the value of the setting for asynchronous execution
 * 
 * @see #setGrammarFunction
 */
GrammarConverter.prototype.isAsyncExec = function(){
	var isAsync = this.is_async;
	return isAsync;
};
428 
/**
 * Execute the grammar.
 * 
 * This default implementation is only a stub, that logs a warning: the actual implementation
 * is set with {@link #setGrammarFunction}. The description below applies to the actual implementation.
 * 
 * NOTE: do not use directly, but {@link mmir.SemanticInterpreter.getASRSemantic} instead,
 * 		since that function applies some pre- and post-processing to the text (stopword removal
 * 		en-/decoding of special characters etc.).
 * 
 * @param {String} text
 * 			the text String that should be parsed.
 * @param {Function} [callback]
 * 			if #isAsyncExec is TRUE, then executeGrammar will have no return value, but instead the result
 * 			of the grammar execution is delivered by the <code>callback</code>:
 * 			<pre>function callback(result){ ... }</pre>
 * 			(see also description of <code>return</code> value below)
 * @returns {Object}
 * 			the result of the grammar execution:
 * 			<code>{phrase: STRING, phrases: OBJECT, semantic: OBJECT}</code>
 * 
 * 			The property <code>phrase</code> contains the <code>text</code> which was matched (with removed stopwords).
 * 
 * 			The property <code>phrases</code> contains the matched <tt>TOKENS</tt> and <tt>UTTERANCES</tt> from
 * 			the JSON definition of the grammar as properties as arrays
 *          (e.g. for 1 matched TOKEN "token": <code>{token: ["the matched text"]}</code>).
 * 
 *          The returned property <code>semantic</code> depends on the JSON definition of the grammar.
 *          
 *          NOTE: if #isAsyncExec is TRUE, then there will be no return value, but instead the callback
 *                is invoked with the return value.
 *          
 */
GrammarConverter.prototype.executeGrammar = function(text, callback){
	//stub: neither the text nor the callback are processed
	var message = 'GrammarConverter.executeGrammar: this is only a stub. No grammar implementation set yet...';
	console.warn(message);
};
462 
/**
 * Masks characters of a String: all characters outside the range
 * <code>' '</code> (space) to <code>'~'</code> are replaced with
 * <code>~~XXXX~~</code>
 * where <code>XXXX</code> is the four digit HEX number (upper case, padded with
 * zeros) of the character's code (as returned by <code>charCodeAt()</code>).
 * 
 * Exceptions: tabs are not masked, and line breaks are not masked but normalized, i.e.
 * each of <code>\r</code>, <code>\n</code>, <code>\r\n</code>, and <code>\n\r</code>
 * results in one <code>\n</code>.
 * 
 * <p>
 * NOTE that this function is <em>stable</em> with regard to
 * multiple executions:
 * 
 * If the function is invoked on the returned String again, the
 * returned String will be the same / unchanged, i.e.
 * maskings (i.e. "~~XXXX~~") will not be masked again.
 * </p>
 * <p>
 * NOTE: currently, the masking pattern cannot be escaped,
 * 		 i.e. if the original String contains a substring
 * 		 that matches the masking pattern, there is no way
 * 		 to mark it, so that the unmask-function
 * 		 would leave it untouched.
 * </p>
 * 
 * @param {String} str
 * 				the String to process
 * @param {String} [prefix] OPTIONAL
 * 				an alternative prefix used for masking, i.e instead of <code>~~</code>
 * 				(ignored, if argument has other type than <code>string</code>)
 * @param {String} [postfix] OPTIONAL
 * 				an alternative postfix used for masking, i.e instead of <code>~~</code>
 * 				(ignored, if argument has other type than <code>string</code>)
 * @returns {String} 
 * 				the masked string
 */
GrammarConverter.prototype.maskString = function (str, prefix, postfix) {
	
	var escStart = typeof prefix  === 'string'? prefix  : '~~';
	var escEnd   = typeof postfix === 'string'? postfix : '~~';
	
	var pieces = [];
	var current, hex;
	
	//the String is scanned with a 1-char look-ahead:
	// the cursor is always one position ahead of the look-ahead char
	var lookahead = str.charAt(0);
	var cursor = 1;
	
	while (cursor <= str.length) {
		
		current = lookahead;
		lookahead = str.charAt(cursor);
		++cursor;
		
		if (current === '\r' || current === '\n') {
			//NEWLINE: consume a 2-char line break (\r\n or \n\r) as a whole ...
			if (lookahead === (current === '\r'? '\n' : '\r')) {
				lookahead = str.charAt(cursor);
				++cursor;
			}
			//... and emit every line break as \n
			pieces.push('\n');
		}
		else if (current === '\t' || (current >= ' ' && current <= '~')) {
			//tabs and chars within the range ' ' ... '~' are taken over as they are
			pieces.push(current);
		}
		else {
			//all other chars are masked with their HEX code (padded with zeros to 4 digits)
			hex = current.charCodeAt(0).toString(16).toUpperCase();
			pieces.push(escStart, '0000'.substring(hex.length), hex, escEnd);
		}
	}
	
	return pieces.join('');
};
578 
/**
 * HELPER that uses #maskString with the prefix <code>\u</code> and an empty postfix,
 * i.e. masked chars are encoded as <code>\uXXXX</code> where XXXX is the HEX number.
 * 
 * 
 * SHORTCUT for calling <code>maskString(str, '\\u', '')</code>.
 * 
 * @example
 * //for Japanese "下さい" ("please")
 * maskAsUnicode("下さい") -> "\u4E0B\u3055\u3044"
 * 
 * //... and using default masking:
 * maskString("下さい") -> "~~4E0B~~~~3055~~~~3044~~"
 * 
 * @param {String} str
 * 				the String to process
 * @returns {String}
 * 				the masked string
 */
GrammarConverter.prototype.maskAsUnicode = function (str) {
	var unicodePrefix = '\\u';
	var noPostfix = '';
	return this.maskString(str, unicodePrefix, noPostfix);
};
596 
/**
 * Unmasks <i>masked unicoded characters</i> in a string.
 * 
 * Masked unicode characters are assumed to have the pattern:
 * <code>~~XXXX~~</code>
 * where <code>XXXX</code> is the four digit unicode HEX number.
 * 
 * Matches of the detector, for which the first grouping is not a HEX number,
 * are left unchanged.
 * 
 * <p>
 * NOTE that this function is <em>stable</em> with regard to
 * multiple executions, <b>IF</b> the original String <tt>str</tt> did not
 * contain a sub-string that conforms to the encoding pattern 
 * (see remark for {@link #maskString}):
 * 
 * If the function is invoked on the returned String again, the
 * returned String will be the same, i.e. unchanged. 
 * </p>
 * 
 * @param {String} str
 * 				the String to process
 * @param {RegExp} [detector] OPTIONAL
 * 				an alternative detector-RegExp:
 * 				the first grouping of the RegExp must detect the unicode number (HEX).
 * 				The default detector is created from <code>enc_regexp_str</code>.
 * 				All matches are processed, i.e. a detector without global flag (g) is
 * 				applied as if the flag was set, and processing always starts at the
 * 				beginning of the String (regardless of the detector's <code>lastIndex</code>).
 * @returns {String} the unmasked string
 */
GrammarConverter.prototype.unmaskString = function (str, detector) {
	
	var HEX_NUMBER = /^[0-9A-Fa-f]+$/;
	
	//RegExpr for: ~~XXXX~~
	// where XXXX is the unicode HEX number
	var pattern;
	if(!detector){
		pattern = new RegExp( this.enc_regexp_str, "igm");
	}
	else if(detector.global){
		pattern = detector;
	}
	else {
		//a non-global RegExp would only ever yield its 1st match
		// -> use a global version of the detector
		pattern = new RegExp(
				detector.source,
				"g" + (detector.ignoreCase? "i" : "") + (detector.multiline? "m" : "")
		);
	}
	
	//NOTE replace() with a global RegExp starts at the beginning of the String
	//     (i.e. the lastIndex of a re-used detector has no effect)
	return str.replace(pattern, function(match, hex){
		
		//if there is no valid HEX number (e.g. the detector has no grouping): keep the text as it is
		// (parseInt() would result in NaN, or in the number for a HEX-prefix, i.e. in a wrong char)
		if(typeof hex !== 'string' || !HEX_NUMBER.test(hex)){
			return match;
		}
		
		return String.fromCharCode( parseInt(hex, 16) );
	});
};
649 
650 
/**
 * Masks the Strings of a JSON-like object: applies {@link #maskString} by means
 * of {@link #recodeJSON}.
 * 
 * @param {Object} json
 * 				the JSON-like object
 * @param {Boolean} [isMaskValues] OPTIONAL
 * 				passed on to {@link #recodeJSON}
 * @param {Boolean} [isMaskNames] OPTIONAL
 * 				passed on to {@link #recodeJSON}
 * @returns {Object} the result of {@link #recodeJSON}
 */
GrammarConverter.prototype.maskJSON = function (json, isMaskValues, isMaskNames) {
	var recodeFunc = this.maskString;
	return this.recodeJSON(json, recodeFunc, isMaskValues, isMaskNames);
};
654 
/**
 * Unmasks the Strings of a JSON-like object: applies {@link #unmaskString} by means
 * of {@link #recodeJSON}.
 * 
 * @param {Object} json
 * 				the JSON-like object
 * @param {Boolean} [isMaskValues] OPTIONAL
 * 				passed on to {@link #recodeJSON}
 * @param {Boolean} [isMaskNames] OPTIONAL
 * 				passed on to {@link #recodeJSON}
 * @returns {Object} the result of {@link #recodeJSON}
 */
GrammarConverter.prototype.unmaskJSON = function (json, isMaskValues, isMaskNames) {
	var recodeFunc = this.unmaskString;
	return this.recodeJSON(json, recodeFunc, isMaskValues, isMaskNames);
};
658 
/**
 * Recodes Strings of a JSON-like object.
 * 
 * Arrays and objects are processed recursively and are modified <em>in place</em>,
 * i.e. for arrays and objects, the returned value is the (modified) argument itself.
 * Values that are neither String, array, nor object are returned unchanged.
 * 
 * @function
 * @param {Object} json 
 * 					the JSON-like object (i.e. PlainObject)
 * 
 * @param {Function} recodeFunc
 * 								the "recoding" function for modifying String values:
 * 								 must accept a String argument and return a String
 * 									<code>String recodeFunc(String)</code>.
 * 								The function is invoked in context of the GrammarConverter object.
 * 								Example: this.maskString.
 * 								See {@link #maskString}.
 * 
 * @param {Boolean} [isMaskValues] OPTIONAL
 * 								 if true, the object's property String values will be processed
 * 								 NOTE: in case this parameter is specified, then <code>recodeFunc</code> must
 * 									   also be specified!
 * 								 DEFAULT: uses property {@link #maskValues}
 * @param {Boolean} [isMaskNames]  OPTIONAL
 * 								 if true, the property names will be processed
 * 								 NOTE: in case this parameter is specified, then <code>recodeFunc</code> and
 * 									   <code>isMaskValues</code> must also be specified!
 * 								 DEFAULT: uses property {@link #maskNames}
 * 
 * @returns {Object} the recoded JSON object
 * 
 * @requires {@link mmir.CommonUtils#isArray} or {@link Array#isArray}
 */
GrammarConverter.prototype.recodeJSON = (function () {//<- NOTE this is only the initializer (i.e. see returned function below)
	
	//use isArray() of the CommonUtils, if it is available (otherwise the native implementation)
	var isArray;
	if(typeof commonUtils !== 'undefined' && commonUtils && typeof commonUtils.isArray === 'function'){
		isArray = function(obj){
			return commonUtils.isArray(obj);
		};
	} 
	else { 
		isArray = Array.isArray;
	}
	
	//recursive processing for an object
	//returns: the processed object
	var processJSON = function(obj, recodeFunc, isMaskValues, isMaskNames){
		
		var i, size, names, name, recodedName;
		
		//different treatments for: STRING, ARRAY, OBJECT types (and 'REST' type, i.e. all others)
		if(typeof obj === 'string'){
			//STRING: encode the string (if values should be processed)
			return isMaskValues? recodeFunc.call(this, obj) : obj;
		}
		
		if( isArray(obj) ) {
			//ARRAY: process all entries:
			for(i=0, size = obj.length; i < size; ++i){
				obj[i] = processJSON.call(this, obj[i], recodeFunc, isMaskValues, isMaskNames);
			}
			
			return obj;
		}
		
		if(obj !== null && typeof obj === 'object') {//NOTE null is typeof object!
			
			//OBJECT: process all the object's properties (but only, if they are not inherited)
			//NOTE use a snapshot of the property names:
			//     * properties may get added/removed below (i.e. if property names are recoded)
			//     * the object may have an own property with name "hasOwnProperty"
			names = Object.keys(obj);
			for(i=0, size = names.length; i < size; ++i){
				
				name = names[i];
				obj[name] = processJSON.call(this, obj[name], recodeFunc, isMaskValues, isMaskNames);
				
				//if the property-name should also be encoded:
				if(isMaskNames){
					
					recodedName = recodeFunc.call(this, name);
					if(recodedName !== name){
						obj[recodedName] = obj[name];
						delete obj[name];
					}
				}
			}
			return obj;
		}
		
		//REST (including null): return as is
		return obj;
	};
	
	return function (json, recodeFunc, isMaskValues, isMaskNames){
		//evaluate arguments:
		if(typeof isMaskValues === 'undefined'){
			isMaskValues = this.maskValues;
		}
		if(typeof isMaskNames === 'undefined'){
			isMaskNames = this.maskNames;
		}
		
		return processJSON.call(this, json, recodeFunc, isMaskValues, isMaskNames);
	};
	
})();
757 
/**
 * Replaces umlauts (and the sharp s) with their old-style encoding:
 * <code>__oe__</code>, <code>__ae__</code>, <code>__ue__</code>, <code>__ss__</code>
 * (and, if enabled, <code>__Oe__</code>, <code>__Ae__</code>, <code>__Ue__</code> for upper-case umlauts).
 * 
 * @deprecated this is used for the old-style encoding / decoding for umlauts (now masking for ALL unicode chars is used!)
 * 
 * @param {String|Object} target
 * 							the String for which all contained umlauts should be replaced with an encoded version.
 * 							If this parameter is not a String, it will be converted using <code>JSON.stringify()</code>
 * 							and the resulting String will be processed (may lead to errors if umlauts occur in "strange"
 * 							places within the stringified object).
 * @param {Boolean} [doAlsoEncodeUpperCase] OPTIONAL
 * 							if <code>true</code>, then upper-case umlauts will be encoded, too
 * 							DEFAULT: <code>false</code> (i.e. no encoding for upper-case umlauts)
 * 		
 * @returns {String|Object}
 * 				the String with encoded umlauts.
 * 				If the input argument <code>target</code> was not a String, the processed
 * 				(stringified) value is converted back using <code>JSON.parse()</code>, and the
 * 				result of that conversion is returned (may lead to errors if umlauts occur in
 * 				"strange" places within the stringified object).
 */
GrammarConverter.prototype.encodeUmlauts = function(target, doAlsoEncodeUpperCase){
	
	//the replacements, in the order in which they are applied: [umlaut, encoding]
	var replacements = [
		[/\u00F6/g, '__oe__'],//HTML: ö
		[/\u00E4/g, '__ae__'],//HTML: ä
		[/\u00FC/g, '__ue__'],//HTML: ü
		[/\u00DF/g, '__ss__'] //HTML: ß
	];
	if(doAlsoEncodeUpperCase){
		replacements.push(
			[/\u00D6/g, '__Oe__'],//HTML: Ö
			[/\u00C4/g, '__Ae__'],//HTML: Ä
			[/\u00DC/g, '__Ue__'] //HTML: Ü
		);
	}
	
	var isString = typeof target === 'string';
	var str = isString? target : JSON.stringify(target);
	
	for(var i = 0, size = replacements.length; i < size; ++i){
		str = str.replace(replacements[i][0], replacements[i][1]);
	}
	
	return isString? str : JSON.parse(str);
};
809 
/**
 * Replaces the old-style encodings for umlauts (and the sharp s), i.e.
 * <code>__oe__</code>, <code>__ae__</code>, <code>__ue__</code>, <code>__ss__</code>
 * (and, if enabled, <code>__Oe__</code>, <code>__Ae__</code>, <code>__Ue__</code>),
 * with the original umlauts.
 * 
 * @deprecated this is used for the old-style encoding / decoding for umlauts (now masking for ALL unicode chars is used!)
 * 
 * @param {String|Object} target
 * 							the String for which all contained umlauts-encoding should be replaced with the original umlauts.
 * 							If this parameter is not a String, it will be converted using <code>JSON.stringify()</code>
 * 							and the resulting String will be processed (may lead to errors if umlauts occur in "strange"
 * 							places within the stringified object).
 * @param {Boolean} [doAlsoDecodeUpperCase] OPTIONAL
 * 							if <code>true</code>, then upper-case umlauts-encodings will be decoded, too
 * 							DEFAULT: <code>false</code> (i.e. no decoding for upper-case umlauts-encodings)
 * 		
 * @returns {String|Object}
 * 				the String with decoded umlauts-encodings (i.e. with the "original" umlauts).
 * 				If the input argument <code>target</code> was not a String, the processed
 * 				(stringified) value is converted back using <code>JSON.parse()</code>, and the
 * 				result of that conversion is returned (may lead to errors if umlauts occur in
 * 				"strange" places within the stringified object).
 */
GrammarConverter.prototype.decodeUmlauts = function(target, doAlsoDecodeUpperCase){
	
	//the replacements, in the order in which they are applied: [encoding, umlaut]
	var replacements = [
		[/__oe__/g, '\u00F6'],//HTML: ö
		[/__ae__/g, '\u00E4'],//HTML: ä
		[/__ue__/g, '\u00FC'],//HTML: ü
		[/__ss__/g, '\u00DF'] //HTML: ß
	];
	if(doAlsoDecodeUpperCase){
		replacements.push(
			[/__Oe__/g, '\u00D6'],//HTML: Ö
			[/__Ae__/g, '\u00C4'],//HTML: Ä
			[/__Ue__/g, '\u00DC'] //HTML: Ü
		);
	}
	
	var isString = typeof target === 'string';
	var str = isString? target : JSON.stringify(target);
	
	for(var i = 0, size = replacements.length; i < size; ++i){
		str = str.replace(replacements[i][0], replacements[i][1]);
	}
	
	return isString? str : JSON.parse(str);
};
852 
853 return GrammarConverter;
854 
855 });//END: define(..., function(){