/*
 * 	Copyright (C) 2012-2013 DFKI GmbH
 * 	Deutsches Forschungszentrum fuer Kuenstliche Intelligenz
 * 	German Research Center for Artificial Intelligence
 * 	http://www.dfki.de
 *
 * 	Permission is hereby granted, free of charge, to any person obtaining a
 * 	copy of this software and associated documentation files (the
 * 	"Software"), to deal in the Software without restriction, including
 * 	without limitation the rights to use, copy, modify, merge, publish,
 * 	distribute, sublicense, and/or sell copies of the Software, and to
 * 	permit persons to whom the Software is furnished to do so, subject to
 * 	the following conditions:
 *
 * 	The above copyright notice and this permission notice shall be included
 * 	in all copies or substantial portions of the Software.
 *
 * 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * 	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * 	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * 	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * 	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * 	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * 	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */


/**
 * The GrammarConverter object initializes the grammar for processing
 * <em>natural language text</em>, e.g. from the voice recognition.
 *
 * @class
 * @name GrammarConverter
 *
 * @requires mmir.CommonUtils.isArray
 * @requires jQuery.ajax
 */
define(['commonUtils', 'jquery'], function(commonUtils, $){

	/**
	 * Matches everything that {@link GrammarConverter#maskString} has to rewrite:
	 * line breaks (CR LF, LF CR, CR, LF) and every char that is neither TAB nor printable ASCII.
	 *
	 * NOTE: only used with String.replace(), which resets lastIndex for global RegExps.
	 *
	 * @private
	 */
	var MASK_TARGETS = /\r\n|\n\r|\r|\n|[^\t\x20-\x7E]/g;

	/**
	 * HELPER returns the (current) stopwords of the converter's JSON grammar as list of non-empty Strings.
	 *
	 * Missing grammar / missing stopword list yield an empty list; empty entries are dropped,
	 * since they would create a RegExp alternative that matches the empty string.
	 *
	 * @private
	 * @param {GrammarConverter} converter
	 * @returns {Array<String>} the stopwords (a new list)
	 */
	function collectStopWords(converter){
		var grammar = converter.json_grammar_definition;
		var raw = grammar && grammar.stop_word;
		var words = [];
		if(!raw){
			return words;
		}
		for(var i=0, size = raw.length; i < size; ++i){
			var word = String(raw[i]);
			if(word.length > 0){
				words.push(word);
			}
		}
		return words;
	}

	/**
	 * HELPER applies a String transformation to a String, or to the stringified
	 * version of a non-String value (which is parsed back afterwards).
	 *
	 * Values that cannot be stringified (JSON.stringify() returns undefined, e.g. for
	 * undefined or functions) are returned unchanged.
	 *
	 * @private
	 * @param {String|Object} target
	 * @param {Function} transform <code>String transform(String)</code>
	 * @returns {String|Object}
	 */
	function recodeStringified(target, transform){
		var isString = typeof target === 'string';
		var str = isString? target : JSON.stringify(target);
		if(typeof str !== 'string'){
			return target;
		}
		str = transform(str);
		return isString? str : JSON.parse(str);
	}

	/**
	 * @ignore
	 *
	 * @constructs GrammarConverter
	 */
	function GrammarConverter(){

		this.variable_prefix = "_$";
		this.variable_regexp = /"(_\$[^\"]*)"/igm;

		this.entry_token_field = "tok";//must consist of ASCII "word chars", i.e. no whitespaces, numbers etc.
		this.entry_index_field = "i";//must consist of ASCII "word chars", i.e. no whitespaces, numbers etc.

		//regular expression for detecting encoded chars (see mask/unmask functions)
		this.enc_regexp_str = "~~([0-9|A-F|a-f]{4})~~";

		this.jscc_grammar_definition = "";
		this.js_grammar_definition = "";
		this.json_grammar_definition = null;

		//RegExp for "normal" stopwords (lazily created, see getStopWordsRegExpr())
		this.stop_words_regexp = undefined;
		//RegExp for stopwords that begin/end with an encoded char (see getStopWordsEncRegExpr())
		this.stop_words_regexp_enc = undefined;

		//default setting for masking value Strings in JSON values (see maskJSON() / unmaskJSON)
		this.maskValues = true;
		//default setting for masking property-name Strings in JSON values (see maskJSON() / unmaskJSON)
		// WARNING: this is actually EXPERIMENTAL; it should be set to false, since JS/CC may not be able to handle masked ID names...
		this.maskNames = false;
		//default setting for loading JSON files:
		// if set to true, old-style umlauts encodings (e.g. __oe__) will be converted after loading the file
		// Enable this, if you need to use old-style encoded grammars ... still, the better option would
		//  be to convert the old-style grammar (i.e. use un-encoded umlauts in the JSON grammar file).
		this.convertOldFormat = false;

		//alternative reg-exp for stop-words (a different method for detecting/removing stopwords must be used!)
		this.stop_words_regexp_alt = undefined;

		//if execution of the grammar is asynchronously done (i.e. result is delivered using a callback)
		this.is_async = false;
	}

	/**
	 * Loads the JSON grammar from a URL and stores it as the current JSON grammar definition.
	 *
	 * Previously created stopword RegExps are discarded, so that they get re-created
	 * from the newly loaded grammar.
	 *
	 * @param {Function} [successCallback]
	 * 			invoked as <code>successCallback(grammarConverter, xhr)</code> after the grammar was loaded
	 * @param {Function} [errorCallback]
	 * 			invoked as <code>errorCallback(grammarConverter)</code> if loading failed
	 * @param {String} grammarUrl
	 * 			the URL of the JSON grammar
	 * @param {Boolean} [doLoadSynchronously]
	 * 			see {@link #loadResource}
	 */
	GrammarConverter.prototype.loadGrammar = function(successCallback, errorCallback, grammarUrl, doLoadSynchronously){
		var self = this;
		var success = function(data, status, xhr){

			//if auto-upgrading is enabled:
			// decode old-style umlaut masking before continuing
			if(self.convertOldFormat){
				data = self.recodeJSON(data, self.decodeUmlauts);
			}

			self.json_grammar_definition = data;

			//the cached RegExps belong to the previous grammar -> force re-creation on next access
			self.stop_words_regexp = undefined;
			self.stop_words_regexp_enc = undefined;
			self.stop_words_regexp_alt = undefined;

			if (typeof successCallback === "function") {
				successCallback.call(this, self, xhr);
			}
		};
		var error = function(xhr, status, data){
			var message = "failed to load the grammar! error: "+ JSON.stringify(data);
			//alert() only exists in browser-like environments
			if(typeof alert === 'function'){
				alert(message);
			}
			else {
				console.error(message);
			}
			if (typeof errorCallback === "function") {
				errorCallback.call(this, self);
			}
		};
		this.loadResource(success, error, grammarUrl, doLoadSynchronously);
	};

	/**
	 * Loads a JSON resource via jQuery.ajax.
	 *
	 * @param {Function} successCallback
	 * 			the success callback that is passed to jQuery.ajax
	 * @param {Function} errorCallback
	 * 			the error callback that is passed to jQuery.ajax;
	 * 			if the URL is missing, it is invoked with the GrammarConverter as context and argument
	 * @param {String} resourceUrl
	 * 			the URL of the resource
	 * @param {Boolean} [doLoadSynchronously]
	 * 			the resource is loaded asynchronously ONLY if this is exactly <code>false</code>;
	 * 			in all other cases (including omitted) it is loaded synchronously
	 */
	GrammarConverter.prototype.loadResource = function(successCallback, errorCallback, resourceUrl, doLoadSynchronously){

		if(!resourceUrl){
			console.error('GrammarConverter.loadResource: missing URL!');
			if(typeof errorCallback === 'function'){
				errorCallback.call(this, this);
			}
			return;///////////////// EARLY EXIT //////////////////////
		}

		$.ajax({
			async: doLoadSynchronously === false,
			dataType: 'json',
			url: resourceUrl,
			success: successCallback,
			error: errorCallback
		});
	};

	/**
	 * Sets the stopwords of the JSON grammar (creating an empty grammar, if there is none yet)
	 * and (re-)creates all stopword RegExps.
	 *
	 * NOTE: the RegExps are created from the <em>masked</em> stopwords (see {@link #maskJSON});
	 *       masking / unmasking is done in-place on <code>stopWordArray</code>.
	 *
	 * @param {Array<String>} stopWordArray the list of stopwords
	 */
	GrammarConverter.prototype.setStopWords = function(stopWordArray){

		if(!this.json_grammar_definition){
			this.json_grammar_definition = {};
		}

		this.json_grammar_definition.stop_word = this.maskJSON(stopWordArray);

		this.parseStopWords();
		this.parseStopWords_alt();

		//use unmask-function in order to ensure masking/unmasking is reversible
		//  (or in case it is not: the error will be held in property stop_word)
		this.json_grammar_definition.stop_word = this.unmaskJSON(this.json_grammar_definition.stop_word);
	};

	/**
	 * @returns {Array<String>} the stopword list of the JSON grammar (or null, if there is no JSON grammar)
	 */
	GrammarConverter.prototype.getStopWords = function(){
		if(!this.json_grammar_definition){
			return null;
		}
		return this.json_grammar_definition.stop_word;
	};

	/**
	 * HELPER creates a copy of the stopword list and encodes all non-ASCII chars to their unicode
	 *        representation (e.g. for safe storage of stringified stopword list, even if file-encoding
	 *        does not support non-ASCII letters).
	 *
	 * @returns {Array<String>} a copy of the stopword list, from the current JSON grammar
	 * 							(or empty list, if no grammar is present)
	 */
	GrammarConverter.prototype.getEncodedStopwords = function(){
		var list = this.getStopWords();
		if(!list){
			return [];
		}

		//use copy, since recoding works in-place (we do not want to modify the stored stopword list here)
		//store stopwords with their Unicode representation (only for non-ASCII chars)
		return this.recodeJSON(list.slice(0), this.maskAsUnicode);
	};

	/**
	 * Creates the RegExps for stopword removal (this is the original / main implementation):
	 * <ul>
	 *  <li><code>stop_words_regexp</code>: matches "normal" stopwords using word-boundaries (\b)</li>
	 *  <li><code>stop_words_regexp_enc</code>: matches stopwords that begin or end with an encoded
	 *      char (\b cannot detect their boundaries), using whitespaces & START-/END-expression,
	 *      i.e. something like <code>(\s|^)(~~ ... words ... ~~)(\s|$)</code>;
	 *      set to <code>undefined</code>, if there are no such stopwords</li>
	 * </ul>
	 *
	 * NOTE: the word-boundaries expression \b seems to have no effect in case of non-ASCII chars in general
	 *       (e.g. for Japanese characters / words)
	 *       .... so we would need to use this alternative mechanism (e.g. using whitespaces & START-/END-expr.)
	 *       even if these characters were not encoded!
	 *
	 * NOTE: the stopwords are inserted into the RegExp as they are (i.e. without escaping).
	 */
	GrammarConverter.prototype.parseStopWords = function(){

		var words = collectStopWords(this);

		//IMPORTANT: no global-flag for the testers: RegExp.test() on a global RegExp continues at
		//           lastIndex of the previous match, i.e. it would give wrong results when
		//           testing one word after the other
		var encStartTester = new RegExp("^" + this.enc_regexp_str, "m");
		var encEndTester   = new RegExp(      this.enc_regexp_str + "$", "m");

		var normalWords = [];
		var encWords = [];
		for(var index=0, size = words.length; index < size ; ++index){
			var word = words[index];
			if(encStartTester.test(word) || encEndTester.test(word)){
				//special treatment for words that begin/end with encoded chars
				encWords.push(word);
			}
			else {
				normalWords.push(word);
			}
		}

		var pattern;
		if(normalWords.length > 0){
			pattern =
				  "\\b("					//starting at a word-boundary (-> ignore within-word matches)
				+ normalWords.join("|")
				+ ")"
				+ "\\b"						//... ending with a word-boundary -> avoid "cutting out" matching partial strings
											//    e.g. without \b: '(in)\s?' would match (and cut out all matches) within "winning" -> "wng"
				+ "\\s?";					//... and optionally: one white-character that follows the stopword
		}
		else {
			//for empty stopword definition: match empty string
			//  (basically: remove nothing)
			pattern = '^$';
		}

		//RegExp options:
		// ignore-case (i),
		// match globally i.e. all occurrences in the String (g),
		// do not stop at line breaks (m)
		this.stop_words_regexp = new RegExp(pattern, "igm");

		//only create RegExp for special stopwords, if we actually have at least 1 of those
		// (otherwise: reset, so that no RegExp of a previous stopword list remains)
		//NOTE for replacement, we need to use a space-char (i.e. replace these with spaces, not empty strings: str.replace(..., ' '); )
		if(encWords.length > 0){
			this.stop_words_regexp_enc = new RegExp("(\\s|^)(" + encWords.join("|") + ")(\\s|$)", "igm");
		}
		else {
			this.stop_words_regexp_enc = undefined;
		}
	};

	/**
	 * Initializes the alternative version / regular expression for stopwords
	 * (<code>stop_words_regexp_alt</code>).
	 *
	 * For each stopword, the pattern matches: (1) the stopword enclosed in spaces,
	 * (2) the stopword at 'line end' preceded by a space, (3) the stopword at 'line start' followed by a space.
	 */
	GrammarConverter.prototype.parseStopWords_alt = function(){

		var words = collectStopWords(this);
		var alternatives = [];

		for(var index=0, size = words.length; index < size ; ++index){
			var word = words[index];
			alternatives.push(" " + word + " | " + word + "$|^" + word + " ");
		}

		//for empty stopword definition: match empty string (basically: remove nothing)
		var pattern = alternatives.length > 0? "(" + alternatives.join("|") + ")" : '^$';

		this.stop_words_regexp_alt = new RegExp(pattern, "igm");
	};

	/**
	 * @returns {RegExp} the RegExp for removing "normal" stopwords (created on first access)
	 */
	GrammarConverter.prototype.getStopWordsRegExpr = function(){
		if(!this.stop_words_regexp){
			this.parseStopWords();
		}
		return this.stop_words_regexp;
	};

	/**
	 * FIX for stopwords that start or end with encoded chars (i.e. non-ASCII chars)
	 *
	 * This RegExp may be NULL/undefined, if no stopwords exist, that begin/end with encoded chars
	 * i.e. you need to check for NULL, before trying to use this RegExpr.
	 *
	 * Usage:
	 * @example
	 *
	 * //remove normal stopwords:
	 * var removedStopwordsStr  = someStr.replace( gc.getStopWordsRegExpr(), '');
	 *
	 *
	 * var removedStopwordsStr2 = removedStopwordsStr;
	 * if(gc.getStopWordsEncRegExpr()){
	 * 	//NOTE replace stopwords with spaces (not with empty String as above, ie. with "normal" stopwords)
	 * 	removedStopwordsStr2 = removedStopwordsStr.replace( gc.getStopWordsEncRegExpr(), ' ');
	 * }
	 */
	GrammarConverter.prototype.getStopWordsEncRegExpr = function(){
		if(!this.stop_words_regexp){
			this.parseStopWords();
		}
		return this.stop_words_regexp_enc;
	};

	/**
	 * @returns {RegExp} the alternative version of the RegExp for stopwords (created on first access)
	 */
	GrammarConverter.prototype.getStopWordsRegExpr_alt = function(){
		if(!this.stop_words_regexp_alt){
			this.parseStopWords_alt();
		}
		return this.stop_words_regexp_alt;
	};

	/**
	 * Get grammar definition text.
	 *
	 * This is the "source code" input for the grammar compiler
	 * (i.e. syntax for jison, PEG.js or JS/CC).
	 *
	 * The grammar definition text is generated from the JSON grammar.
	 *
	 * @returns {String} the grammar definition in compiler-specific syntax
	 */
	GrammarConverter.prototype.getGrammarDef = function(){
		return this.jscc_grammar_definition;
	};

	/**
	 * Sets the grammar definition text.
	 *
	 * This function should only be used during compilation of the JSON grammar
	 * to the executable grammar.
	 *
	 * NOTE: Setting this "manually" will have no effect on the executable grammar.
	 *
	 * @see #getGrammarDef
	 * @protected
	 *
	 * @param {String} rawGrammarSyntax
	 * 		the grammar definition in compiler-specific syntax
	 */
	GrammarConverter.prototype.setGrammarDef = function(rawGrammarSyntax){
		this.jscc_grammar_definition = rawGrammarSyntax;
	};

	/**
	 * Get the compiled JavaScript grammar source code.
	 *
	 * This is the output of the grammar compiler (with additional
	 * JavaScript "framing" in SemanticInterpreter.createGrammar).
	 *
	 * This needs to be eval'ed before it can be executed (eval() will add
	 * the corresponding executable grammar to SemanticInterpreter).
	 *
	 * @returns {String} the compiled, JavaScript grammar source code
	 */
	GrammarConverter.prototype.getGrammarSource = function(){
		return this.js_grammar_definition;
	};

	/**
	 * Set the compiled JavaScript grammar source code.
	 *
	 * @see #getGrammarSource
	 *
	 * @param {String} src_code the compiled, JavaScript grammar source code
	 */
	GrammarConverter.prototype.setGrammarSource = function(src_code){
		this.js_grammar_definition = src_code;
	};

	/**
	 * Set the executable grammar function.
	 *
	 * The grammar function takes a String argument: the text that should be parsed.
	 *                            a Function argument: the callback for the result.
	 *                            where the callback itself takes 1 argument for the result: <code>callback(result)</code>
	 *
	 * The returned result depends on the JSON definition of the grammar:
	 * <code>func(inputText, resultCallback)</code>
	 *
	 *
	 * @param {Function} func
	 * 			the executable grammar function: <code>func(string, function(object)) : object</code>
	 * @param {Boolean} [isAsync] OPTIONAL
	 * 			set to TRUE, if execution is asynchronously done.
	 * 			DEFAULT: FALSE
	 *
	 * @see #executeGrammar
	 */
	GrammarConverter.prototype.setGrammarFunction = function(func, isAsync){
		this.is_async = !!isAsync;
		this.executeGrammar = func;
	};

	/**
	 * @returns {Boolean} TRUE if the grammar is executed asynchronously (see {@link #setGrammarFunction})
	 */
	GrammarConverter.prototype.isAsyncExec = function(){
		return this.is_async;
	};

	/**
	 * Execute the grammar.
	 *
	 * NOTE: do not use directly, but {@link mmir.SemanticInterpreter.getASRSemantic} instead,
	 * 		since that function applies some pre- and post-processing to the text (stopword removal
	 * 		en-/decoding of special characters etc.).
	 *
	 * @param {String} text
	 * 			the text String that should be parsed.
	 * @param {Function} [callback]
	 * 			if #isAsyncExec is TRUE, then executeGrammar will have no return value, but instead the result
	 * 			of the grammar execution is delivered by the <code>callback</code>:
	 * 			<pre>function callback(result){ ... }</pre>
	 * 			(see also description of <code>return</code> value below)
	 * @returns {Object}
	 * 			the result of the grammar execution:
	 * 			<code>{phrase: STRING, phrases: OBJECT, semantic: OBJECT}</code>
	 *
	 * 			The property <code>phrase</code> contains the <code>text</code> which was matched (with removed stopwords).
	 *
	 * 			The property <code>phrases</code> contains the matched <tt>TOKENS</tt> and <tt>UTTERANCES</tt> from
	 * 			the JSON definition of the grammar as properties as arrays
	 * 			(e.g. for 1 matched TOKEN "token": <code>{token: ["the matched text"]}</code>).
	 *
	 * 			The returned property <code>semantic</code> depends on the JSON definition of the grammar.
	 *
	 * 			NOTE: if #isAsyncExec is TRUE, then there will be no return value, but instead the callback
	 * 			      is invoked with the return value.
	 *
	 */
	GrammarConverter.prototype.executeGrammar = function(text, callback){
		console.warn('GrammarConverter.executeGrammar: this is only a stub. No grammar implementation set yet...');
	};

	/**
	 * Masks unicoded characters strings.
	 *
	 * Unicode characters are masked by replacing them with
	 * <code>~~XXXX~~</code>
	 * where <code>XXXX</code> is the four digit unicode HEX number.
	 *
	 * Line breaks (CR LF, LF CR, CR, LF) are normalized to a single LF; TABs are left unchanged;
	 * all other chars outside of the printable ASCII range (SPACE to ~) are masked.
	 *
	 * <p>
	 * NOTE that this function is <em>stable</em> with regard to
	 * multiple executions:
	 *
	 * If the function is invoked on the returned String again, the
	 * returned String will be the same / unchanged, i.e.
	 * maskings (i.e. "~~XXXX~~") will not be masked again.
	 * </p>
	 * <p>
	 * NOTE: currently, the masking pattern cannot be escaped,
	 * 		 i.e. if the original String contains a substring
	 * 		 that matches the masking pattern, it cannot
	 * 		 be escaped, so that the unmask-function
	 * 		 will leave it untouched.
	 * </p>
	 *
	 * @param {String} str
	 * 				the String to process
	 * @param {String} [prefix] OPTIONAL
	 * 				an alternative prefix used for masking, i.e instead of <code>~~</code>
	 * 				(ignored, if argument has other type than <code>string</code>)
	 * @param {String} [postfix] OPTIONAL
	 * 				an alternative postfix used for masking, i.e instead of <code>~~</code>
	 * 				(ignored, if argument has other type than <code>string</code>)
	 * @returns {String}
	 * 				the masked string
	 */
	GrammarConverter.prototype.maskString = function (str, prefix, postfix) {

		var escStart = typeof prefix  === 'string'? prefix  : '~~';
		var escEnd   = typeof postfix === 'string'? postfix : '~~';

		return str.replace(MASK_TARGETS, function(match){

			var first = match.charAt(0);
			if(first === '\r' || first === '\n'){
				//any kind of line break -> single NEWLINE
				return '\n';
			}

			//4-digit (zero-padded), upper-case HEX number of the char code
			var hex = match.charCodeAt(0).toString(16).toUpperCase();
			while(hex.length < 4){
				hex = '0' + hex;
			}
			return escStart + hex + escEnd;
		});
	};

	/**
	 * HELPER uses #maskString for encoding non-ASCII chars to their Unicode representation,
	 * i.e. <code>\uXXXX</code> where XXXX is the Unicode HEX number.
	 *
	 *
	 * SHORTCUT for calling <code>maskString(str, '\\u', '')</code>.
	 *
	 * @example
	 * //for Japanese "下さい" ("please")
	 * maskAsUnicode("下さい") -> "\u4E0B\u3055\u3044"
	 *
	 * //... and using default masking:
	 * maskString("下さい") -> "~~4E0B~~~~3055~~~~3044~~"
	 *
	 * @param {String} str the String to process
	 * @returns {String} the masked string
	 */
	GrammarConverter.prototype.maskAsUnicode = function (str) {
		return this.maskString(str, '\\u', '');
	};

	/**
	 * Unmasks <i>masked unicoded characters</i> in a string.
	 *
	 * Masked unicode characters are assumed to have the pattern:
	 * <code>~~XXXX~~</code>
	 * where <code>XXXX</code> is the four digit unicode HEX number.
	 *
	 * <p>
	 * NOTE that this function is <em>stable</em> with regard to
	 * multiple executions, <b>IF</b> the original String <tt>str</tt> did not
	 * contain a sub-string that conforms to the encoding pattern
	 * (see remark for {@link #maskString}):
	 *
	 * If the function is invoked on the returned String again, the
	 * returned String will be the same, i.e. unchanged.
	 * </p>
	 *
	 * @param {String} str
	 * @param {RegExp|String} [detector] OPTIONAL
	 * 				an alternative detector-RegExp (or the pattern String for one):
	 * 				the RegExp must contain at least one grouping which detects a unicode number (HEX),
	 * 				e.g. default detector is <code>~~([0-9|A-F|a-f]{4})~~</code> (note the grouping
	 * 				for detecting a 4-digit HEX number within the brackets).
	 * 				All occurrences are replaced, regardless of the detector's global-flag.
	 * @returns {String} the unmasked string
	 */
	GrammarConverter.prototype.unmaskString = function (str, detector) {

		var escRegExp;
		if(!detector){
			//RegExpr for: ~~XXXX~~
			// where XXXX is the unicode HEX number: ~~([0-9|A-F|a-f]{4})~~
			escRegExp = new RegExp( this.enc_regexp_str, "igm");
		}
		else if(typeof detector === 'string'){
			escRegExp = new RegExp(detector, "g");
		}
		else if(!detector.global){
			//without global-flag only the first occurrence would be detected
			// -> use a global copy of the detector
			escRegExp = new RegExp(
					detector.source,
					"g" + (detector.ignoreCase? "i" : "") + (detector.multiline? "m" : "")
			);
		}
		else {
			escRegExp = detector;
		}

		//NOTE String.replace() resets lastIndex of global RegExps before matching
		return str.replace(escRegExp, function(match, hex){
			return String.fromCharCode( parseInt(hex, 16) );
		});
	};

	/**
	 * Masks the Strings of a JSON-like object using {@link #maskString} (in-place).
	 *
	 * @param {Object} json the JSON-like object
	 * @param {Boolean} [isMaskValues] see {@link #recodeJSON}
	 * @param {Boolean} [isMaskNames] see {@link #recodeJSON}
	 * @returns {Object} the masked JSON object
	 */
	GrammarConverter.prototype.maskJSON = function (json, isMaskValues, isMaskNames) {
		return this.recodeJSON(json, this.maskString, isMaskValues, isMaskNames);
	};

	/**
	 * Unmasks the Strings of a JSON-like object using {@link #unmaskString} (in-place).
	 *
	 * @param {Object} json the JSON-like object
	 * @param {Boolean} [isMaskValues] see {@link #recodeJSON}
	 * @param {Boolean} [isMaskNames] see {@link #recodeJSON}
	 * @returns {Object} the unmasked JSON object
	 */
	GrammarConverter.prototype.unmaskJSON = function (json, isMaskValues, isMaskNames) {
		return this.recodeJSON(json, this.unmaskString, isMaskValues, isMaskNames);
	};

	/**
	 * Recodes Strings of a JSON-like object.
	 *
	 * NOTE: arrays and objects are processed in-place (i.e. the argument itself is modified).
	 *
	 * @function
	 * @param {Object} json
	 * 					the JSON-like object (i.e. PlainObject)
	 *
	 * @param {Function} recodeFunc
	 * 								the "recoding" function for modifying String values:
	 * 								 must accept a String argument and return a String
	 * 								 <code>String recodeFunc(String)</code>.
	 * 								The function is invoked in context of the GrammarConverter object.
	 * 								Example: this.maskString().
	 * 								See {@link #maskString}.
	 *
	 * @param {Boolean} [isMaskValues] OPTIONAL
	 * 								 if true, the object's property String values will be processed
	 * 								 NOTE: in case this parameter is specified, then <code>recodeFunc</code> must
	 * 								       also be specified!
	 * 								 DEFAULT: uses property {@link #maskValues}
	 * @param {Boolean} [isMaskNames]  OPTIONAL
	 * 								 if true, the property names will be processed
	 * 								 NOTE: in case this parameter is specified, then <code>recodeFunc</code> and
	 * 								       <code>isMaskValues</code> must also be specified!
	 * 								 DEFAULT: uses property {@link #maskNames}
	 *
	 * @returns {Object} the recoded JSON object
	 *
	 * @requires {@link mmir.CommonUtils#isArray} or {@link Array#isArray}
	 */
	GrammarConverter.prototype.recodeJSON = (function () {//<- NOTE this is only the initializer (i.e. see returned function below)

		//use the framework's isArray(), if available (otherwise the native implementation)
		var isArray = commonUtils && typeof commonUtils.isArray === 'function'? commonUtils.isArray : Array.isArray;

		//recursive processing for an object
		//returns: the processed object
		var processJSON = function(obj, recodeFunc, isMaskValues, isMaskNames){

			var i, size;

			//different treatments for: STRING, ARRAY, OBJECT types (and 'REST' type, i.e. all others)
			if(typeof obj === 'string'){
				//STRING: encode the string (if values should be processed)
				return isMaskValues? recodeFunc.call(this, obj) : obj;
			}

			if(obj === null || typeof obj !== 'object'){//NOTE null is typeof object!
				//REST: nothing to do
				return obj;
			}

			if( isArray(obj) ) {
				//ARRAY: process all entries:
				for(i=0, size = obj.length; i < size; ++i){
					obj[i] = processJSON.call(this, obj[i], recodeFunc, isMaskValues, isMaskNames);
				}
				return obj;
			}

			//OBJECT: process all the object's properties (but only, if they are not inherited)
			//NOTE use a snapshot of the property names, since properties may get
			//     renamed (i.e. added/deleted) during processing
			var names = Object.keys(obj);
			for(i=0, size = names.length; i < size; ++i){

				var name = names[i];
				var value = processJSON.call(this, obj[name], recodeFunc, isMaskValues, isMaskNames);

				//if the property-name should also be encoded:
				var recodedName = isMaskNames? recodeFunc.call(this, name) : name;
				if(recodedName !== name){
					delete obj[name];
				}
				obj[recodedName] = value;
			}
			return obj;
		};

		return function (json, recodeFunc, isMaskValues, isMaskNames){
			//evaluate arguments:
			if(typeof isMaskValues === 'undefined'){
				isMaskValues = this.maskValues;
			}
			if(typeof isMaskNames === 'undefined'){
				isMaskNames = this.maskNames;
			}

			return processJSON.call(this, json, recodeFunc, isMaskValues, isMaskNames);
		};

	})();

	/**
	 *
	 * @deprecated this is used for the old-style encoding / decoding for umlauts (now masking for ALL unicode chars is used!)
	 *
	 * @param {String|Object} target
	 * 							the String for which all contained umlauts should be replaced with an encoded version.
	 * 							If this parameter is not a String, it will be converted using <code>JSON.stringify()</code>
	 * 							and the resulting String will be processed (may lead to errors if umlauts occur in "strange"
	 * 							places within the stringified object).
	 * 							Values that cannot be stringified (e.g. <code>undefined</code>) are returned unchanged.
	 * @param {Boolean} [doAlsoEncodeUpperCase] OPTIONAL
	 * 							if <code>true</code>, then upper-case umlauts will be encoded, too
	 * 							DEFAULT: <code>false</code> (i.e. no encoding for upper-case umlauts)
	 *
	 * @returns {String|Object}
	 * 				the String with encoded umlauts.
	 * 				If the input argument <code>target</code> was an Object, the return value
	 * 				will also be an Object, for which the processing stringified Object is converted
	 * 				back using <code>JSON.parse()</code> (may lead to errors if umlauts occur in "strange"
	 * 				places within the stringified object).
	 */
	GrammarConverter.prototype.encodeUmlauts = function(target, doAlsoEncodeUpperCase){
		return recodeStringified(target, function(str){
			//lower-case umlauts and sharp s: oe, ae, ue, ss
			str = str.replace(/\u00F6/g,'__oe__').replace(/\u00E4/g,'__ae__').replace(/\u00FC/g,'__ue__').replace(/\u00DF/g,'__ss__');
			if(doAlsoEncodeUpperCase){
				//upper-case umlauts: Oe, Ae, Ue
				str = str.replace(/\u00D6/g,'__Oe__').replace(/\u00C4/g,'__Ae__').replace(/\u00DC/g,'__Ue__');
			}
			return str;
		});
	};

	/**
	 *
	 * @deprecated this is used for the old-style encoding / decoding for umlauts (now masking for ALL unicode chars is used!)
	 *
	 * @param {String|Object} target
	 * 							the String for which all contained umlauts-encoding should be replaced with the original umlauts.
	 * 							If this parameter is not a String, it will be converted using <code>JSON.stringify()</code>
	 * 							and the resulting String will be processed (may lead to errors if umlauts occur in "strange"
	 * 							places within the stringified object).
	 * 							Values that cannot be stringified (e.g. <code>undefined</code>) are returned unchanged.
	 * @param {Boolean} [doAlsoDecodeUpperCase] OPTIONAL
	 * 							if <code>true</code>, then upper-case umlauts-encodings will be decoded, too
	 * 							DEFAULT: <code>false</code> (i.e. no decoding for upper-case umlauts-encodings)
	 *
	 * @returns {String|Object}
	 * 				the String with decoded umlauts-encodings (i.e. with the "original" umlauts).
	 * 				If the input argument <code>target</code> was an Object, the return value
	 * 				will also be an Object, for which the processing stringified Object is converted
	 * 				back using <code>JSON.parse()</code> (may lead to errors if umlauts occur in "strange"
	 * 				places within the stringified object).
	 */
	GrammarConverter.prototype.decodeUmlauts = function(target, doAlsoDecodeUpperCase){
		return recodeStringified(target, function(str){
			str = str.replace(/__oe__/g,'\u00F6').replace(/__ae__/g,'\u00E4').replace(/__ue__/g,'\u00FC').replace(/__ss__/g,'\u00DF');
			if(doAlsoDecodeUpperCase){
				str = str.replace(/__Oe__/g,'\u00D6').replace(/__Ae__/g,'\u00C4').replace(/__Ue__/g,'\u00DC');
			}
			return str;
		});
	};

	return GrammarConverter;

});//END: define(..., function(){