// Exports a function:
//   function(STRING):STRING
define(function () {

  /*
    Really simple JavaScript stemmer for German, based on the Snowball stemmer
    http://snowball.tartarus.org/algorithms/german/stemmer.html
    Some simplifications were made, e.g. step 3 tests suffixes against R1
    where the reference algorithm uses R2, and the special provision for
    words ending in -isse- is ignored.
  */

  var snowballSimpleStemFunc = (function () {
    /* Definitions */

    // Only lower-case letters are vowels here: the upper-case markers U and Y
    // introduced while stemming must count as non-vowels when locating R1.
    var VOWEL_THEN_NON_VOWEL = /[aeiouyäöü][^aeiouyäöü]/;
    var Y_BETWEEN_VOWELS = /([aeiouyäöü])y([aeiouyäöü])/g;
    var U_BETWEEN_VOWELS = /([aeiouyäöü])u([aeiouyäöü])/g;
    var VALID_S_ENDING = /[bdfghklmnrt]s$/;
    var VALID_ST_ENDING = /[bdfghklmnt]st$/;

    // R1 never starts before this index, so at least three letters survive.
    var MIN_R1_START = 3;

    // Index at which region R1 starts: just past the first non-vowel that
    // follows a vowel (anywhere in the word), but never below MIN_R1_START.
    // When no such pair exists R1 is empty, i.e. it starts at the word's end.
    function findR1Start(word) {
      var hit = VOWEL_THEN_NON_VOWEL.exec(word);
      var start = hit ? hit.index + 2 : word.length;
      return Math.max(start, MIN_R1_START);
    }

    // True when `word` ends with `suffix` and the suffix lies entirely in R1.
    function endsWithInR1(word, suffix, r1Start) {
      var cutAt = word.length - suffix.length;
      return cutAt >= r1Start && word.indexOf(suffix, cutAt) === cutAt;
    }

    // Returns the first entry of `suffixes` that ends `word` inside R1,
    // or "" when none does. Callers list longer suffixes first.
    function firstSuffixInR1(word, suffixes, r1Start) {
      for (var i = 0; i < suffixes.length; i++) {
        if (endsWithInR1(word, suffixes[i], r1Start)) {
          return suffixes[i];
        }
      }
      return "";
    }

    // Step 1: remove ern / em en er es / e, or an s after a valid s-ending.
    function stripStep1(word, r1Start) {
      var suffix = firstSuffixInR1(word, ["ern", "em", "en", "er", "es", "e"], r1Start);
      if (suffix) {
        return word.slice(0, -suffix.length);
      }
      if (VALID_S_ENDING.test(word) && word.length - 1 >= r1Start) {
        return word.slice(0, -1);
      }
      return word;
    }

    // Step 2: remove est / en er, or st after a valid st-ending that is
    // itself preceded by at least three letters.
    function stripStep2(word, r1Start) {
      var suffix = firstSuffixInR1(word, ["est", "en", "er"], r1Start);
      if (suffix) {
        return word.slice(0, -suffix.length);
      }
      if (VALID_ST_ENDING.test(word) &&
          word.length - 2 >= r1Start &&
          word.length - 3 >= 3) {
        return word.slice(0, -2);
      }
      return word;
    }

    // Step 3: remove derivational suffixes.
    // simplified!! Really these should be in R2 not R1
    function stripStep3(word, r1Start) {
      // "keit" is removed first; what is left may still lose one more suffix.
      if (endsWithInR1(word, "keit", r1Start)) {
        word = word.slice(0, -4);
      }

      if (firstSuffixInR1(word, ["lich", "heit"], r1Start)) {
        word = word.slice(0, -4);
        if (firstSuffixInR1(word, ["er", "en"], r1Start)) {
          word = word.slice(0, -2);
        }
        return word;
      }

      if (endsWithInR1(word, "isch", r1Start)) {
        // Kept when preceded by "e" (…eisch).
        return word.charAt(word.length - 5) === "e" ? word : word.slice(0, -4);
      }

      if (firstSuffixInR1(word, ["ig", "ik"], r1Start)) {
        // Kept when preceded by "e" (…eig / …eik).
        return word.charAt(word.length - 3) === "e" ? word : word.slice(0, -2);
      }

      if (firstSuffixInR1(word, ["end", "ung"], r1Start)) {
        return word.slice(0, -3);
      }

      return word;
    }

    /**
     * Stems a single German word.
     *
     * The word is lower-cased, "ß" becomes "ss", inflectional and
     * derivational suffixes lying in R1 are stripped, and finally the
     * umlauts ä/ö/ü are folded to a/o/u. Umlaut folding is applied to every
     * word, including ones too short to be stemmed, so that e.g. "tür" and
     * "türen" yield the same stem.
     *
     * @param {string} word - the word to stem; must be a string.
     * @returns {string} the stem.
     */
    var stem_word = function simpleStemmer(word) {
      word = word.toLowerCase();
      word = word.replace(/ß/g, "ss");

      // Mark y and u between vowels as Y and U so they act as consonants.
      word = word.replace(Y_BETWEEN_VOWELS, "$1Y$2");
      word = word.replace(U_BETWEEN_VOWELS, "$1U$2");

      // Suffixes always start at index >= 3, so the letters that determine
      // R1 are never removed and R1 only needs to be located once.
      var r1Start = findR1Start(word);

      word = stripStep1(word, r1Start);
      word = stripStep2(word, r1Start);
      word = stripStep3(word, r1Start);

      /* Clean up */

      // After toLowerCase() the only upper-case Y/U left are the markers.
      word = word.replace(/Y/g, "y");
      word = word.replace(/U/g, "u");
      word = word.replace(/ä/g, "a");
      word = word.replace(/ö/g, "o");
      word = word.replace(/ü/g, "u");

      return word;
    };

    return stem_word;
  })();

  // exported function:
  return snowballSimpleStemFunc;

});