1 
  2 //exports a function:
  3 //  function(STRING):STRING
  4 define(function(){
  5 
  6 /*
  7  Really simple JavaScript stemmer based on the Snowball stemmer
  8  http://snowball.tartarus.org/algorithms/german/stemmer.html
  9  Some simplifications were made, e.g. ignoring R2 and the special
 10  provision for words ending in -isse-  
 11 */
 12 
 13 var snowballSimpleStemFunc = (function(){
 14 	/* Definitions */
 15 	
 16 	// var vowel = /[aeiouyäöüUY]/;
 17 	// var cons = /[^aeiouyäöüUY]/;
 18 	var sEnding = "[bdfghklmnrt]";
 19 	var stEnding = "[bdfghklmnt]";
 20 	
 21 	var prefix = "^((.[aeiouyäöüUY][^aeiouyäöüUY])|([aeiouyäöüUY][^aeiouyäöüUY].))";
 22 	
 23 	
 24 	var stem_word = function simpleStemmer(word){
 25 	
 26 	    word = word.toLowerCase();
 27 	    word = word.replace(/ß/g, "ss");
 28 	    
 29 	    if (word.length < 4) {
 30 	        return word;
 31 	    }
 32 	    
 33 	    word = word.replace(/([aeiouyäöü])y([aeiouyäöü])/g, "$1Y$2"); // replace y between vowels with Y
 34 	    word = word.replace(/([aeiouyäöü])u([aeiouyäöü])/g, "$1U$2"); // replace u between vowels with U
 35 	    /* Step 1 */
 36 	    
 37 	    if (word.match(prefix + "(.*)" + "ern$")) {
 38 	        word = word.slice(0, -3);
 39 	    }
 40 	    else 
 41 	        if (word.match(prefix + "(.*)" + "(em$|en$|er$|es$)")) {
 42 	            word = word.slice(0, -2);
 43 	        }
 44 	        else 
 45 	            if (word.match(prefix + "(.*)" + "(e$)")) {
 46 	                word = word.slice(0, -1);
 47 	            }
 48 	            else 
 49 	                if (word.match(sEnding + "s$") && word.match(prefix + "(.*)" + "(s$)")) {
 50 	                    word = word.slice(0, -1);
 51 	                }
 52 	    
 53 	    
 54 	    
 55 	    /* Step 2 */
 56 	    
 57 	    if (word.match(prefix + "(.*)" + "est$")) {
 58 	        word = word.slice(0, -3);
 59 	    }
 60 	    else 
 61 	        if (word.match(prefix + "(.*)" + "(en$|er$)")) {
 62 	            word = word.slice(0, -2);
 63 	        }
 64 	        else 
 65 	            if (word.match(prefix + "(.*)" + stEnding + "(st$)")) {
 66 	                word = word.slice(0, -2);
 67 	            }
 68 	    
 69 	    
 70 	    
 71 	    /* Step 3 */
 72 	    // simplified!! Really these should be in R2 not R1
 73 	    
 74 	    if (word.match(prefix + "(.*)" + "keit$")) {
 75 	        word = word.slice(0, -4);
 76 	    }
 77 	    if (word.match(prefix + "(.*)" + "(lich$|heit$)")) {
 78 	        word = word.slice(0, -4);
 79 	        if (word.match(prefix + "(.*)" + "(er$|en$)")) {
 80 	            word = word.slice(0, -2);
 81 	        }
 82 	    }
 83 	    else 
 84 	        if (word.match(prefix + "(.*)" + "(isch$)")) {
 85 	            if (!word.match("eisch$")) {
 86 	                word = word.slice(0, -4);
 87 	            }
 88 	        }
 89 	        else 
 90 	            if (word.match(prefix + "(.*)" + "(ig$|ik$)")) {
 91 	                if (!word.match("e..$")) {
 92 	                    word = word.slice(0, -2);
 93 	                }
 94 	            }
 95 	            else 
 96 	                if (word.match(prefix + "(.*)" + "(end$|ung$)")) {
 97 	                    word = word.slice(0, -3);
 98 	                }
 99 	    
100 	    
101 	    
102 	    /* Clean up */
103 	    
104 	    word = word.replace(/([aeiouyäöü])Y/g, "$1y"); // replace Y with y
105 	    word = word.replace(/([aeiouyäöü])U/g, "$1u"); // replace U with u
106 	    word = word.replace(/ä/g, "a");
107 	    word = word.replace(/ö/g, "o");
108 	    word = word.replace(/ü/g, "u");
109 	    
110 	
111 	    return word;
112 	    
113 	};
114 	
115 	return stem_word;
116 })();
117 
118 //exported function:
119 return snowballSimpleStemFunc;
120 
121 });
122