--- /dev/null
+# coding: utf-8\r
+\r
+# Constants for the Porter 2 stemmer.\r
+#\r
+# The pattern constants (C, V, CW, Double, Valid_LI, SHORT_SYLLABLE) are plain\r
+# Strings holding regular-expression fragments, not Regexp objects;\r
+# SHORT_SYLLABLE below is built by interpolating some of them.\r
+# The *_MAPS constants are Hashes of suffix => replacement, where an empty\r
+# replacement string means the suffix is replaced by nothing.\r
+module Porter2\r
+\r
+ # A non-vowel: any single character other than a, e, i, o, u or y.\r
+ # NOTE(review): upper-case 'Y' is not excluded, so this class matches it too;\r
+ # presumably the stemmer uses 'Y' to mark a y that acts as a consonant --\r
+ # confirm against the stemming procedure.\r
+ C = "[^aeiouy]"\r
+\r
+ # A vowel: a e i o u y\r
+ V = "[aeiouy]"\r
+\r
+ # A non-vowel other than w, x, or Y\r
+ CW = "[^aeiouywxY]"\r
+\r
+ # Doubles created when adding a suffix: these are undoubled when stemmed.\r
+ # The alternation is wrapped in parentheses, so it forms a capture group\r
+ # wherever this fragment is embedded in a larger pattern.\r
+ Double = "(bb|dd|ff|gg|mm|nn|pp|rr|tt)"\r
+\r
+ # A valid letter that can come before 'li' (or 'ly')\r
+ Valid_LI = "[cdeghkmnrt]"\r
+\r
+ # A specification for a short syllable.\r
+ #\r
+ # A short syllable in a word is either:\r
+ # 1. a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel, or\r
+ # 2. a vowel at the beginning of the word followed by a non-vowel.\r
+ #\r
+ # (The original document is silent on whether sequences of two or more non-vowels make a\r
+ # syllable long. But as this specification is only used to find sequences of non-vowel -\r
+ # vowel - non-vowel - end-of-word, this ambiguity does not have an effect.)\r
+ #\r
+ # The fragment is an alternation of the two cases above, built from C, V and CW.\r
+ # Case 2 is anchored with '^'; nothing here anchors the match to the end of the\r
+ # word, and the fragment contains three capture groups (the outer pair of\r
+ # parentheses plus one per alternative).\r
+ # NOTE(review): callers presumably append their own end-of-word anchor -- confirm.\r
+ SHORT_SYLLABLE = "((#{C}#{V}#{CW})|(^#{V}#{C}))"\r
+\r
+ # Suffix transformations used in porter2_step2: suffix => replacement.\r
+ # (ogi, li endings dealt with in procedure)\r
+ # NOTE(review): entries are not ordered by suffix length ("tional" is listed\r
+ # before "ational", "bli" after "abli"), so the consumer presumably selects the\r
+ # longest matching suffix itself -- confirm in porter2_step2.\r
+ STEP_2_MAPS = {"tional" => "tion",\r
+ "enci" => "ence",\r
+ "anci" => "ance",\r
+ "abli" => "able",\r
+ "entli" => "ent",\r
+ "ization" => "ize",\r
+ "izer" => "ize",\r
+ "ational" => "ate",\r
+ "ation" => "ate",\r
+ "ator" => "ate",\r
+ "alism" => "al",\r
+ "aliti" => "al",\r
+ "alli" => "al",\r
+ "fulness" => "ful",\r
+ "ousli" => "ous",\r
+ "ousness" => "ous",\r
+ "iveness" => "ive",\r
+ "iviti" => "ive",\r
+ "biliti" => "ble",\r
+ "bli" => "ble",\r
+ "fulli" => "ful",\r
+ "lessli" => "less" }\r
+\r
+ # Suffix transformations used in porter2_step3: suffix => replacement.\r
+ # An empty replacement ("ful", "ness") means the suffix is replaced by nothing.\r
+ # (ative ending dealt with in procedure)\r
+ STEP_3_MAPS = {"tional" => "tion",\r
+ "ational" => "ate",\r
+ "alize" => "al",\r
+ "icate" => "ic",\r
+ "iciti" => "ic",\r
+ "ical" => "ic",\r
+ "ful" => "",\r
+ "ness" => "" }\r
+ \r
+ # Suffix transformations used in porter2_step4: suffix => replacement.\r
+ # Every replacement here is the empty string.\r
+ # (ion ending dealt with in procedure)\r
+ STEP_4_MAPS = {"al" => "",\r
+ "ance" => "",\r
+ "ence" => "",\r
+ "er" => "",\r
+ "ic" => "",\r
+ "able" => "",\r
+ "ible" => "",\r
+ "ant" => "",\r
+ "ement" => "",\r
+ "ment" => "",\r
+ "ent" => "",\r
+ "ism" => "",\r
+ "ate" => "",\r
+ "iti" => "",\r
+ "ous" => "",\r
+ "ive" => "",\r
+ "ize" => "" }\r
+ \r
+ # Special-case stemmings: whole word => stem.\r
+ # The last group ("sky" .. "andes") maps each word to itself.\r
+ SPECIAL_CASES = {"skis" => "ski",\r
+ "skies" => "sky",\r
+ \r
+ "dying" => "die",\r
+ "lying" => "lie",\r
+ "tying" => "tie",\r
+ "idly" => "idl",\r
+ "gently" => "gentl",\r
+ "ugly" => "ugli",\r
+ "early" => "earli",\r
+ "only" => "onli",\r
+ "singly" =>"singl",\r
+ \r
+ "sky" => "sky",\r
+ "news" => "news",\r
+ "howe" => "howe",\r
+ "atlas" => "atlas",\r
+ "cosmos" => "cosmos",\r
+ "bias" => "bias",\r
+ "andes" => "andes" }\r
+ \r
+ # Special case words to stop processing after step 1a.\r
+ # An Array of Strings (built with %w).\r
+ STEP_1A_SPECIAL_CASES = %w[ inning outing canning herring earring proceed exceed succeed ]\r
+\r
+end\r
+\r