+++ /dev/null
-# coding: utf-8\r
-\r
-# Constants (regexp source fragments, suffix tables and special-case word lists) for the Porter 2 stemmer\r
-module Porter2\r
-\r
- # Regexp character class for a non-vowel: any character other than a, e, i, o, u or y (so uppercase Y counts as a non-vowel)\r
- C = "[^aeiouy]"\r
-\r
- # Regexp character class for a vowel: a, e, i, o, u or y (lowercase only)\r
- V = "[aeiouy]"\r
-\r
- # Regexp character class for a non-vowel that is additionally not w, x or uppercase Y\r
- CW = "[^aeiouywxY]"\r
-\r
- # Doubles created when adding a suffix: these are undoubled when stemmed. Written as one parenthesised alternation.\r
- Double = "(bb|dd|ff|gg|mm|nn|pp|rr|tt)"\r
-\r
- # Regexp character class of the letters that are valid before 'li' (or 'ly'): c d e g h k m n r t\r
- Valid_LI = "[cdeghkmnrt]"\r
-\r
- # A specification for a short syllable, as a regexp source string interpolated from C, V and CW.\r
- #\r
- # A short syllable in a word is either:\r
- # 1. a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel (C V CW), or\r
- # 2. a vowel at the beginning of the word followed by a non-vowel (V C, anchored with ^).\r
- #\r
- # (The original document is silent on whether sequences of two or more non-vowels make a\r
- # syllable long. But as this specification is only used to find the sequence non-vowel,\r
- # vowel, non-vowel, end-of-word, this ambiguity does not have an effect.)\r
- SHORT_SYLLABLE = "((#{C}#{V}#{CW})|(^#{V}#{C}))"\r
-\r
- # Suffix transformations used in porter2_step2: each key is a suffix, each value its replacement.\r
- # (The 'ogi' and 'li' endings are not listed here; they are dealt with in the procedure.)\r
- STEP_2_MAPS = {"tional" => "tion",\r
- "enci" => "ence",\r
- "anci" => "ance",\r
- "abli" => "able",\r
- "entli" => "ent",\r
- "ization" => "ize",\r
- "izer" => "ize",\r
- "ational" => "ate",\r
- "ation" => "ate",\r
- "ator" => "ate",\r
- "alism" => "al",\r
- "aliti" => "al",\r
- "alli" => "al",\r
- "fulness" => "ful",\r
- "ousli" => "ous",\r
- "ousness" => "ous",\r
- "iveness" => "ive",\r
- "iviti" => "ive",\r
- "biliti" => "ble",\r
- "bli" => "ble",\r
- "fulli" => "ful",\r
- "lessli" => "less" }\r
-\r
- # Suffix transformations used in porter2_step3: suffix => replacement; 'ful' and 'ness' map to the empty string.\r
- # (The 'ative' ending is not listed here; it is dealt with in the procedure.)\r
- STEP_3_MAPS = {"tional" => "tion",\r
- "ational" => "ate",\r
- "alize" => "al",\r
- "icate" => "ic",\r
- "iciti" => "ic",\r
- "ical" => "ic",\r
- "ful" => "",\r
- "ness" => "" }\r
- \r
- # Suffix transformations used in porter2_step4: every suffix listed here maps to the empty string.\r
- # (The 'ion' ending is not listed here; it is dealt with in the procedure.)\r
- STEP_4_MAPS = {"al" => "",\r
- "ance" => "",\r
- "ence" => "",\r
- "er" => "",\r
- "ic" => "",\r
- "able" => "",\r
- "ible" => "",\r
- "ant" => "",\r
- "ement" => "",\r
- "ment" => "",\r
- "ent" => "",\r
- "ism" => "",\r
- "ate" => "",\r
- "iti" => "",\r
- "ous" => "",\r
- "ive" => "",\r
- "ize" => "" }\r
- \r
- # Special-case stemmings: word => stem. The last group (sky, news, howe, atlas, cosmos, bias, andes) maps each word to itself.\r
- SPECIAL_CASES = {"skis" => "ski",\r
- "skies" => "sky",\r
- \r
- "dying" => "die",\r
- "lying" => "lie",\r
- "tying" => "tie",\r
- "idly" => "idl",\r
- "gently" => "gentl",\r
- "ugly" => "ugli",\r
- "early" => "earli",\r
- "only" => "onli",\r
- "singly" =>"singl",\r
- \r
- "sky" => "sky",\r
- "news" => "news",\r
- "howe" => "howe",\r
- "atlas" => "atlas",\r
- "cosmos" => "cosmos",\r
- "bias" => "bias",\r
- "andes" => "andes" }\r
- \r
- # Special-case words for which processing stops after step 1a (an array of strings built with %w).\r
- STEP_1A_SPECIAL_CASES = %w[ inning outing canning herring earring proceed exceed succeed ]\r
-\r
-end\r
-\r