X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=lib%2Fporter2_constants.rb;fp=lib%2Fporter2_constants.rb;h=0000000000000000000000000000000000000000;hb=c8c08d5fafba205c5a8e4138edb0df059a63de36;hp=f123bc5c0716d96f52e10c21f71a15a26c81888c;hpb=aa5699112116010af665e83447bd7d1a17d0e2f7;p=porter2stemmer.git diff --git a/lib/porter2_constants.rb b/lib/porter2_constants.rb deleted file mode 100644 index f123bc5..0000000 --- a/lib/porter2_constants.rb +++ /dev/null @@ -1,114 +0,0 @@ -# coding: utf-8 - -# Constants for the Porter 2 stemmer -module Porter2 - - # A non-vowel - C = "[^aeiouy]" - - # A vowel: a e i o u y - V = "[aeiouy]" - - # A non-vowel other than w, x, or Y - CW = "[^aeiouywxY]" - - # Doubles created when adding a suffix: these are undoubled when stemmed - Double = "(bb|dd|ff|gg|mm|nn|pp|rr|tt)" - - # A valid letter that can come before 'li' (or 'ly') - Valid_LI = "[cdeghkmnrt]" - - # A specification for a short syllable. - # - # A short syllable in a word is either: - # 1. a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel, or - # 2. a vowel at the beginning of the word followed by a non-vowel. - # - # (The original document is silent on whether sequences of two or more non-vowels make a - # syllable long. But as this specification is only used to find sequences of non-vowel - - # vowel - non-vowel - end-of-word, this ambiguity does not have an effect.) - SHORT_SYLLABLE = "((#{C}#{V}#{CW})|(^#{V}#{C}))" - - # Suffix transformations used in porter2_step2. - # (ogi, li endings dealt with in procedure) - STEP_2_MAPS = {"tional" => "tion", - "enci" => "ence", - "anci" => "ance", - "abli" => "able", - "entli" => "ent", - "ization" => "ize", - "izer" => "ize", - "ational" => "ate", - "ation" => "ate", - "ator" => "ate", - "alism" => "al", - "aliti" => "al", - "alli" => "al", - "fulness" => "ful", - "ousli" => "ous", - "ousness" => "ous", - "iveness" => "ive", - "iviti" => "ive", - "biliti" => "ble", - "bli" => "ble", - "fulli" => "ful", - "lessli" => "less" } - - # Suffix transformations used in porter2_step3. - # (ative ending dealt with in procedure) - STEP_3_MAPS = {"tional" => "tion", - "ational" => "ate", - "alize" => "al", - "icate" => "ic", - "iciti" => "ic", - "ical" => "ic", - "ful" => "", - "ness" => "" } - - # Suffix transformations used in porter2_step4. - # (ion ending dealt with in procedure) - STEP_4_MAPS = {"al" => "", - "ance" => "", - "ence" => "", - "er" => "", - "ic" => "", - "able" => "", - "ible" => "", - "ant" => "", - "ement" => "", - "ment" => "", - "ent" => "", - "ism" => "", - "ate" => "", - "iti" => "", - "ous" => "", - "ive" => "", - "ize" => "" } - - # Special-case stemmings - SPECIAL_CASES = {"skis" => "ski", - "skies" => "sky", - - "dying" => "die", - "lying" => "lie", - "tying" => "tie", - "idly" => "idl", - "gently" => "gentl", - "ugly" => "ugli", - "early" => "earli", - "only" => "onli", - "singly" =>"singl", - - "sky" => "sky", - "news" => "news", - "howe" => "howe", - "atlas" => "atlas", - "cosmos" => "cosmos", - "bias" => "bias", - "andes" => "andes" } - - # Special case words to stop processing after step 1a. - STEP_1A_SPECIAL_CASES = %w[ inning outing canning herring earring proceed exceed succeed ] - -end -