Gemified
[porter2stemmer.git] / lib / porter2stemmer / constants.rb
1 # coding: utf-8
2
# Constants for the Porter 2 stemmer.
#
# The string constants are regular-expression fragments meant to be
# interpolated into larger patterns; the hashes map a suffix (or a whole word)
# to its replacement.
#
# Every constant is frozen so the shared tables and fragments cannot be
# modified in place by accident. The freeze is deliberately shallow: the
# String values stored inside the hashes are left as they were, so a lookup
# result behaves exactly like it did before.
# NOTE(review): if callers hand a looked-up value straight back to user code,
# consider having them +dup+ it -- confirm against the stemmer methods.
module Porter2
  # A non-vowel
  C = "[^aeiouy]".freeze

  # A vowel: a e i o u y
  V = "[aeiouy]".freeze

  # A non-vowel other than w, x, or Y
  CW = "[^aeiouywxY]".freeze

  # Doubles created when adding a suffix: these are undoubled when stemmed.
  # (The mixed-case name is kept as-is because it is part of the public
  # interface of this module.)
  Double = "(bb|dd|ff|gg|mm|nn|pp|rr|tt)".freeze

  # A valid letter that can come before 'li' (or 'ly').
  # (Name kept as-is for the same compatibility reason as Double.)
  Valid_LI = "[cdeghkmnrt]".freeze

  # A specification for a short syllable.
  #
  # A short syllable in a word is either:
  # 1. a vowel followed by a non-vowel other than w, x or Y and preceded by
  #    a non-vowel, or
  # 2. a vowel at the beginning of the word followed by a non-vowel.
  #
  # (The original document is silent on whether sequences of two or more
  # non-vowels make a syllable long. But as this specification is only used to
  # find sequences of non-vowel - vowel - non-vowel - end-of-word, this
  # ambiguity does not have an effect.)
  #
  # Note that +^+ is a start-of-line anchor in Ruby; for input without
  # embedded newlines it is equivalent to start-of-string.
  SHORT_SYLLABLE = "((#{C}#{V}#{CW})|(^#{V}#{C}))".freeze

  # Suffix transformations used in porter2_step2.
  # (ogi, li endings dealt with in procedure)
  STEP_2_MAPS = {
    "tional"  => "tion",
    "enci"    => "ence",
    "anci"    => "ance",
    "abli"    => "able",
    "entli"   => "ent",
    "ization" => "ize",
    "izer"    => "ize",
    "ational" => "ate",
    "ation"   => "ate",
    "ator"    => "ate",
    "alism"   => "al",
    "aliti"   => "al",
    "alli"    => "al",
    "fulness" => "ful",
    "ousli"   => "ous",
    "ousness" => "ous",
    "iveness" => "ive",
    "iviti"   => "ive",
    "biliti"  => "ble",
    "bli"     => "ble",
    "fulli"   => "ful",
    "lessli"  => "less"
  }.freeze

  # Suffix transformations used in porter2_step3.
  # (ative ending dealt with in procedure)
  STEP_3_MAPS = {
    "tional"  => "tion",
    "ational" => "ate",
    "alize"   => "al",
    "icate"   => "ic",
    "iciti"   => "ic",
    "ical"    => "ic",
    "ful"     => "",
    "ness"    => ""
  }.freeze

  # Suffix transformations used in porter2_step4: every listed suffix is
  # replaced by the empty string, i.e. removed.
  # (ion ending dealt with in procedure)
  STEP_4_MAPS = {
    "al"    => "",
    "ance"  => "",
    "ence"  => "",
    "er"    => "",
    "ic"    => "",
    "able"  => "",
    "ible"  => "",
    "ant"   => "",
    "ement" => "",
    "ment"  => "",
    "ent"   => "",
    "ism"   => "",
    "ate"   => "",
    "iti"   => "",
    "ous"   => "",
    "ive"   => "",
    "ize"   => ""
  }.freeze

  # Special-case stemmings: whole words mapped directly to their stem.
  SPECIAL_CASES = {
    "skis"   => "ski",
    "skies"  => "sky",

    "dying"  => "die",
    "lying"  => "lie",
    "tying"  => "tie",
    "idly"   => "idl",
    "gently" => "gentl",
    "ugly"   => "ugli",
    "early"  => "earli",
    "only"   => "onli",
    "singly" => "singl",

    # Words that map to themselves.
    "sky"    => "sky",
    "news"   => "news",
    "howe"   => "howe",
    "atlas"  => "atlas",
    "cosmos" => "cosmos",
    "bias"   => "bias",
    "andes"  => "andes"
  }.freeze

  # Special case words to stop processing after step 1a.
  STEP_1A_SPECIAL_CASES = %w[
    inning outing canning herring earring proceed exceed succeed
  ].freeze
end
114