Parent

Files

Class Index [+]

Quicksearch

TestPorter2

Constants

TEST_WORDS

The full set of test words from snowball.tartarus.org/algorithms/english/stemmer.html

TEST_WORDS_ENGB

Test words with -ise suffixes (and similar), to test how British English is stemmed

Public Instance Methods

test_R1() click to toggle source
    # File test/tc_porter2_parts.rb, line 38
# Verifies String#porter2_r1 against a table of [word, expected R1] pairs,
# covering ordinary words first and then the words the suite treats as
# special cases.
def test_R1
  ordinary = [
    ["beautiful", "iful"],
    ["beauty", "y"],
    ["beau", ""],
    ["animadversion", "imadversion"],
    ["sprinkled", "kled"],
    ["eucharist", "harist"]
  ]

  # special cases
  special = [
    ["generate", "ate"],
    ["generates", "ates"],
    ["generated", "ated"],
    ["general", "al"],
    ["generally", "ally"],
    ["generic", "ic"],
    ["generically", "ically"],
    ["generous", "ous"],
    ["generously", "ously"],
    ["communal", "al"],
    ["community", "ity"],
    ["commune", "e"],
    ["arsenic", "ic"],
    ["arsenal", "al"]
  ]

  # Pairs are checked in the listed order so the first failure is reported.
  (ordinary + special).each do |word, expected_r1|
    assert_equal expected_r1, word.porter2_r1
  end
end
test_R2() click to toggle source
    # File test/tc_porter2_parts.rb, line 65
# Verifies String#porter2_r2 against a table of [word, expected R2] pairs.
def test_R2
  expectations = [
    ["beautiful", "ul"],
    ["beauty", ""],
    ["beau", ""],
    ["animadversion", "adversion"],
    ["sprinkled", ""],
    ["eucharist", "ist"]
  ]

  expectations.each do |word, expected_r2|
    assert_equal expected_r2, word.porter2_r2
  end
end
test_ends_with_short_syllable?() click to toggle source
    # File test/tc_porter2_parts.rb, line 74
# Verifies String#porter2_ends_with_short_syllable? returns exactly true for
# the first word list and exactly false for the second.
def test_ends_with_short_syllable?
  expected_true = %w[rap trap entrap ow on at]
  expected_false = %w[uproot bestow disturb]

  expected_true.each do |word|
    assert_equal true, word.porter2_ends_with_short_syllable?
  end

  expected_false.each do |word|
    assert_equal false, word.porter2_ends_with_short_syllable?
  end
end
test_is_short_word?() click to toggle source
    # File test/tc_porter2_parts.rb, line 86
# Verifies String#porter2_is_short_word? returns true for each word in
# short_words and false for each word in long_words, naming the offending
# word in the failure message.
def test_is_short_word?
  # %w[...] builds an array of whitespace-separated string literals.
  short_words = %w[bed shed shred hop]
  long_words = %w[bead embed beds]

  short_words.each do |w|
    assert_equal true, w.porter2_is_short_word?,
        "#{w} should be short but classified as long"
  end

  long_words.each do |w|
    assert_equal false, w.porter2_is_short_word?,
        "#{w} should be long but classified as short"
  end
end
test_porter2_postprocess() click to toggle source
     # File test/tc_porter2_parts.rb, line 299
# Verifies String#porter2_postprocess against a table of
# [input, expected output] pairs.
def test_porter2_postprocess
  expectations = [
    ["abac", "abac"],
    ["abacy", "abacy"],
    ["abacY", "abacy"],
    ["aYbcY", "aybcy"],
    ["aYbcy", "aybcy"]
  ]

  expectations.each do |input, expected|
    assert_equal expected, input.porter2_postprocess
  end
end
test_preprocess() click to toggle source
    # File test/tc_porter2_parts.rb, line 24
# Verifies String#porter2_preprocess against a table of
# [input, expected output] pairs.
def test_preprocess
  expectations = [
    ["abacde", "abacde"],
    ["''abacde", "abacde"],
    ["'ab'c'de", "ab'c'de"],
    ["''ab'c'de", "ab'c'de"],
    ["yabac", "Yabac"],
    ["aybc", "aYbc"],
    ["abacdey", "abacdeY"],
    ["abayde", "abaYde"],
    ["kabayde", "kabaYde"],
    ["kabyayde", "kabyaYde"],
    ["'''", "'"]
  ]

  expectations.each do |input, expected|
    assert_equal expected, input.porter2_preprocess
  end
end
test_stemmer() click to toggle source
       # File test/tc_porter2_full.rb, line 29519
# Runs String#stem over every (word, expected stem) pair in TEST_WORDS and
# reports the word, the expected stem and the stem obtained on a mismatch.
def test_stemmer
  TEST_WORDS.each do |word, expected|
    failure = "#{word} should have stemmed to #{expected} but got #{word.stem} instead"
    assert_equal expected, word.stem, failure
  end
end
test_stemmer_engb() click to toggle source
       # File test/tc_porter2_full.rb, line 29526
# Runs String#stem with its argument set to true over every
# (word, expected stem) pair in TEST_WORDS_ENGB and reports the word, the
# expected stem and the stem obtained on a mismatch.
def test_stemmer_engb
  TEST_WORDS_ENGB.each do |word, expected|
    failure = "#{word} should have stemmed to #{expected} but got #{word.stem(true)} instead"
    assert_equal expected, word.stem(true), failure
  end
end
test_step_0() click to toggle source
     # File test/tc_porter2_parts.rb, line 101
# Verifies String#porter2_step0 against a table of
# [input, expected output] pairs.
def test_step_0
  expectations = [
    ["abac", "abac"],
    ["abac'", "abac"],
    ["abac's", "abac"],
    ["abac's'", "abac"],
    ["ab'c", "ab'c"],
    ["ab'sc", "ab'sc"],
    ["ab's'c", "ab's'c"],
    ["ab'sc's", "ab'sc"],
    ["'", "'"],
    ["'s", "'s"],
    ["'s'", "'s"]
  ]

  expectations.each do |input, expected|
    assert_equal expected, input.porter2_step0
  end
end
test_step_1a() click to toggle source
     # File test/tc_porter2_parts.rb, line 115
# Verifies String#porter2_step1a against a table of
# [input, expected output] pairs.
def test_step_1a
  expectations = [
    ["abacde", "abacde"],
    ["abacesses", "abacess"],
    ["ties", "tie"],
    ["tied", "tie"],
    ["cries", "cri"],
    ["cried", "cri"],
    ["gas", "gas"],
    ["this", "this"],
    ["gaps", "gap"],
    ["kiwis", "kiwi"],
    ["abacus", "abacus"],
    ["abacess", "abacess"]
  ]

  expectations.each do |input, expected|
    assert_equal expected, input.porter2_step1a
  end
end
test_step_1b() click to toggle source
     # File test/tc_porter2_parts.rb, line 130
# Verifies String#porter2_step1b twice over the same word list: once with no
# argument (words_non_gb) and once with the argument set to true (words_gb).
# The two tables differ only in the expectation for "atomised".
def test_step_1b
  assert_equal "abacde", "abacde".porter2_step1b

  words_non_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate",
           "hopping" => "hop", "hopped" => "hop",
           "hoped" => "hope", "hoping" => "hope",
           "atomized" => "atomize", "atomised" => "atomis",
           "addicted" => "addict", "bleed" => "bleed" }
  words_non_gb.each do |original, stemmed|
    # Compute the result once so the failure message reports exactly the
    # value that was compared.
    actual = original.porter2_step1b
    assert_equal stemmed, actual,
        "#{original} should have stemmed to #{stemmed} but got #{actual} instead"
  end

  words_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate",
           "hopping" => "hop", "hopped" => "hop",
           "hoped" => "hope", "hoping" => "hope",
           "atomized" => "atomize", "atomised" => "atomise",
           "addicted" => "addict", "bleed" => "bleed" }
  words_gb.each do |original, stemmed|
    actual = original.porter2_step1b(true)
    assert_equal stemmed, actual,
        "#{original} should have stemmed to #{stemmed} but got #{actual} instead"
  end
end
test_step_1c() click to toggle source
     # File test/tc_porter2_parts.rb, line 152
# Verifies String#porter2_step1c against a table of
# [input, expected output] pairs.
def test_step_1c
  expectations = [
    ["cry", "cri"],
    ["by", "by"],
    ["saY", "saY"],
    ["abbeY", "abbeY"]
  ]

  expectations.each do |input, expected|
    assert_equal expected, input.porter2_step1c
  end
end
test_step_2() click to toggle source
     # File test/tc_porter2_parts.rb, line 159
# Verifies String#porter2_step2 in three passes: a word with nothing to
# change, a table of calls made with an explicit argument list (none, true
# or false), and a table of calls made with no argument.
def test_step_2
  assert_equal "abac", "abac".porter2_step2

  # Each row is [input, arguments passed to porter2_step2, expected output].
  # An empty argument list means the method is called with no argument.
  flagged = [
    ["nationalization", [], "nationalize"],
    ["nationalisation", [], "nationalisate"],
    ["nationalization", [true], "nationalize"],
    ["nationalisation", [true], "nationalise"],
    # Repeat the steps to ensure that the english-gb behaviour isn't sticky
    ["nationalization", [false], "nationalize"],
    ["nationalisation", [false], "nationalisate"],
    ["nationalization", [], "nationalize"],
    ["nationalisation", [], "nationalisate"],
    ["nationalizer", [], "nationalize"],
    ["nationaliser", [], "nationaliser"],
    ["nationalizer", [true], "nationalize"],
    ["nationaliser", [true], "nationalise"]
  ]
  flagged.each do |input, args, expected|
    assert_equal expected, input.porter2_step2(*args)
  end

  # Each row is [input, expected output]; called with no argument.
  plain = [
    ["abactional", "abaction"],
    ["abacenci", "abacence"],
    ["abacanci", "abacance"],
    ["abacabli", "abacable"],
    ["abacentli", "abacent"],
    ["abacizer", "abacize"],
    ["abacization", "abacize"],
    ["abacational", "abacate"],
    ["abacation", "abacate"],
    ["abacator", "abacate"],
    ["abacalism", "abacal"],
    ["abacaliti", "abacal"],
    ["abacalli", "abacal"],
    ["abacfulness", "abacful"],
    ["abacousli", "abacous"],
    ["abacousness", "abacous"],
    ["abaciveness", "abacive"],
    ["abaciviti", "abacive"],
    ["abiliti", "abiliti"],
    ["abacbiliti", "abacble"],
    ["abacbli", "abacble"],
    ["abacfulli", "abacful"],
    ["abaclessli", "abacless"],
    ["abaclogi", "abaclog"],

    ["abacli", "abac"],
    ["abdli", "abd"],
    ["abeli", "abe"],
    ["abgli", "abg"],
    ["abhli", "abh"],
    ["abkli", "abk"],
    ["abmli", "abm"],
    ["abnli", "abn"],
    ["abrli", "abr"],
    ["abtli", "abt"],
    ["abali", "abali"],

    ["badli", "bad"],
    ["fluentli", "fluentli"],
    ["geologi", "geolog"]
  ]
  plain.each do |input, expected|
    assert_equal expected, input.porter2_step2
  end
end
test_step_3() click to toggle source
     # File test/tc_porter2_parts.rb, line 219
# Verifies String#porter2_step3, called with no argument, with true and with
# false, against the expected output for each input.
def test_step_3
  # Called with no argument: any String, including "", is truthy in Ruby, so
  # passing "" would act as passing true rather than as "no flag".
  assert_equal "abac", "abac".porter2_step3

  assert_equal "national", "nationalize".porter2_step3
  assert_equal "nationalise", "nationalise".porter2_step3
  assert_equal "national", "nationalise".porter2_step3(true)
  # Repeat the steps to ensure that the english-gb behaviour isn't sticky
  assert_equal "national", "nationalize".porter2_step3(false)
  assert_equal "nationalise", "nationalise".porter2_step3(false)
  assert_equal "national", "nationalize".porter2_step3
  assert_equal "nationalise", "nationalise".porter2_step3

  assert_equal "abaction", "abactional".porter2_step3
  assert_equal "abacate", "abacational".porter2_step3
  assert_equal "abacic", "abacicate".porter2_step3
  assert_equal "abacic", "abaciciti".porter2_step3
  assert_equal "abacic", "abacical".porter2_step3
  assert_equal "abac", "abacful".porter2_step3
  assert_equal "abac", "abacness".porter2_step3

  assert_equal "abacabac", "abacabacative".porter2_step3

  assert_equal "dryness", "dryness".porter2_step3
end
test_step_4() click to toggle source
     # File test/tc_porter2_parts.rb, line 245
# Verifies String#porter2_step4, called with no argument, with true and with
# false, against the expected output for each input.
def test_step_4
  # Called with no argument: any String, including "", is truthy in Ruby, so
  # passing "" would act as passing true rather than as "no flag".
  assert_equal "abac", "abac".porter2_step4

  assert_equal "nation", "nationize".porter2_step4
  assert_equal "nationise", "nationise".porter2_step4
  assert_equal "nation", "nationize".porter2_step4(true)
  assert_equal "nation", "nationise".porter2_step4(true)
  assert_equal "nation", "nationize".porter2_step4(false)
  assert_equal "nationise", "nationise".porter2_step4(false)
  assert_equal "nation", "nationize".porter2_step4
  assert_equal "nationise", "nationise".porter2_step4

  assert_equal "abac", "abacal".porter2_step4
  assert_equal "abac", "abacance".porter2_step4
  assert_equal "abac", "abacence".porter2_step4
  assert_equal "abac", "abacer".porter2_step4
  assert_equal "abac", "abacic".porter2_step4
  assert_equal "abacer", "abacerable".porter2_step4
  assert_equal "abac", "abacible".porter2_step4
  assert_equal "abac", "abacant".porter2_step4
  assert_equal "abac", "abacement".porter2_step4      # Check we handle overlapping suffixes properly
  assert_equal "abacac", "abacacement".porter2_step4
  assert_equal "abacac", "abacacment".porter2_step4
  assert_equal "abac", "abacment".porter2_step4
  assert_equal "abac", "abacent".porter2_step4
  assert_equal "abac", "abacism".porter2_step4
  assert_equal "abac", "abacate".porter2_step4
  assert_equal "abac", "abaciti".porter2_step4
  assert_equal "abac", "abacous".porter2_step4
  assert_equal "abac", "abacive".porter2_step4
  assert_equal "abac", "abacize".porter2_step4
  assert_equal "abacion", "abacion".porter2_step4
  assert_equal "abacs", "abacsion".porter2_step4
  assert_equal "abact", "abaction".porter2_step4
  assert_equal "abction", "abction".porter2_step4
  assert_equal "ablut", "ablution".porter2_step4
  assert_equal "agreement", "agreement".porter2_step4

  assert_equal "abcal", "abcal".porter2_step4 # No removal if suffix isn't in R2
end
test_step_5() click to toggle source
     # File test/tc_porter2_parts.rb, line 286
# Verifies String#porter2_step5 against a table of
# [input, expected output] pairs.
def test_step_5
  expectations = [
    ["abac", "abac"],

    ["abacll", "abacl"],
    ["abcll", "abcll"],

    ["abc", "abc"],
    ["able", "abl"],
    ["abe", "abe"],
    ["abace", "abac"],
    ["bawace", "bawac"]
  ]

  expectations.each do |input, expected|
    assert_equal expected, input.porter2_step5
  end
end
test_tidy() click to toggle source
    # File test/tc_porter2_parts.rb, line 13
# Verifies String#porter2_tidy against a table of [input, expected output]
# pairs: surrounding spaces, upper-case input, and inputs containing the
# typographic quote characters ‘ and ’.
def test_tidy
  expectations = [
    ["abacde", "abacde"],
    ["  abacde  ", "abacde"],
    ["ABACDE", "abacde"],
    ["ab‘cde", "ab'cde"],
    ["ab’cde", "ab'cde"],
    ["ab’c’de", "ab'c'de"],
    ["ab‘c‘de", "ab'c'de"],
    ["’‘abacde", "''abacde"]
  ]

  expectations.each do |input, expected|
    assert_equal expected, input.porter2_tidy
  end
end

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.