Test::Unit::TestCase
The full set of test words from snowball.tartarus.org/algorithms/english/stemmer.html
Test words with -ise suffixes (and similar), to test how British English is stemmed
# File test/tc_porter2_parts.rb, line 38
# Checks String#porter2_r1 against a table of [word, expected R1] pairs:
# first a handful of ordinary words, then the prefixes the original test
# labels as "special cases".
def test_R1
  ordinary = [
    ["beautiful", "iful"],
    ["beauty", "y"],
    ["beau", ""],
    ["animadversion", "imadversion"],
    ["sprinkled", "kled"],
    ["eucharist", "harist"]
  ]

  # special cases
  special = [
    ["generate", "ate"],
    ["generates", "ates"],
    ["generated", "ated"],
    ["general", "al"],
    ["generally", "ally"],
    ["generic", "ic"],
    ["generically", "ically"],
    ["generous", "ous"],
    ["generously", "ously"],
    ["communal", "al"],
    ["community", "ity"],
    ["commune", "e"],
    ["arsenic", "ic"],
    ["arsenal", "al"]
  ]

  # Same assertion order as the one-per-line form: ordinary words first.
  (ordinary + special).each do |word, expected_r1|
    assert_equal expected_r1, word.porter2_r1
  end
end
# File test/tc_porter2_parts.rb, line 65
# Checks String#porter2_r2 against a table of [word, expected R2] pairs.
def test_R2
  [
    ["beautiful", "ul"],
    ["beauty", ""],
    ["beau", ""],
    ["animadversion", "adversion"],
    ["sprinkled", ""],
    ["eucharist", "ist"]
  ].each do |word, expected_r2|
    assert_equal expected_r2, word.porter2_r2
  end
end
# File test/tc_porter2_parts.rb, line 74
# Checks String#porter2_ends_with_short_syllable? on words expected to
# answer true, followed by words expected to answer false.
def test_ends_with_short_syllable?
  %w[rap trap entrap ow on at].each do |word|
    assert_equal true, word.porter2_ends_with_short_syllable?
  end

  %w[uproot bestow disturb].each do |word|
    assert_equal false, word.porter2_ends_with_short_syllable?
  end
end
# File test/tc_porter2_parts.rb, line 86
# Checks String#porter2_is_short_word? on a list of words expected to be
# classified as short and a list expected to be classified as long.
def test_is_short_word?
  # The word lists are %w literals; the listing had lost the "%w[" opener,
  # leaving a bare "]" that does not parse.
  short_words = %w[bed shed shred hop]
  long_words = %w[bead embed beds]

  short_words.each do |w|
    assert_equal true, w.porter2_is_short_word?,
                 "#{w} should be short but classified as long"
  end

  long_words.each do |w|
    assert_equal false, w.porter2_is_short_word?,
                 "#{w} should be long but classified as short"
  end
end
# File test/tc_porter2_parts.rb, line 299
# Checks String#porter2_postprocess against [input, expected] pairs; the
# inputs differ in where an upper-case "Y" appears.
def test_porter2_postprocess
  [
    ["abac", "abac"],
    ["abacy", "abacy"],
    ["abacY", "abacy"],
    ["aYbcY", "aybcy"],
    ["aYbcy", "aybcy"]
  ].each do |input, expected|
    assert_equal expected, input.porter2_postprocess
  end
end
# File test/tc_porter2_parts.rb, line 24
# Checks String#porter2_preprocess against [input, expected] pairs covering
# leading apostrophes and the positions in which "y" is expected to come
# back as "Y".
def test_preprocess
  [
    ["abacde", "abacde"],
    ["''abacde", "abacde"],
    ["'ab'c'de", "ab'c'de"],
    ["''ab'c'de", "ab'c'de"],
    ["yabac", "Yabac"],
    ["aybc", "aYbc"],
    ["abacdey", "abacdeY"],
    ["abayde", "abaYde"],
    ["kabayde", "kabaYde"],
    ["kabyayde", "kabyaYde"],
    ["'''", "'"]
  ].each do |input, expected|
    assert_equal expected, input.porter2_preprocess
  end
end
# File test/tc_porter2_full.rb, line 29519
# Runs String#stem over every (word, expected stem) pair in TEST_WORDS.
def test_stemmer
  TEST_WORDS.each do |word, expected_stem|
    actual_stem = word.stem
    # The message is built before the assertion runs and calls #stem again,
    # exactly as the inline form did.
    failure_note = "#{word} should have stemmed to #{expected_stem} but got #{word.stem} instead"
    assert_equal expected_stem, actual_stem, failure_note
  end
end
# File test/tc_porter2_full.rb, line 29526
# Runs String#stem(true) over every (word, expected stem) pair in
# TEST_WORDS_ENGB.
def test_stemmer_engb
  TEST_WORDS_ENGB.each do |word, expected_stem|
    actual_stem = word.stem(true)
    # The message is built before the assertion runs and calls #stem(true)
    # again, exactly as the inline form did.
    failure_note = "#{word} should have stemmed to #{expected_stem} but got #{word.stem(true)} instead"
    assert_equal expected_stem, actual_stem, failure_note
  end
end
# File test/tc_porter2_parts.rb, line 101
# Checks String#porter2_step0 against [input, expected] pairs built around
# trailing and embedded apostrophe / "'s" sequences.
def test_step_0
  [
    ["abac", "abac"],
    ["abac'", "abac"],
    ["abac's", "abac"],
    ["abac's'", "abac"],
    ["ab'c", "ab'c"],
    ["ab'sc", "ab'sc"],
    ["ab's'c", "ab's'c"],
    ["ab'sc's", "ab'sc"],
    ["'", "'"],
    ["'s", "'s"],
    ["'s'", "'s"]
  ].each do |input, expected|
    assert_equal expected, input.porter2_step0
  end
end
# File test/tc_porter2_parts.rb, line 115
# Checks String#porter2_step1a against [input, expected] pairs.
def test_step_1a
  [
    ["abacde", "abacde"],
    ["abacesses", "abacess"],
    ["ties", "tie"],
    ["tied", "tie"],
    ["cries", "cri"],
    ["cried", "cri"],
    ["gas", "gas"],
    ["this", "this"],
    ["gaps", "gap"],
    ["kiwis", "kiwi"],
    ["abacus", "abacus"],
    ["abacess", "abacess"]
  ].each do |input, expected|
    assert_equal expected, input.porter2_step1a
  end
end
# File test/tc_porter2_parts.rb, line 130
# Checks String#porter2_step1b twice over the same word list: once with no
# argument and once with `true`. The two expectation tables differ only for
# "atomised".
def test_step_1b
  assert_equal "abacde", "abacde".porter2_step1b

  words_non_gb = { "luxuriated" => "luxuriate", "luxuriating" => "luxuriate",
                   "hopping" => "hop", "hopped" => "hop",
                   "hoped" => "hope", "hoping" => "hope",
                   "atomized" => "atomize", "atomised" => "atomis",
                   "addicted" => "addict", "bleed" => "bleed" }
  words_non_gb.each do |original, stemmed|
    # Compute the result once so the failure message reports exactly the
    # value that was asserted. The message used to re-run the step with
    # `original.porter2_r1` as its argument, which is not the call under test.
    actual = original.porter2_step1b
    assert_equal stemmed, actual,
                 "#{original} should have stemmed to #{stemmed} but got #{actual} instead"
  end

  words_gb = { "luxuriated" => "luxuriate", "luxuriating" => "luxuriate",
               "hopping" => "hop", "hopped" => "hop",
               "hoped" => "hope", "hoping" => "hope",
               "atomized" => "atomize", "atomised" => "atomise",
               "addicted" => "addict", "bleed" => "bleed" }
  words_gb.each do |original, stemmed|
    actual = original.porter2_step1b(true)
    assert_equal stemmed, actual,
                 "#{original} should have stemmed to #{stemmed} but got #{actual} instead"
  end
end
# File test/tc_porter2_parts.rb, line 152
# Checks String#porter2_step1c against [input, expected] pairs.
def test_step_1c
  [
    ["cry", "cri"],
    ["by", "by"],
    ["saY", "saY"],
    ["abbeY", "abbeY"]
  ].each do |input, expected|
    assert_equal expected, input.porter2_step1c
  end
end
# File test/tc_porter2_parts.rb, line 159
# Checks String#porter2_step2. The first table exercises the optional
# argument (absent, true, false) on -isation/-ization and -iser/-izer words;
# the second table lists [input, expected] pairs for the no-argument call.
def test_step_2
  assert_equal "abac", "abac".porter2_step2

  # Each row is [input, arguments passed to porter2_step2, expected].
  flag_cases = [
    ["nationalization", [], "nationalize"],
    ["nationalisation", [], "nationalisate"],
    ["nationalization", [true], "nationalize"],
    ["nationalisation", [true], "nationalise"],
    # Repeat the steps to ensure that the english-gb behaviour isn't sticky
    ["nationalization", [false], "nationalize"],
    ["nationalisation", [false], "nationalisate"],
    ["nationalization", [], "nationalize"],
    ["nationalisation", [], "nationalisate"],

    ["nationalizer", [], "nationalize"],
    ["nationaliser", [], "nationaliser"],
    ["nationalizer", [true], "nationalize"],
    ["nationaliser", [true], "nationalise"]
  ]
  flag_cases.each do |input, step_args, expected|
    assert_equal expected, input.porter2_step2(*step_args)
  end

  suffix_cases = [
    ["abactional", "abaction"],
    ["abacenci", "abacence"],
    ["abacanci", "abacance"],
    ["abacabli", "abacable"],
    ["abacentli", "abacent"],
    ["abacizer", "abacize"],
    ["abacization", "abacize"],
    ["abacational", "abacate"],
    ["abacation", "abacate"],
    ["abacator", "abacate"],
    ["abacalism", "abacal"],
    ["abacaliti", "abacal"],
    ["abacalli", "abacal"],
    ["abacfulness", "abacful"],
    ["abacousli", "abacous"],
    ["abacousness", "abacous"],
    ["abaciveness", "abacive"],
    ["abaciviti", "abacive"],
    ["abiliti", "abiliti"],
    ["abacbiliti", "abacble"],
    ["abacbli", "abacble"],
    ["abacfulli", "abacful"],
    ["abaclessli", "abacless"],
    ["abaclogi", "abaclog"],

    ["abacli", "abac"],
    ["abdli", "abd"],
    ["abeli", "abe"],
    ["abgli", "abg"],
    ["abhli", "abh"],
    ["abkli", "abk"],
    ["abmli", "abm"],
    ["abnli", "abn"],
    ["abrli", "abr"],
    ["abtli", "abt"],
    ["abali", "abali"],

    ["badli", "bad"],
    ["fluentli", "fluentli"],
    ["geologi", "geolog"]
  ]
  suffix_cases.each do |input, expected|
    assert_equal expected, input.porter2_step2
  end
end
# File test/tc_porter2_parts.rb, line 219
# Checks String#porter2_step3. The first table exercises the optional
# argument (absent, true, false) on -ize/-ise words; the second lists
# [input, expected] pairs for the no-argument call.
def test_step_3
  # NOTE(review): "" is passed where the other calls pass true/false; an
  # empty string is truthy in Ruby. Left exactly as written - confirm intent.
  assert_equal "abac", "abac".porter2_step3("")

  # Each row is [input, arguments passed to porter2_step3, expected].
  flag_cases = [
    ["nationalize", [], "national"],
    ["nationalise", [], "nationalise"],
    ["nationalise", [true], "national"],
    # Repeat the steps to ensure that the english-gb behaviour isn't sticky
    ["nationalize", [false], "national"],
    ["nationalise", [false], "nationalise"],
    ["nationalize", [], "national"],
    ["nationalise", [], "nationalise"]
  ]
  flag_cases.each do |input, step_args, expected|
    assert_equal expected, input.porter2_step3(*step_args)
  end

  suffix_cases = [
    ["abactional", "abaction"],
    ["abacational", "abacate"],
    ["abacicate", "abacic"],
    ["abaciciti", "abacic"],
    ["abacical", "abacic"],
    ["abacful", "abac"],
    ["abacness", "abac"],

    # This pair appears twice in the original listing; both are kept.
    ["abacabacative", "abacabac"],
    ["abacabacative", "abacabac"],

    ["dryness", "dryness"]
  ]
  suffix_cases.each do |input, expected|
    assert_equal expected, input.porter2_step3
  end
end
# File test/tc_porter2_parts.rb, line 245
# Checks String#porter2_step4. The first table exercises the optional
# argument (absent, true, false) on -ize/-ise words; the second lists
# [input, expected] pairs for the no-argument call.
def test_step_4
  # NOTE(review): "" is passed where the other calls pass true/false; an
  # empty string is truthy in Ruby. Left exactly as written - confirm intent.
  assert_equal "abac", "abac".porter2_step4("")

  # Each row is [input, arguments passed to porter2_step4, expected].
  flag_cases = [
    ["nationize", [], "nation"],
    ["nationise", [], "nationise"],
    ["nationize", [true], "nation"],
    ["nationise", [true], "nation"],
    ["nationize", [false], "nation"],
    ["nationise", [false], "nationise"],
    ["nationize", [], "nation"],
    ["nationise", [], "nationise"]
  ]
  flag_cases.each do |input, step_args, expected|
    assert_equal expected, input.porter2_step4(*step_args)
  end

  suffix_cases = [
    ["abacal", "abac"],
    ["abacance", "abac"],
    ["abacence", "abac"],
    ["abacer", "abac"],
    ["abacic", "abac"],
    ["abacerable", "abacer"],
    ["abacible", "abac"],
    ["abacant", "abac"],
    ["abacement", "abac"], # Check we handle overlapping suffixes properly
    ["abacacement", "abacac"],
    ["abacacment", "abacac"],
    ["abacment", "abac"],
    ["abacent", "abac"],
    ["abacism", "abac"],
    ["abacate", "abac"],
    ["abaciti", "abac"],
    ["abacous", "abac"],
    ["abacive", "abac"],
    ["abacize", "abac"],
    ["abacion", "abacion"],
    ["abacsion", "abacs"],
    ["abaction", "abact"],
    ["abction", "abction"],
    ["ablution", "ablut"],
    ["agreement", "agreement"],

    ["abcal", "abcal"] # No removal if suffix isn't in R2
  ]
  suffix_cases.each do |input, expected|
    assert_equal expected, input.porter2_step4
  end
end
# File test/tc_porter2_parts.rb, line 286
# Checks String#porter2_step5 against [input, expected] pairs ending in
# "ll", "e", or neither.
def test_step_5
  [
    ["abac", "abac"],

    ["abacll", "abacl"],
    ["abcll", "abcll"],

    ["abc", "abc"],
    ["able", "abl"],
    ["abe", "abe"],
    ["abace", "abac"],
    ["bawace", "bawac"]
  ].each do |input, expected|
    assert_equal expected, input.porter2_step5
  end
end
# File test/tc_porter2_parts.rb, line 13
# Checks String#porter2_tidy against [input, expected] pairs: plain,
# space-padded and upper-case input, plus input containing curly quotes.
def test_tidy
  [
    ["abacde", "abacde"],
    [" abacde ", "abacde"],
    ["ABACDE", "abacde"],
    ["ab‘cde", "ab'cde"],
    ["ab’cde", "ab'cde"],
    ["ab’c’de", "ab'c'de"],
    ["ab‘c‘de", "ab'c'de"],
    ["’‘abacde", "''abacde"]
  ].each do |input, expected|
    assert_equal expected, input.porter2_tidy
  end
end
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.