X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;ds=inline;f=doc%2FTestPorter2.html;h=137ff2ac7d9305da2491ec208559ae346a1c3570;hb=49dc03eafbc7aa52de0caa3b3c0d5b5f25ce311d;hp=1bd45c0245e2ba9a86d2642a38a8e172c8887f21;hpb=9dd2505747fbbd6040e9516dd0ae2e8ec5c6a4d2;p=porter2stemmer.git diff --git a/doc/TestPorter2.html b/doc/TestPorter2.html index 1bd45c0..137ff2a 100644 --- a/doc/TestPorter2.html +++ b/doc/TestPorter2.html @@ -72,11 +72,11 @@
+The full set of test words from snowball.tartarus.org/algorithms/english/stemmer.html +
+Test words with -ise suffixes (and similar), to test how British English is +stemmed +
- # File test/tc_porter2_parts.rb, line 69 -69: def test_ends_with_short_syllable? -70: assert_equal true, "rap".porter2_ends_with_short_syllable? -71: assert_equal true, "trap".porter2_ends_with_short_syllable? -72: assert_equal true, "entrap".porter2_ends_with_short_syllable? -73: assert_equal true, "ow".porter2_ends_with_short_syllable? -74: assert_equal true, "on".porter2_ends_with_short_syllable? -75: assert_equal true, "at".porter2_ends_with_short_syllable? -76: assert_equal false, "uproot".porter2_ends_with_short_syllable? -77: assert_equal false, "bestow".porter2_ends_with_short_syllable? -78: assert_equal false, "disturb".porter2_ends_with_short_syllable? -79: end+ # File test/tc_porter2_parts.rb, line 38 +38: def test_R1 +39: assert_equal "iful", "beautiful".porter2_r1 +40: assert_equal "y", "beauty".porter2_r1 +41: assert_equal "", "beau".porter2_r1 +42: assert_equal "imadversion", "animadversion".porter2_r1 +43: assert_equal "kled", "sprinkled".porter2_r1 +44: assert_equal "harist", "eucharist".porter2_r1 +45: +46: # special cases +47: assert_equal "ate", "generate".porter2_r1 +48: assert_equal "ates", "generates".porter2_r1 +49: assert_equal "ated", "generated".porter2_r1 +50: assert_equal "al", "general".porter2_r1 +51: assert_equal "ally", "generally".porter2_r1 +52: assert_equal "ic", "generic".porter2_r1 +53: assert_equal "ically", "generically".porter2_r1 +54: assert_equal "ous", "generous".porter2_r1 +55: assert_equal "ously", "generously".porter2_r1 +56: +57: assert_equal "al", "communal".porter2_r1 +58: assert_equal "ity", "community".porter2_r1 +59: assert_equal "e", "commune".porter2_r1 +60: +61: assert_equal "ic", "arsenic".porter2_r1 +62: assert_equal "al", "arsenal".porter2_r1 +63: end
- # File test/tc_porter2_parts.rb, line 42 -42: def test_find_R1 -43: assert_equal "iful", "beautiful".porter2_r1 -44: assert_equal "y", "beauty".porter2_r1 -45: assert_equal "", "beau".porter2_r1 -46: assert_equal "imadversion", "animadversion".porter2_r1 -47: assert_equal "kled", "sprinkled".porter2_r1 -48: assert_equal "harist", "eucharist".porter2_r1 -49: -50: # special cases -51: assert_equal "ate", "generate".porter2_r1 -52: assert_equal "ates", "generates".porter2_r1 -53: assert_equal "ated", "generated".porter2_r1 -54: assert_equal "al", "general".porter2_r1 -55: assert_equal "ally", "generally".porter2_r1 -56: assert_equal "ic", "generic".porter2_r1 -57: assert_equal "ically", "generically".porter2_r1 -58: assert_equal "ous", "generous".porter2_r1 -59: assert_equal "ously", "generously".porter2_r1 -60: -61: assert_equal "al", "communal".porter2_r1 -62: assert_equal "ity", "community".porter2_r1 -63: assert_equal "e", "commune".porter2_r1 -64: -65: assert_equal "ic", "arsenic".porter2_r1 -66: assert_equal "al", "arsenal".porter2_r1 -67: end+ # File test/tc_porter2_parts.rb, line 65 +65: def test_R2 +66: assert_equal "ul", "beautiful".porter2_r2 +67: assert_equal "", "beauty".porter2_r2 +68: assert_equal "", "beau".porter2_r2 +69: assert_equal "adversion", "animadversion".porter2_r2 +70: assert_equal "", "sprinkled".porter2_r2 +71: assert_equal "ist", "eucharist".porter2_r2 +72: end
- # File test/tc_porter2_parts.rb, line 96 - 96: def test_find_R2 - 97: assert_equal "ul", "beautiful".porter2_r2 - 98: assert_equal "", "beauty".porter2_r2 - 99: assert_equal "", "beau".porter2_r2 -100: assert_equal "adversion", "animadversion".porter2_r2 -101: assert_equal "", "sprinkled".porter2_r2 -102: assert_equal "ist", "eucharist".porter2_r2 -103: end+ # File test/tc_porter2_parts.rb, line 74 +74: def test_ends_with_short_syllable? +75: assert_equal true, "rap".porter2_ends_with_short_syllable? +76: assert_equal true, "trap".porter2_ends_with_short_syllable? +77: assert_equal true, "entrap".porter2_ends_with_short_syllable? +78: assert_equal true, "ow".porter2_ends_with_short_syllable? +79: assert_equal true, "on".porter2_ends_with_short_syllable? +80: assert_equal true, "at".porter2_ends_with_short_syllable? +81: assert_equal false, "uproot".porter2_ends_with_short_syllable? +82: assert_equal false, "bestow".porter2_ends_with_short_syllable? +83: assert_equal false, "disturb".porter2_ends_with_short_syllable? +84: end
- # File test/tc_porter2_parts.rb, line 81 -81: def test_is_short_word? -82: short_words = ] bed shed shred hop ] -83: long_words = ] bead embed beds ] -84: short_words.each do |w| -85: r1 = w.porter2_r1 -86: assert_equal true, w.porter2_is_short_word?, -87: "#{w} should be short but classified as long" -88: end -89: long_words.each do |w| + # File test/tc_porter2_parts.rb, line 86 +86: def test_is_short_word? +87: short_words = ] bed shed shred hop ] +88: long_words = ] bead embed beds ] +89: short_words.each do |w| 90: r1 = w.porter2_r1 -91: assert_equal false, w.porter2_is_short_word?, -92: "#{w} should be long but classified as short" +91: assert_equal true, w.porter2_is_short_word?, +92: "#{w} should be short but classified as long" 93: end -94: end+94: long_words.each do |w| +95: r1 = w.porter2_r1 +96: assert_equal false, w.porter2_is_short_word?, +97: "#{w} should be long but classified as short" +98: end +99: end
- # File test/tc_porter2_parts.rb, line 303 -303: def test_porter2_postprocess -304: assert_equal "abac", "abac".porter2_postprocess -305: assert_equal "abacy", "abacy".porter2_postprocess -306: assert_equal "abacy", "abacY".porter2_postprocess -307: assert_equal "aybcy", "aYbcY".porter2_postprocess -308: assert_equal "aybcy", "aYbcy".porter2_postprocess -309: end+ # File test/tc_porter2_parts.rb, line 299 +299: def test_porter2_postprocess +300: assert_equal "abac", "abac".porter2_postprocess +301: assert_equal "abacy", "abacy".porter2_postprocess +302: assert_equal "abacy", "abacY".porter2_postprocess +303: assert_equal "aybcy", "aYbcY".porter2_postprocess +304: assert_equal "aybcy", "aYbcy".porter2_postprocess +305: end
- # File test/tc_porter2_parts.rb, line 29 -29: def test_preprocess -30: assert_equal "abacde", "abacde".porter2_preprocess -31: assert_equal "abacde", "''abacde".porter2_preprocess -32: assert_equal "ab'c'de", "'ab'c'de".porter2_preprocess -33: assert_equal "ab'c'de", "''ab'c'de".porter2_preprocess -34: assert_equal "Yabac", "yabac".porter2_preprocess -35: assert_equal "aYbc", "aybc".porter2_preprocess -36: assert_equal "abacdeY", "abacdey".porter2_preprocess -37: assert_equal "abaYde", "abayde".porter2_preprocess -38: assert_equal "kabaYde", "kabayde".porter2_preprocess -39: assert_equal "'", "'''".porter2_preprocess -40: end+ # File test/tc_porter2_parts.rb, line 24 +24: def test_preprocess +25: assert_equal "abacde", "abacde".porter2_preprocess +26: assert_equal "abacde", "''abacde".porter2_preprocess +27: assert_equal "ab'c'de", "'ab'c'de".porter2_preprocess +28: assert_equal "ab'c'de", "''ab'c'de".porter2_preprocess +29: assert_equal "Yabac", "yabac".porter2_preprocess +30: assert_equal "aYbc", "aybc".porter2_preprocess +31: assert_equal "abacdeY", "abacdey".porter2_preprocess +32: assert_equal "abaYde", "abayde".porter2_preprocess +33: assert_equal "kabaYde", "kabayde".porter2_preprocess +34: assert_equal "kabyaYde", "kabyayde".porter2_preprocess +35: assert_equal "'", "'''".porter2_preprocess +36: end
- # File test/tc_porter2_full.rb, line 29521 -29521: def test_stemmer -29522: TEST_WORDS.each do |original, stemmed| -29523: assert_equal stemmed, original.stem, -29524: "#{original} should have stemmed to #{stemmed} but got #{original.stem} instead" -29525: end -29526: end+ # File test/tc_porter2_full.rb, line 29519 +29519: def test_stemmer +29520: TEST_WORDS.each do |original, stemmed| +29521: assert_equal stemmed, original.stem, +29522: "#{original} should have stemmed to #{stemmed} but got #{original.stem} instead" +29523: end +29524: end
- # File test/tc_porter2_full.rb, line 29528 -29528: def test_stemmer_engb -29529: TEST_WORDS_ENGB.each do |original, stemmed| -29530: assert_equal stemmed, original.stem(true), -29531: "#{original} should have stemmed to #{stemmed} but got #{original.stem(true)} instead" -29532: end -29533: end+ # File test/tc_porter2_full.rb, line 29526 +29526: def test_stemmer_engb +29527: TEST_WORDS_ENGB.each do |original, stemmed| +29528: assert_equal stemmed, original.stem(true), +29529: "#{original} should have stemmed to #{stemmed} but got #{original.stem(true)} instead" +29530: end +29531: end
- # File test/tc_porter2_parts.rb, line 105 -105: def test_step_0 -106: assert_equal "abac", "abac".step_0 -107: assert_equal "abac", "abac'".step_0 -108: assert_equal "abac", "abac's".step_0 -109: assert_equal "abac", "abac's'".step_0 -110: assert_equal "ab'c", "ab'c".step_0 -111: assert_equal "ab'sc", "ab'sc".step_0 -112: assert_equal "ab's'c", "ab's'c".step_0 -113: assert_equal "ab'sc", "ab'sc's".step_0 -114: assert_equal "'", "'".step_0 -115: assert_equal "'s", "'s".step_0 -116: assert_equal "'s", "'s'".step_0 -117: end+ # File test/tc_porter2_parts.rb, line 101 +101: def test_step_0 +102: assert_equal "abac", "abac".porter2_step0 +103: assert_equal "abac", "abac'".porter2_step0 +104: assert_equal "abac", "abac's".porter2_step0 +105: assert_equal "abac", "abac's'".porter2_step0 +106: assert_equal "ab'c", "ab'c".porter2_step0 +107: assert_equal "ab'sc", "ab'sc".porter2_step0 +108: assert_equal "ab's'c", "ab's'c".porter2_step0 +109: assert_equal "ab'sc", "ab'sc's".porter2_step0 +110: assert_equal "'", "'".porter2_step0 +111: assert_equal "'s", "'s".porter2_step0 +112: assert_equal "'s", "'s'".porter2_step0 +113: end
- # File test/tc_porter2_parts.rb, line 119 -119: def test_step_1a -120: assert_equal "abacde", "abacde".step_1a -121: assert_equal "abacess", "abacesses".step_1a -122: assert_equal "tie", "ties".step_1a -123: assert_equal "tie", "tied".step_1a -124: assert_equal "cri", "cries".step_1a -125: assert_equal "cri", "cried".step_1a -126: assert_equal "gas", "gas".step_1a -127: assert_equal "this", "this".step_1a -128: assert_equal "gap", "gaps".step_1a -129: assert_equal "kiwi", "kiwis".step_1a -130: assert_equal "abacus", "abacus".step_1a -131: assert_equal "abacess", "abacess".step_1a -132: end+ # File test/tc_porter2_parts.rb, line 115 +115: def test_step_1a +116: assert_equal "abacde", "abacde".porter2_step1a +117: assert_equal "abacess", "abacesses".porter2_step1a +118: assert_equal "tie", "ties".porter2_step1a +119: assert_equal "tie", "tied".porter2_step1a +120: assert_equal "cri", "cries".porter2_step1a +121: assert_equal "cri", "cried".porter2_step1a +122: assert_equal "gas", "gas".porter2_step1a +123: assert_equal "this", "this".porter2_step1a +124: assert_equal "gap", "gaps".porter2_step1a +125: assert_equal "kiwi", "kiwis".porter2_step1a +126: assert_equal "abacus", "abacus".porter2_step1a +127: assert_equal "abacess", "abacess".porter2_step1a +128: end
- # File test/tc_porter2_parts.rb, line 134 -134: def test_step_1b -135: assert_equal "abacde", "abacde".step_1b -136: words_non_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate", -137: "hopping" => "hop", "hopped" => "hop", -138: "hoped" => "hope", "hoping" => "hope", -139: "atomized" => "atomize", "atomised" => "atomis", -140: "addicted" => "addict", "bleed" => "bleed" } -141: words_non_gb.each do |original, stemmed| -142: assert_equal stemmed, original.step_1b, -143: "#{original} should have stemmed to #{stemmed} but got #{original.step_1b(original.porter2_r1)} instead" -144: end -145: words_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate", -146: "hopping" => "hop", "hopped" => "hop", -147: "hoped" => "hope", "hoping" => "hope", -148: "atomized" => "atomize", "atomised" => "atomise", -149: "addicted" => "addict", "bleed" => "bleed" } -150: words_gb.each do |original, stemmed| -151: assert_equal stemmed, original.step_1b(true), -152: "#{original} should have stemmed to #{stemmed} but got #{original.step_1b(original.porter2_r1)} instead" -153: end -154: end+ # File test/tc_porter2_parts.rb, line 130 +130: def test_step_1b +131: assert_equal "abacde", "abacde".porter2_step1b +132: words_non_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate", +133: "hopping" => "hop", "hopped" => "hop", +134: "hoped" => "hope", "hoping" => "hope", +135: "atomized" => "atomize", "atomised" => "atomis", +136: "addicted" => "addict", "bleed" => "bleed" } +137: words_non_gb.each do |original, stemmed| +138: assert_equal stemmed, original.porter2_step1b, +139: "#{original} should have stemmed to #{stemmed} but got #{original.porter2_step1b(original.porter2_r1)} instead" +140: end +141: words_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate", +142: "hopping" => "hop", "hopped" => "hop", +143: "hoped" => "hope", "hoping" => "hope", +144: "atomized" => "atomize", "atomised" => "atomise", +145: "addicted" => "addict", "bleed" => "bleed" } +146: words_gb.each do |original, stemmed| +147: assert_equal stemmed, original.porter2_step1b(true), +148: "#{original} should have stemmed to #{stemmed} but got #{original.porter2_step1b(original.porter2_r1)} instead" +149: end +150: end
- # File test/tc_porter2_parts.rb, line 156 -156: def test_step_1c -157: assert_equal "cri", "cry".step_1c -158: assert_equal "by", "by".step_1c -159: assert_equal "saY", "saY".step_1c -160: assert_equal "abbeY", "abbeY".step_1c -161: end+ # File test/tc_porter2_parts.rb, line 152 +152: def test_step_1c +153: assert_equal "cri", "cry".porter2_step1c +154: assert_equal "by", "by".porter2_step1c +155: assert_equal "saY", "saY".porter2_step1c +156: assert_equal "abbeY", "abbeY".porter2_step1c +157: end
- # File test/tc_porter2_parts.rb, line 163 -163: def test_step_2 -164: assert_equal "abac", "abac".step_2 -165: -166: assert_equal "nationalize", "nationalization".step_2 -167: assert_equal "nationalisate", "nationalisation".step_2 -168: assert_equal "nationalize", "nationalization".step_2(true) -169: assert_equal "nationalise", "nationalisation".step_2(true) -170: # Repeat the steps to ensure that the english-gb behaviour isn't sticky -171: assert_equal "nationalize", "nationalization".step_2(false) -172: assert_equal "nationalisate", "nationalisation".step_2(false) -173: assert_equal "nationalize", "nationalization".step_2 -174: assert_equal "nationalisate", "nationalisation".step_2 -175: -176: assert_equal "nationalize", "nationalizer".step_2 -177: assert_equal "nationaliser", "nationaliser".step_2 -178: assert_equal "nationalize", "nationalizer".step_2(true) -179: assert_equal "nationalise", "nationaliser".step_2(true) -180: -181: assert_equal "abaction", "abactional".step_2 -182: assert_equal "abacence", "abacenci".step_2 -183: assert_equal "abacance", "abacanci".step_2 -184: assert_equal "abacable", "abacabli".step_2 -185: assert_equal "abacent", "abacentli".step_2 -186: assert_equal "abacize", "abacizer".step_2 -187: assert_equal "abacize", "abacization".step_2 -188: assert_equal "abacate", "abacational".step_2 -189: assert_equal "abacate", "abacation".step_2 -190: assert_equal "abacate", "abacator".step_2 -191: assert_equal "abacal", "abacalism".step_2 -192: assert_equal "abacal", "abacaliti".step_2 -193: assert_equal "abacal", "abacalli".step_2 -194: assert_equal "abacful", "abacfulness".step_2 -195: assert_equal "abacous", "abacousli".step_2 -196: assert_equal "abacous", "abacousness".step_2 -197: assert_equal "abacive", "abaciveness".step_2 -198: assert_equal "abacive", "abaciviti".step_2 -199: assert_equal "abiliti", "abiliti".step_2 -200: assert_equal "abacble", "abacbiliti".step_2 -201: assert_equal "abacble", "abacbli".step_2 -202: assert_equal "abacful", "abacfulli".step_2 -203: assert_equal "abacless", "abaclessli".step_2 -204: assert_equal "abaclog", "abaclogi".step_2 -205: -206: assert_equal "abac", "abacli".step_2 -207: assert_equal "abd", "abdli".step_2 -208: assert_equal "abe", "abeli".step_2 -209: assert_equal "abg", "abgli".step_2 -210: assert_equal "abh", "abhli".step_2 -211: assert_equal "abk", "abkli".step_2 -212: assert_equal "abm", "abmli".step_2 -213: assert_equal "abn", "abnli".step_2 -214: assert_equal "abr", "abrli".step_2 -215: assert_equal "abt", "abtli".step_2 -216: assert_equal "abali", "abali".step_2 -217: -218: assert_equal "bad", "badli".step_2 -219: assert_equal "fluentli", "fluentli".step_2 -220: assert_equal "geolog", "geologi".step_2 -221: end+ # File test/tc_porter2_parts.rb, line 159 +159: def test_step_2 +160: assert_equal "abac", "abac".porter2_step2 +161: +162: assert_equal "nationalize", "nationalization".porter2_step2 +163: assert_equal "nationalisate", "nationalisation".porter2_step2 +164: assert_equal "nationalize", "nationalization".porter2_step2(true) +165: assert_equal "nationalise", "nationalisation".porter2_step2(true) +166: # Repeat the steps to ensure that the english-gb behaviour isn't sticky +167: assert_equal "nationalize", "nationalization".porter2_step2(false) +168: assert_equal "nationalisate", "nationalisation".porter2_step2(false) +169: assert_equal "nationalize", "nationalization".porter2_step2 +170: assert_equal "nationalisate", "nationalisation".porter2_step2 +171: +172: assert_equal "nationalize", "nationalizer".porter2_step2 +173: assert_equal "nationaliser", "nationaliser".porter2_step2 +174: assert_equal "nationalize", "nationalizer".porter2_step2(true) +175: assert_equal "nationalise", "nationaliser".porter2_step2(true) +176: +177: assert_equal "abaction", "abactional".porter2_step2 +178: assert_equal "abacence", "abacenci".porter2_step2 +179: assert_equal "abacance", "abacanci".porter2_step2 +180: assert_equal "abacable", "abacabli".porter2_step2 +181: assert_equal "abacent", "abacentli".porter2_step2 +182: assert_equal "abacize", "abacizer".porter2_step2 +183: assert_equal "abacize", "abacization".porter2_step2 +184: assert_equal "abacate", "abacational".porter2_step2 +185: assert_equal "abacate", "abacation".porter2_step2 +186: assert_equal "abacate", "abacator".porter2_step2 +187: assert_equal "abacal", "abacalism".porter2_step2 +188: assert_equal "abacal", "abacaliti".porter2_step2 +189: assert_equal "abacal", "abacalli".porter2_step2 +190: assert_equal "abacful", "abacfulness".porter2_step2 +191: assert_equal "abacous", "abacousli".porter2_step2 +192: assert_equal "abacous", "abacousness".porter2_step2 +193: assert_equal "abacive", "abaciveness".porter2_step2 +194: assert_equal "abacive", "abaciviti".porter2_step2 +195: assert_equal "abiliti", "abiliti".porter2_step2 +196: assert_equal "abacble", "abacbiliti".porter2_step2 +197: assert_equal "abacble", "abacbli".porter2_step2 +198: assert_equal "abacful", "abacfulli".porter2_step2 +199: assert_equal "abacless", "abaclessli".porter2_step2 +200: assert_equal "abaclog", "abaclogi".porter2_step2 +201: +202: assert_equal "abac", "abacli".porter2_step2 +203: assert_equal "abd", "abdli".porter2_step2 +204: assert_equal "abe", "abeli".porter2_step2 +205: assert_equal "abg", "abgli".porter2_step2 +206: assert_equal "abh", "abhli".porter2_step2 +207: assert_equal "abk", "abkli".porter2_step2 +208: assert_equal "abm", "abmli".porter2_step2 +209: assert_equal "abn", "abnli".porter2_step2 +210: assert_equal "abr", "abrli".porter2_step2 +211: assert_equal "abt", "abtli".porter2_step2 +212: assert_equal "abali", "abali".porter2_step2 +213: +214: assert_equal "bad", "badli".porter2_step2 +215: assert_equal "fluentli", "fluentli".porter2_step2 +216: assert_equal "geolog", "geologi".porter2_step2 +217: end
- # File test/tc_porter2_parts.rb, line 223 -223: def test_step_3 -224: assert_equal "abac", "abac".step_3("") -225: -226: assert_equal "national", "nationalize".step_3 -227: assert_equal "nationalise", "nationalise".step_3 -228: assert_equal "national", "nationalise".step_3(true) -229: # Repeat the steps to ensure that the english-gb behaviour isn't sticky -230: assert_equal "national", "nationalize".step_3(false) -231: assert_equal "nationalise", "nationalise".step_3(false) -232: assert_equal "national", "nationalize".step_3 -233: assert_equal "nationalise", "nationalise".step_3 -234: -235: assert_equal "abaction", "abactional".step_3 -236: assert_equal "abacate", "abacational".step_3 -237: assert_equal "abacic", "abacicate".step_3 -238: assert_equal "abacic", "abaciciti".step_3 -239: assert_equal "abacic", "abacical".step_3 -240: assert_equal "abac", "abacful".step_3 -241: assert_equal "abac", "abacness".step_3 -242: -243: assert_equal "abacabac", "abacabacative".step_3 -244: assert_equal "abacabac", "abacabacative".step_3 -245: -246: assert_equal "dryness", "dryness".step_3 -247: end+ # File test/tc_porter2_parts.rb, line 219 +219: def test_step_3 +220: assert_equal "abac", "abac".porter2_step3("") +221: +222: assert_equal "national", "nationalize".porter2_step3 +223: assert_equal "nationalise", "nationalise".porter2_step3 +224: assert_equal "national", "nationalise".porter2_step3(true) +225: # Repeat the steps to ensure that the english-gb behaviour isn't sticky +226: assert_equal "national", "nationalize".porter2_step3(false) +227: assert_equal "nationalise", "nationalise".porter2_step3(false) +228: assert_equal "national", "nationalize".porter2_step3 +229: assert_equal "nationalise", "nationalise".porter2_step3 +230: +231: assert_equal "abaction", "abactional".porter2_step3 +232: assert_equal "abacate", "abacational".porter2_step3 +233: assert_equal "abacic", "abacicate".porter2_step3 +234: assert_equal "abacic", "abaciciti".porter2_step3 +235: assert_equal "abacic", "abacical".porter2_step3 +236: assert_equal "abac", "abacful".porter2_step3 +237: assert_equal "abac", "abacness".porter2_step3 +238: +239: assert_equal "abacabac", "abacabacative".porter2_step3 +240: assert_equal "abacabac", "abacabacative".porter2_step3 +241: +242: assert_equal "dryness", "dryness".porter2_step3 +243: end
- # File test/tc_porter2_parts.rb, line 249 -249: def test_step_4 -250: assert_equal "abac", "abac".step_4("") -251: -252: assert_equal "nation", "nationize".step_4 -253: assert_equal "nationise", "nationise".step_4 -254: assert_equal "nation", "nationize".step_4(true) -255: assert_equal "nation", "nationise".step_4(true) -256: assert_equal "nation", "nationize".step_4(false) -257: assert_equal "nationise", "nationise".step_4(false) -258: assert_equal "nation", "nationize".step_4() -259: assert_equal "nationise", "nationise".step_4() -260: -261: assert_equal "abac", "abacal".step_4 -262: assert_equal "abac", "abacance".step_4 -263: assert_equal "abac", "abacence".step_4 -264: assert_equal "abac", "abacer".step_4 -265: assert_equal "abac", "abacic".step_4 -266: assert_equal "abacer", "abacerable".step_4 -267: assert_equal "abac", "abacible".step_4 -268: assert_equal "abac", "abacant".step_4 -269: assert_equal "abac", "abacement".step_4 # Check we handle overlapping suffixes properly -270: assert_equal "abacac", "abacacement".step_4 -271: assert_equal "abacac", "abacacment".step_4 -272: assert_equal "abac", "abacment".step_4 -273: assert_equal "abac", "abacent".step_4 -274: assert_equal "abac", "abacism".step_4 -275: assert_equal "abac", "abacate".step_4 -276: assert_equal "abac", "abaciti".step_4 -277: assert_equal "abac", "abacous".step_4 -278: assert_equal "abac", "abacive".step_4 -279: assert_equal "abac", "abacize".step_4 -280: assert_equal "abacion", "abacion".step_4 -281: assert_equal "abacs", "abacsion".step_4 -282: assert_equal "abact", "abaction".step_4 -283: assert_equal "abction", "abction".step_4 -284: assert_equal "ablut", "ablution".step_4 -285: assert_equal "agreement", "agreement".step_4 -286: -287: assert_equal "abcal", "abcal".step_4 # No removal if suffix isn't in R2 -288: end+ # File test/tc_porter2_parts.rb, line 245 +245: def test_step_4 +246: assert_equal "abac", "abac".porter2_step4("") +247: +248: assert_equal "nation", "nationize".porter2_step4 +249: assert_equal "nationise", "nationise".porter2_step4 +250: assert_equal "nation", "nationize".porter2_step4(true) +251: assert_equal "nation", "nationise".porter2_step4(true) +252: assert_equal "nation", "nationize".porter2_step4(false) +253: assert_equal "nationise", "nationise".porter2_step4(false) +254: assert_equal "nation", "nationize".porter2_step4() +255: assert_equal "nationise", "nationise".porter2_step4() +256: +257: assert_equal "abac", "abacal".porter2_step4 +258: assert_equal "abac", "abacance".porter2_step4 +259: assert_equal "abac", "abacence".porter2_step4 +260: assert_equal "abac", "abacer".porter2_step4 +261: assert_equal "abac", "abacic".porter2_step4 +262: assert_equal "abacer", "abacerable".porter2_step4 +263: assert_equal "abac", "abacible".porter2_step4 +264: assert_equal "abac", "abacant".porter2_step4 +265: assert_equal "abac", "abacement".porter2_step4 # Check we handle overlapping suffixes properly +266: assert_equal "abacac", "abacacement".porter2_step4 +267: assert_equal "abacac", "abacacment".porter2_step4 +268: assert_equal "abac", "abacment".porter2_step4 +269: assert_equal "abac", "abacent".porter2_step4 +270: assert_equal "abac", "abacism".porter2_step4 +271: assert_equal "abac", "abacate".porter2_step4 +272: assert_equal "abac", "abaciti".porter2_step4 +273: assert_equal "abac", "abacous".porter2_step4 +274: assert_equal "abac", "abacive".porter2_step4 +275: assert_equal "abac", "abacize".porter2_step4 +276: assert_equal "abacion", "abacion".porter2_step4 +277: assert_equal "abacs", "abacsion".porter2_step4 +278: assert_equal "abact", "abaction".porter2_step4 +279: assert_equal "abction", "abction".porter2_step4 +280: assert_equal "ablut", "ablution".porter2_step4 +281: assert_equal "agreement", "agreement".porter2_step4 +282: +283: assert_equal "abcal", "abcal".porter2_step4 # No removal if suffix isn't in R2 +284: end
- # File test/tc_porter2_parts.rb, line 290 -290: def test_step_5 -291: assert_equal "abac", "abac".step_5 -292: -293: assert_equal "abacl", "abacll".step_5 -294: assert_equal "abcll", "abcll".step_5 -295: -296: assert_equal "abc", "abc".step_5 -297: assert_equal "abl", "able".step_5 -298: assert_equal "abe", "abe".step_5 -299: assert_equal "abac", "abace".step_5 -300: assert_equal "bawac", "bawace".step_5 -301: end+ # File test/tc_porter2_parts.rb, line 286 +286: def test_step_5 +287: assert_equal "abac", "abac".porter2_step5 +288: +289: assert_equal "abacl", "abacll".porter2_step5 +290: assert_equal "abcll", "abcll".porter2_step5 +291: +292: assert_equal "abc", "abc".porter2_step5 +293: assert_equal "abl", "able".porter2_step5 +294: assert_equal "abe", "abe".porter2_step5 +295: assert_equal "abac", "abace".porter2_step5 +296: assert_equal "bawac", "bawace".porter2_step5 +297: end
- # File test/tc_porter2_parts.rb, line 18 -18: def test_tidy -19: assert_equal "abacde", "abacde".porter2_tidy -20: assert_equal "abacde", " abacde ".porter2_tidy -21: assert_equal "abacde", "ABACDE".porter2_tidy -22: assert_equal "ab'cde", "abâcde".porter2_tidy -23: assert_equal "ab'cde", "abâcde".porter2_tidy -24: assert_equal "ab'c'de", "abâcâde".porter2_tidy -25: assert_equal "ab'c'de", "abâcâde".porter2_tidy -26: assert_equal "''abacde", "ââabacde".porter2_tidy -27: end+ # File test/tc_porter2_parts.rb, line 13 +13: def test_tidy +14: assert_equal "abacde", "abacde".porter2_tidy +15: assert_equal "abacde", " abacde ".porter2_tidy +16: assert_equal "abacde", "ABACDE".porter2_tidy +17: assert_equal "ab'cde", "abâcde".porter2_tidy +18: assert_equal "ab'cde", "abâcde".porter2_tidy +19: assert_equal "ab'c'de", "abâcâde".porter2_tidy +20: assert_equal "ab'c'de", "abâcâde".porter2_tidy +21: assert_equal "''abacde", "ââabacde".porter2_tidy +22: end