From a222cc928502d3bb6ecd3d03c532ce7f9804acb4 Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Wed, 5 Jan 2011 11:41:22 +0000 Subject: [PATCH] Split constants into a module, procedures directly into String. --- doc/Stemmable.html | 1104 ----------------------------- doc/String.html | 1046 ++++++++++++++++++++++++++- doc/TestPorter2.html | 664 +++++++++-------- doc/created.rid | 9 +- doc/index.html | 44 +- doc/lib/porter2_rb.html | 6 +- doc/test/tc_porter2_full_rb.html | 2 +- doc/test/tc_porter2_parts_rb.html | 2 +- lib/porter2.rb | 731 +++++++++---------- test/tc_porter2_full.rb | 8 +- test/tc_porter2_parts.rb | 314 ++++---- 11 files changed, 1906 insertions(+), 2024 deletions(-) delete mode 100644 doc/Stemmable.html diff --git a/doc/Stemmable.html b/doc/Stemmable.html deleted file mode 100644 index 9594c5a..0000000 --- a/doc/Stemmable.html +++ /dev/null @@ -1,1104 +0,0 @@ - - - - - - - Module: Stemmable - - - - - - - - - - - -
-
-
-

- Home - Classes - Methods -

-
-
- -
-
-

In Files

-
- -
-
- - -
- - - -
- - - -
-

Class Index - [+]

-
-
- Quicksearch - -
-
- - - -
- - -
-
- -
-

Stemmable

- -
-

-Porter stemmer in Ruby. -

-

-This is the Porter 2 stemming algorithm, as described at snowball.tartarus.org/algorithms/english/stemmer.html -The original paper is: -

-
-  Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
-  no. 3, pp 130-137
- -
- - - -
-

Constants

-
- -
C
- -

-A non-vowel -

- - -
V
- -

-A vowel -

- - -
CW
- -

-A non-vowel other than w, x, or Y -

- - -
Double
- -

-Doubles created when added a suffix: these are undoubled when stemmed -

- - -
Valid_LI
- -

-A valid letter that can come before ‘li’ -

- - -
SHORT_SYLLABLE
- -

-A specification for a short syllable -

- - -
STEP_2_MAPS
- -

-Suffix transformations used in Step 2. (ogi, li endings dealt with in -procedure) -

- - -
STEP_3_MAPS
- -

-Suffix transformations used in Step 3. (ative ending dealt with in -procedure) -

- - -
STEP_4_MAPS
- -

-Suffix transformations used in Step 4. -

- - -
SPECIAL_CASES
- -

-Special-case stemmings -

- - -
STEP_1A_SPECIAL_CASES
- -

-Special case words to ignore after step 1a. -

- - -
-
- - - - - - - -
-

Public Instance Methods

- - -
- - -
- - porter2_ends_with_short_syllable?() - click to toggle source - -
- -
- -

-A short syllable in a word is either -

-
    -
  1. -a vowel followed by a non-vowel other than w, x or Y and preceded by -

    -
  2. -
-

-a non-vowel, or -

-
    -
  1. -a vowel at the beginning of the word followed by a non-vowel. -

    -
  2. -
- - - -
-
-     # File lib/porter2.rb, line 155
-155:   def porter2_ends_with_short_syllable?
-156:     self =~ /#{SHORT_SYLLABLE}$/ ? true : false
-157:   end
-
- -
- - - - -
- - -
- - -
- - porter2_is_short_word?() - click to toggle source - -
- -
- -

-A word is short if it ends in a short syllable, and if R1 is null -

- - - -
-
-     # File lib/porter2.rb, line 160
-160:   def porter2_is_short_word?
-161:     self.porter2_ends_with_short_syllable? and self.porter2_r1.empty?
-162:   end
-
- -
- - - - -
- - -
- - -
- - porter2_postprocess() - click to toggle source - -
- -
- - - - - -
-
-     # File lib/porter2.rb, line 311
-311:   def porter2_postprocess
-312:     self.gsub(/Y/, 'y')
-313:   end
-
- -
- - - - -
- - -
- - -
- - porter2_preprocess() - click to toggle source - -
- -
- - - - - -
-
-     # File lib/porter2.rb, line 122
-122:   def porter2_preprocess    
-123:     w = self.dup
-124: 
-125:     # remove any initial apostrophe

-126:     w.gsub!(/^'*(.)/, '\1')
-127:     
-128:     # set initial y, or y after a vowel, to Y

-129:     w.gsub!(/^y/, "Y")
-130:     w.gsub!(/(#{V})y/, '\1Y')
-131:     
-132:     w
-133:   end
-
- -
- - - - -
- - -
- - -
- - porter2_r1() - click to toggle source - -
- -
- -

-The word after the first non-vowel after the first vowel -

- - - -
-
-     # File lib/porter2.rb, line 136
-136:   def porter2_r1
-137:     if self =~ /^(gener|commun|arsen)(?<r1>.*)/
-138:       Regexp.last_match(:r1)
-139:     else
-140:       self =~ /#{V}#{C}(?<r1>.*)$/
-141:       Regexp.last_match(:r1) || ""
-142:     end
-143:   end
-
- -
- - - - -
- - -
- - -
- - porter2_r2() - click to toggle source - -
- -
- -

-R1 after the first non-vowel after the first vowel -

- - - -
-
-     # File lib/porter2.rb, line 146
-146:   def porter2_r2
-147:     self.porter2_r1 =~ /#{V}#{C}(?<r2>.*)$/
-148:     Regexp.last_match(:r2) || ""
-149:   end
-
- -
- - - - -
- - -
- - -
- - porter2_stem(gb_english = false) - click to toggle source - -
- -
- - - - - -
-
-     # File lib/porter2.rb, line 316
-316:   def porter2_stem(gb_english = false)
-317:     preword = self.porter2_tidy
-318:     return preword if preword.length <= 2
-319: 
-320:     word = preword.porter2_preprocess
-321:     
-322:     if SPECIAL_CASES.has_key? word
-323:       SPECIAL_CASES[word]
-324:     else
-325:       w1a = word.step_0.step_1a
-326:       if STEP_1A_SPECIAL_CASES.include? w1a 
-327:         w1a
-328:       else
-329:         w1a.step_1b(gb_english).step_1c.step_2(gb_english).step_3(gb_english).step_4(gb_english).step_5.porter2_postprocess
-330:       end
-331:     end
-332:   end
-
- -
- - -
- Also aliased as: stem -
- - - -
- - -
- - -
- - porter2_stem_verbose(gb_english = false) - click to toggle source - -
- -
- - - - - -
-
-     # File lib/porter2.rb, line 334
-334:   def porter2_stem_verbose(gb_english = false)
-335:     preword = self.porter2_tidy
-336:     puts "Preword: #{preword}"
-337:     return preword if preword.length <= 2
-338: 
-339:     word = preword.porter2_preprocess
-340:     puts "Preprocessed: #{word}"
-341:     
-342:     if SPECIAL_CASES.has_key? word
-343:       puts "Returning #{word} as special case #{SPECIAL_CASES[word]}"
-344:       SPECIAL_CASES[word]
-345:     else
-346:       r1 = word.porter2_r1
-347:       r2 = word.porter2_r2
-348:       puts "R1 = #{r1}, R2 = #{r2}"
-349:     
-350:       w0 = word.step_0 ; puts "After step 0:  #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})"
-351:       w1a = w0.step_1a ; puts "After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})"
-352:       
-353:       if STEP_1A_SPECIAL_CASES.include? w1a
-354:         puts "Returning #{w1a} as 1a special case"
-355:         w1a
-356:       else
-357:         w1b = w1a.step_1b(gb_english) ; puts "After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})"
-358:         w1c = w1b.step_1c ; puts "After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})"
-359:         w2 = w1c.step_2(gb_english) ; puts "After step 2:  #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})"
-360:         w3 = w2.step_3(gb_english) ; puts "After step 3:  #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})"
-361:         w4 = w3.step_4(gb_english) ; puts "After step 4:  #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})"
-362:         w5 = w4.step_5 ; puts "After step 5:  #{w5}"
-363:         wpost = w5.porter2_postprocess ; puts "After postprocess: #{wpost}"
-364:         wpost
-365:       end
-366:     end
-367:   end
-
- -
- - - - -
- - -
- - -
- - porter2_tidy() - click to toggle source - -
- -
- -

-Tidy up the word before we get down to the algorithm -

- - - -
-
-     # File lib/porter2.rb, line 112
-112:   def porter2_tidy
-113:     preword = self.to_s.strip.downcase
-114:     
-115:     # map apostrophe-like characters to apostrophes

-116:     preword.gsub!(/‘/, "'")
-117:     preword.gsub!(/’/, "'")
-118: 
-119:     preword
-120:   end
-
- -
- - - - -
- - -
- - -
- - stem(gb_english = false) - click to toggle source - -
- -
- - - - - -
- - - - -
- Alias for: porter2_stem -
- -
- - -
- - -
- - step_0() - click to toggle source - -
- -
- -

-Search for the longest among the suffixes, -

-
    -
  • -’ -

    -
  • -
  • -’s -

    -
  • -
  • -’s’ -

    -
  • -
-

-and remove if found. -

- - - -
-
-     # File lib/porter2.rb, line 169
-169:   def step_0
-170:     self.sub!(/(.)('s'|'s|')$/, '\1') || self
-171:   end
-
- -
- - - - -
- - -
- - -
- - step_1a() - click to toggle source - -
- -
- -

-Remove plural suffixes -

- - - -
-
-     # File lib/porter2.rb, line 174
-174:   def step_1a
-175:     if self =~ /sses$/
-176:       self.sub(/sses$/, 'ss')
-177:     elsif self =~ /..(ied|ies)$/
-178:       self.sub(/(ied|ies)$/, 'i')
-179:     elsif self =~ /(ied|ies)$/
-180:       self.sub(/(ied|ies)$/, 'ie')
-181:     elsif self =~ /(us|ss)$/
-182:       self
-183:     elsif self =~ /s$/
-184:       if self =~ /(#{V}.+)s$/
-185:         self.sub(/s$/, '') 
-186:       else
-187:         self
-188:       end
-189:     else
-190:       self
-191:     end
-192:   end
-
- -
- - - - -
- - -
- - -
- - step_1b(gb_english = false) - click to toggle source - -
- -
- - - - - -
-
-     # File lib/porter2.rb, line 194
-194:   def step_1b(gb_english = false)
-195:     if self =~ /(eed|eedly)$/
-196:       if self.porter2_r1 =~ /(eed|eedly)$/
-197:         self.sub(/(eed|eedly)$/, 'ee')
-198:       else
-199:         self
-200:       end
-201:     else
-202:       w = self.dup
-203:       if w =~ /#{V}.*(ed|edly|ing|ingly)$/
-204:         w.sub!(/(ed|edly|ing|ingly)$/, '')
-205:         if w =~ /(at|lb|iz)$/
-206:           w += 'e' 
-207:         elsif w =~ /is$/ and gb_english
-208:           w += 'e' 
-209:         elsif w =~ /#{Double}$/
-210:           w.chop!
-211:         elsif w.porter2_is_short_word?
-212:           w += 'e'
-213:         end
-214:       end
-215:       w
-216:     end
-217:   end
-
- -
- - - - -
- - -
- - -
- - step_1c() - click to toggle source - -
- -
- - - - - -
-
-     # File lib/porter2.rb, line 220
-220:   def step_1c
-221:     if self =~ /.+#{C}(y|Y)$/
-222:       self.sub(/(y|Y)$/, 'i')
-223:     else
-224:       self
-225:     end
-226:   end
-
- -
- - - - -
- - -
- - -
- - step_2(gb_english = false) - click to toggle source - -
- -
- - - - - -
-
-     # File lib/porter2.rb, line 229
-229:   def step_2(gb_english = false)
-230:     r1 = self.porter2_r1
-231:     s2m = STEP_2_MAPS.dup
-232:     if gb_english
-233:       s2m["iser"] = "ise"
-234:       s2m["isation"] = "ise"
-235:     end
-236:     step_2_re = Regexp.union(s2m.keys.map {|r| Regexp.new(r + "$")})
-237:     if self =~ step_2_re
-238:       if r1 =~ /#{$&}$/
-239:         self.sub(/#{$&}$/, s2m[$&])
-240:       else
-241:         self
-242:       end
-243:     elsif r1 =~ /li$/ and self =~ /(#{Valid_LI})li$/
-244:       self.sub(/li$/, '')
-245:     elsif r1 =~ /ogi$/ and self =~ /logi$/
-246:       self.sub(/ogi$/, 'og')
-247:     else
-248:       self
-249:     end
-250:   end
-
- -
- - - - -
- - -
- - -
- - step_3(gb_english = false) - click to toggle source - -
- -
- - - - - -
-
-     # File lib/porter2.rb, line 253
-253:   def step_3(gb_english = false)
-254:     if self =~ /ative$/ and self.porter2_r2 =~ /ative$/
-255:       self.sub(/ative$/, '')
-256:     else
-257:       s3m = STEP_3_MAPS.dup
-258:       if gb_english
-259:         s3m["alise"] = "al"
-260:       end
-261:       step_3_re = Regexp.union(s3m.keys.map {|r| Regexp.new(r + "$")})
-262:       r1 = self.porter2_r1
-263:       if self =~ step_3_re and r1 =~ /#{$&}$/ 
-264:         self.sub(/#{$&}$/, s3m[$&])
-265:       else
-266:         self
-267:       end
-268:     end
-269:   end
-
- -
- - - - -
- - -
- - -
- - step_4(gb_english = false) - click to toggle source - -
- -
- - - - - -
-
-     # File lib/porter2.rb, line 272
-272:   def step_4(gb_english = false)
-273:     if self.porter2_r2 =~ /ion$/ and self =~ /(s|t)ion$/
-274:       self.sub(/ion$/, '')
-275:     else
-276:       s4m = STEP_4_MAPS.dup
-277:       if gb_english
-278:         s4m["ise"] = ""
-279:       end
-280:       step_4_re = Regexp.union(s4m.keys.map {|r| Regexp.new(r + "$")})
-281:       r2 = self.porter2_r2
-282:       if self =~ step_4_re
-283:         if r2 =~ /#{$&}/
-284:           self.sub(/#{$&}$/, s4m[$&])
-285:         else
-286:           self
-287:         end
-288:       else
-289:         self
-290:       end
-291:     end
-292:   end
-
- -
- - - - -
- - -
- - -
- - step_5() - click to toggle source - -
- -
- - - - - -
-
-     # File lib/porter2.rb, line 295
-295:   def step_5
-296:     if self =~ /ll$/ and self.porter2_r2 =~ /l$/
-297:       self.sub(/ll$/, 'l') 
-298:     elsif self =~ /e$/ and self.porter2_r2 =~ /e$/ 
-299:       self.sub(/e$/, '') 
-300:     else
-301:       r1 = self.porter2_r1
-302:       if self =~ /e$/ and r1 =~ /e$/ and not self =~ /#{SHORT_SYLLABLE}e$/
-303:         self.sub(/e$/, '')
-304:       else
-305:         self
-306:       end
-307:     end
-308:   end
-
- -
- - - - -
- - -
- - -
- - -
- -

Disabled; run with --debug to generate this.

- -
- -
-

[Validate]

-

Generated with the Darkfish - Rdoc Generator 1.1.6.

-
- - - - diff --git a/doc/String.html b/doc/String.html index 9a5f5bb..e0921af 100644 --- a/doc/String.html +++ b/doc/String.html @@ -38,9 +38,6 @@
@@ -550,20 +548,20 @@ end
-     # File test/tc_porter2_parts.rb, line 105
-105:   def test_step_0
-106:     assert_equal "abac", "abac".step_0
-107:     assert_equal "abac", "abac'".step_0
-108:     assert_equal "abac", "abac's".step_0
-109:     assert_equal "abac", "abac's'".step_0
-110:     assert_equal "ab'c", "ab'c".step_0
-111:     assert_equal "ab'sc", "ab'sc".step_0
-112:     assert_equal "ab's'c", "ab's'c".step_0
-113:     assert_equal "ab'sc", "ab'sc's".step_0
-114:     assert_equal "'", "'".step_0
-115:     assert_equal "'s", "'s".step_0
-116:     assert_equal "'s", "'s'".step_0
-117:   end
+ # File test/tc_porter2_parts.rb, line 101 +101: def test_step_0 +102: assert_equal "abac", "abac".porter2_step0 +103: assert_equal "abac", "abac'".porter2_step0 +104: assert_equal "abac", "abac's".porter2_step0 +105: assert_equal "abac", "abac's'".porter2_step0 +106: assert_equal "ab'c", "ab'c".porter2_step0 +107: assert_equal "ab'sc", "ab'sc".porter2_step0 +108: assert_equal "ab's'c", "ab's'c".porter2_step0 +109: assert_equal "ab'sc", "ab'sc's".porter2_step0 +110: assert_equal "'", "'".porter2_step0 +111: assert_equal "'s", "'s".porter2_step0 +112: assert_equal "'s", "'s'".porter2_step0 +113: end
@@ -594,21 +592,21 @@ end
-     # File test/tc_porter2_parts.rb, line 119
-119:   def test_step_1a
-120:     assert_equal "abacde", "abacde".step_1a
-121:     assert_equal "abacess", "abacesses".step_1a
-122:     assert_equal "tie", "ties".step_1a
-123:     assert_equal "tie", "tied".step_1a
-124:     assert_equal "cri", "cries".step_1a
-125:     assert_equal "cri", "cried".step_1a
-126:     assert_equal "gas", "gas".step_1a
-127:     assert_equal "this", "this".step_1a
-128:     assert_equal "gap", "gaps".step_1a
-129:     assert_equal "kiwi", "kiwis".step_1a
-130:     assert_equal "abacus", "abacus".step_1a
-131:     assert_equal "abacess", "abacess".step_1a
-132:   end
+ # File test/tc_porter2_parts.rb, line 115 +115: def test_step_1a +116: assert_equal "abacde", "abacde".porter2_step1a +117: assert_equal "abacess", "abacesses".porter2_step1a +118: assert_equal "tie", "ties".porter2_step1a +119: assert_equal "tie", "tied".porter2_step1a +120: assert_equal "cri", "cries".porter2_step1a +121: assert_equal "cri", "cried".porter2_step1a +122: assert_equal "gas", "gas".porter2_step1a +123: assert_equal "this", "this".porter2_step1a +124: assert_equal "gap", "gaps".porter2_step1a +125: assert_equal "kiwi", "kiwis".porter2_step1a +126: assert_equal "abacus", "abacus".porter2_step1a +127: assert_equal "abacess", "abacess".porter2_step1a +128: end
@@ -639,28 +637,28 @@ end
-     # File test/tc_porter2_parts.rb, line 134
-134:   def test_step_1b
-135:     assert_equal "abacde", "abacde".step_1b
-136:     words_non_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate", 
-137:              "hopping" => "hop", "hopped" => "hop",
-138:              "hoped" => "hope", "hoping" => "hope",
-139:              "atomized" => "atomize", "atomised" => "atomis",
-140:              "addicted" => "addict", "bleed" => "bleed" }
-141:     words_non_gb.each do |original, stemmed|
-142:       assert_equal stemmed, original.step_1b, 
-143:           "#{original} should have stemmed to #{stemmed} but got #{original.step_1b(original.porter2_r1)} instead"
-144:     end
-145:     words_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate", 
-146:              "hopping" => "hop", "hopped" => "hop",
-147:              "hoped" => "hope", "hoping" => "hope",
-148:              "atomized" => "atomize", "atomised" => "atomise",
-149:              "addicted" => "addict", "bleed" => "bleed" }
-150:     words_gb.each do |original, stemmed|
-151:       assert_equal stemmed, original.step_1b(true), 
-152:           "#{original} should have stemmed to #{stemmed} but got #{original.step_1b(original.porter2_r1)} instead"
-153:     end
-154:   end
+ # File test/tc_porter2_parts.rb, line 130 +130: def test_step_1b +131: assert_equal "abacde", "abacde".porter2_step1b +132: words_non_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate", +133: "hopping" => "hop", "hopped" => "hop", +134: "hoped" => "hope", "hoping" => "hope", +135: "atomized" => "atomize", "atomised" => "atomis", +136: "addicted" => "addict", "bleed" => "bleed" } +137: words_non_gb.each do |original, stemmed| +138: assert_equal stemmed, original.porter2_step1b, +139: "#{original} should have stemmed to #{stemmed} but got #{original.porter2_step1b(original.porter2_r1)} instead" +140: end +141: words_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate", +142: "hopping" => "hop", "hopped" => "hop", +143: "hoped" => "hope", "hoping" => "hope", +144: "atomized" => "atomize", "atomised" => "atomise", +145: "addicted" => "addict", "bleed" => "bleed" } +146: words_gb.each do |original, stemmed| +147: assert_equal stemmed, original.porter2_step1b(true), +148: "#{original} should have stemmed to #{stemmed} but got #{original.porter2_step1b(original.porter2_r1)} instead" +149: end +150: end
@@ -691,13 +689,13 @@ end
-     # File test/tc_porter2_parts.rb, line 156
-156:   def test_step_1c
-157:     assert_equal "cri", "cry".step_1c
-158:     assert_equal "by", "by".step_1c
-159:     assert_equal "saY", "saY".step_1c
-160:     assert_equal "abbeY", "abbeY".step_1c
-161:   end
+ # File test/tc_porter2_parts.rb, line 152 +152: def test_step_1c +153: assert_equal "cri", "cry".porter2_step1c +154: assert_equal "by", "by".porter2_step1c +155: assert_equal "saY", "saY".porter2_step1c +156: assert_equal "abbeY", "abbeY".porter2_step1c +157: end
@@ -728,66 +726,66 @@ end
-     # File test/tc_porter2_parts.rb, line 163
-163:   def test_step_2
-164:     assert_equal "abac", "abac".step_2
-165:     
-166:     assert_equal "nationalize", "nationalization".step_2
-167:     assert_equal "nationalisate", "nationalisation".step_2
-168:     assert_equal "nationalize", "nationalization".step_2(true)
-169:     assert_equal "nationalise", "nationalisation".step_2(true)
-170:     # Repeat the steps to ensure that the english-gb behaviour isn't sticky
-171:     assert_equal "nationalize", "nationalization".step_2(false)
-172:     assert_equal "nationalisate", "nationalisation".step_2(false)
-173:     assert_equal "nationalize", "nationalization".step_2
-174:     assert_equal "nationalisate", "nationalisation".step_2
-175:     
-176:     assert_equal "nationalize", "nationalizer".step_2
-177:     assert_equal "nationaliser", "nationaliser".step_2
-178:     assert_equal "nationalize", "nationalizer".step_2(true)
-179:     assert_equal "nationalise", "nationaliser".step_2(true)
-180:     
-181:     assert_equal "abaction", "abactional".step_2
-182:     assert_equal "abacence", "abacenci".step_2
-183:     assert_equal "abacance", "abacanci".step_2
-184:     assert_equal "abacable", "abacabli".step_2
-185:     assert_equal "abacent", "abacentli".step_2
-186:     assert_equal "abacize", "abacizer".step_2
-187:     assert_equal "abacize", "abacization".step_2
-188:     assert_equal "abacate", "abacational".step_2
-189:     assert_equal "abacate", "abacation".step_2
-190:     assert_equal "abacate", "abacator".step_2
-191:     assert_equal "abacal", "abacalism".step_2
-192:     assert_equal "abacal", "abacaliti".step_2
-193:     assert_equal "abacal", "abacalli".step_2
-194:     assert_equal "abacful", "abacfulness".step_2
-195:     assert_equal "abacous", "abacousli".step_2
-196:     assert_equal "abacous", "abacousness".step_2
-197:     assert_equal "abacive", "abaciveness".step_2
-198:     assert_equal "abacive", "abaciviti".step_2
-199:     assert_equal "abiliti", "abiliti".step_2
-200:     assert_equal "abacble", "abacbiliti".step_2
-201:     assert_equal "abacble", "abacbli".step_2
-202:     assert_equal "abacful", "abacfulli".step_2
-203:     assert_equal "abacless", "abaclessli".step_2
-204:     assert_equal "abaclog", "abaclogi".step_2
-205:     
-206:     assert_equal "abac", "abacli".step_2
-207:     assert_equal "abd", "abdli".step_2
-208:     assert_equal "abe", "abeli".step_2
-209:     assert_equal "abg", "abgli".step_2
-210:     assert_equal "abh", "abhli".step_2
-211:     assert_equal "abk", "abkli".step_2
-212:     assert_equal "abm", "abmli".step_2
-213:     assert_equal "abn", "abnli".step_2
-214:     assert_equal "abr", "abrli".step_2
-215:     assert_equal "abt", "abtli".step_2
-216:     assert_equal "abali", "abali".step_2
-217: 
-218:     assert_equal "bad", "badli".step_2
-219:     assert_equal "fluentli", "fluentli".step_2
-220:     assert_equal "geolog", "geologi".step_2
-221:   end
+ # File test/tc_porter2_parts.rb, line 159 +159: def test_step_2 +160: assert_equal "abac", "abac".porter2_step2 +161: +162: assert_equal "nationalize", "nationalization".porter2_step2 +163: assert_equal "nationalisate", "nationalisation".porter2_step2 +164: assert_equal "nationalize", "nationalization".porter2_step2(true) +165: assert_equal "nationalise", "nationalisation".porter2_step2(true) +166: # Repeat the steps to ensure that the english-gb behaviour isn't sticky +167: assert_equal "nationalize", "nationalization".porter2_step2(false) +168: assert_equal "nationalisate", "nationalisation".porter2_step2(false) +169: assert_equal "nationalize", "nationalization".porter2_step2 +170: assert_equal "nationalisate", "nationalisation".porter2_step2 +171: +172: assert_equal "nationalize", "nationalizer".porter2_step2 +173: assert_equal "nationaliser", "nationaliser".porter2_step2 +174: assert_equal "nationalize", "nationalizer".porter2_step2(true) +175: assert_equal "nationalise", "nationaliser".porter2_step2(true) +176: +177: assert_equal "abaction", "abactional".porter2_step2 +178: assert_equal "abacence", "abacenci".porter2_step2 +179: assert_equal "abacance", "abacanci".porter2_step2 +180: assert_equal "abacable", "abacabli".porter2_step2 +181: assert_equal "abacent", "abacentli".porter2_step2 +182: assert_equal "abacize", "abacizer".porter2_step2 +183: assert_equal "abacize", "abacization".porter2_step2 +184: assert_equal "abacate", "abacational".porter2_step2 +185: assert_equal "abacate", "abacation".porter2_step2 +186: assert_equal "abacate", "abacator".porter2_step2 +187: assert_equal "abacal", "abacalism".porter2_step2 +188: assert_equal "abacal", "abacaliti".porter2_step2 +189: assert_equal "abacal", "abacalli".porter2_step2 +190: assert_equal "abacful", "abacfulness".porter2_step2 +191: assert_equal "abacous", "abacousli".porter2_step2 +192: assert_equal "abacous", "abacousness".porter2_step2 +193: assert_equal "abacive", "abaciveness".porter2_step2 +194: assert_equal "abacive", "abaciviti".porter2_step2 +195: assert_equal "abiliti", "abiliti".porter2_step2 +196: assert_equal "abacble", "abacbiliti".porter2_step2 +197: assert_equal "abacble", "abacbli".porter2_step2 +198: assert_equal "abacful", "abacfulli".porter2_step2 +199: assert_equal "abacless", "abaclessli".porter2_step2 +200: assert_equal "abaclog", "abaclogi".porter2_step2 +201: +202: assert_equal "abac", "abacli".porter2_step2 +203: assert_equal "abd", "abdli".porter2_step2 +204: assert_equal "abe", "abeli".porter2_step2 +205: assert_equal "abg", "abgli".porter2_step2 +206: assert_equal "abh", "abhli".porter2_step2 +207: assert_equal "abk", "abkli".porter2_step2 +208: assert_equal "abm", "abmli".porter2_step2 +209: assert_equal "abn", "abnli".porter2_step2 +210: assert_equal "abr", "abrli".porter2_step2 +211: assert_equal "abt", "abtli".porter2_step2 +212: assert_equal "abali", "abali".porter2_step2 +213: +214: assert_equal "bad", "badli".porter2_step2 +215: assert_equal "fluentli", "fluentli".porter2_step2 +216: assert_equal "geolog", "geologi".porter2_step2 +217: end
@@ -818,32 +816,32 @@ end
-     # File test/tc_porter2_parts.rb, line 223
-223:   def test_step_3
-224:     assert_equal "abac", "abac".step_3("")
-225:     
-226:     assert_equal "national", "nationalize".step_3
-227:     assert_equal "nationalise", "nationalise".step_3
-228:     assert_equal "national", "nationalise".step_3(true)
-229:     # Repeat the steps to ensure that the english-gb behaviour isn't sticky
-230:     assert_equal "national", "nationalize".step_3(false)
-231:     assert_equal "nationalise", "nationalise".step_3(false)
-232:     assert_equal "national", "nationalize".step_3
-233:     assert_equal "nationalise", "nationalise".step_3
-234:     
-235:     assert_equal "abaction", "abactional".step_3
-236:     assert_equal "abacate", "abacational".step_3
-237:     assert_equal "abacic", "abacicate".step_3
-238:     assert_equal "abacic", "abaciciti".step_3
-239:     assert_equal "abacic", "abacical".step_3
-240:     assert_equal "abac", "abacful".step_3
-241:     assert_equal "abac", "abacness".step_3
-242:     
-243:     assert_equal "abacabac", "abacabacative".step_3
-244:     assert_equal "abacabac", "abacabacative".step_3
-245:   
-246:     assert_equal "dryness", "dryness".step_3
-247:   end
+ # File test/tc_porter2_parts.rb, line 219 +219: def test_step_3 +220: assert_equal "abac", "abac".porter2_step3("") +221: +222: assert_equal "national", "nationalize".porter2_step3 +223: assert_equal "nationalise", "nationalise".porter2_step3 +224: assert_equal "national", "nationalise".porter2_step3(true) +225: # Repeat the steps to ensure that the english-gb behaviour isn't sticky +226: assert_equal "national", "nationalize".porter2_step3(false) +227: assert_equal "nationalise", "nationalise".porter2_step3(false) +228: assert_equal "national", "nationalize".porter2_step3 +229: assert_equal "nationalise", "nationalise".porter2_step3 +230: +231: assert_equal "abaction", "abactional".porter2_step3 +232: assert_equal "abacate", "abacational".porter2_step3 +233: assert_equal "abacic", "abacicate".porter2_step3 +234: assert_equal "abacic", "abaciciti".porter2_step3 +235: assert_equal "abacic", "abacical".porter2_step3 +236: assert_equal "abac", "abacful".porter2_step3 +237: assert_equal "abac", "abacness".porter2_step3 +238: +239: assert_equal "abacabac", "abacabacative".porter2_step3 +240: assert_equal "abacabac", "abacabacative".porter2_step3 +241: +242: assert_equal "dryness", "dryness".porter2_step3 +243: end
@@ -874,47 +872,47 @@ end
-     # File test/tc_porter2_parts.rb, line 249
-249:   def test_step_4
-250:     assert_equal "abac", "abac".step_4("")
-251:     
-252:     assert_equal "nation", "nationize".step_4
-253:     assert_equal "nationise", "nationise".step_4
-254:     assert_equal "nation", "nationize".step_4(true)
-255:     assert_equal "nation", "nationise".step_4(true)
-256:     assert_equal "nation", "nationize".step_4(false)
-257:     assert_equal "nationise", "nationise".step_4(false)
-258:     assert_equal "nation", "nationize".step_4()
-259:     assert_equal "nationise", "nationise".step_4()
-260:     
-261:     assert_equal "abac", "abacal".step_4
-262:     assert_equal "abac", "abacance".step_4
-263:     assert_equal "abac", "abacence".step_4
-264:     assert_equal "abac", "abacer".step_4
-265:     assert_equal "abac", "abacic".step_4
-266:     assert_equal "abacer", "abacerable".step_4
-267:     assert_equal "abac", "abacible".step_4
-268:     assert_equal "abac", "abacant".step_4
-269:     assert_equal "abac", "abacement".step_4     # Check we handle overlapping suffixes properly
-270:     assert_equal "abacac", "abacacement".step_4
-271:     assert_equal "abacac", "abacacment".step_4
-272:     assert_equal "abac", "abacment".step_4
-273:     assert_equal "abac", "abacent".step_4
-274:     assert_equal "abac", "abacism".step_4
-275:     assert_equal "abac", "abacate".step_4
-276:     assert_equal "abac", "abaciti".step_4
-277:     assert_equal "abac", "abacous".step_4
-278:     assert_equal "abac", "abacive".step_4
-279:     assert_equal "abac", "abacize".step_4
-280:     assert_equal "abacion", "abacion".step_4
-281:     assert_equal "abacs", "abacsion".step_4
-282:     assert_equal "abact", "abaction".step_4
-283:     assert_equal "abction", "abction".step_4
-284:     assert_equal "ablut", "ablution".step_4
-285:     assert_equal "agreement", "agreement".step_4
-286:     
-287:     assert_equal "abcal", "abcal".step_4        # No removal if suffix isn't in R2
-288:   end
+ # File test/tc_porter2_parts.rb, line 245 +245: def test_step_4 +246: assert_equal "abac", "abac".porter2_step4("") +247: +248: assert_equal "nation", "nationize".porter2_step4 +249: assert_equal "nationise", "nationise".porter2_step4 +250: assert_equal "nation", "nationize".porter2_step4(true) +251: assert_equal "nation", "nationise".porter2_step4(true) +252: assert_equal "nation", "nationize".porter2_step4(false) +253: assert_equal "nationise", "nationise".porter2_step4(false) +254: assert_equal "nation", "nationize".porter2_step4() +255: assert_equal "nationise", "nationise".porter2_step4() +256: +257: assert_equal "abac", "abacal".porter2_step4 +258: assert_equal "abac", "abacance".porter2_step4 +259: assert_equal "abac", "abacence".porter2_step4 +260: assert_equal "abac", "abacer".porter2_step4 +261: assert_equal "abac", "abacic".porter2_step4 +262: assert_equal "abacer", "abacerable".porter2_step4 +263: assert_equal "abac", "abacible".porter2_step4 +264: assert_equal "abac", "abacant".porter2_step4 +265: assert_equal "abac", "abacement".porter2_step4 # Check we handle overlapping suffixes properly +266: assert_equal "abacac", "abacacement".porter2_step4 +267: assert_equal "abacac", "abacacment".porter2_step4 +268: assert_equal "abac", "abacment".porter2_step4 +269: assert_equal "abac", "abacent".porter2_step4 +270: assert_equal "abac", "abacism".porter2_step4 +271: assert_equal "abac", "abacate".porter2_step4 +272: assert_equal "abac", "abaciti".porter2_step4 +273: assert_equal "abac", "abacous".porter2_step4 +274: assert_equal "abac", "abacive".porter2_step4 +275: assert_equal "abac", "abacize".porter2_step4 +276: assert_equal "abacion", "abacion".porter2_step4 +277: assert_equal "abacs", "abacsion".porter2_step4 +278: assert_equal "abact", "abaction".porter2_step4 +279: assert_equal "abction", "abction".porter2_step4 +280: assert_equal "ablut", "ablution".porter2_step4 +281: assert_equal "agreement", "agreement".porter2_step4 +282: +283: assert_equal "abcal", "abcal".porter2_step4 # No removal if suffix isn't in R2 +284: end
@@ -945,19 +943,19 @@ end
-     # File test/tc_porter2_parts.rb, line 290
-290:   def test_step_5
-291:     assert_equal "abac", "abac".step_5
-292:     
-293:     assert_equal "abacl", "abacll".step_5
-294:     assert_equal "abcll", "abcll".step_5
-295:     
-296:     assert_equal "abc", "abc".step_5
-297:     assert_equal "abl", "able".step_5
-298:     assert_equal "abe", "abe".step_5
-299:     assert_equal "abac", "abace".step_5
-300:     assert_equal "bawac", "bawace".step_5
-301:   end
+ # File test/tc_porter2_parts.rb, line 286 +286: def test_step_5 +287: assert_equal "abac", "abac".porter2_step5 +288: +289: assert_equal "abacl", "abacll".porter2_step5 +290: assert_equal "abcll", "abcll".porter2_step5 +291: +292: assert_equal "abc", "abc".porter2_step5 +293: assert_equal "abl", "able".porter2_step5 +294: assert_equal "abe", "abe".porter2_step5 +295: assert_equal "abac", "abace".porter2_step5 +296: assert_equal "bawac", "bawace".porter2_step5 +297: end
@@ -988,17 +986,17 @@ end
-    # File test/tc_porter2_parts.rb, line 18
-18:   def test_tidy
-19:     assert_equal "abacde", "abacde".porter2_tidy
-20:     assert_equal "abacde", "  abacde  ".porter2_tidy
-21:     assert_equal "abacde", "ABACDE".porter2_tidy
-22:     assert_equal "ab'cde", "ab‘cde".porter2_tidy
-23:     assert_equal "ab'cde", "ab’cde".porter2_tidy
-24:     assert_equal "ab'c'de", "ab’c’de".porter2_tidy
-25:     assert_equal "ab'c'de", "ab‘c‘de".porter2_tidy
-26:     assert_equal "''abacde", "’‘abacde".porter2_tidy
-27:   end
+ # File test/tc_porter2_parts.rb, line 13 +13: def test_tidy +14: assert_equal "abacde", "abacde".porter2_tidy +15: assert_equal "abacde", " abacde ".porter2_tidy +16: assert_equal "abacde", "ABACDE".porter2_tidy +17: assert_equal "ab'cde", "ab‘cde".porter2_tidy +18: assert_equal "ab'cde", "ab’cde".porter2_tidy +19: assert_equal "ab'c'de", "ab’c’de".porter2_tidy +20: assert_equal "ab'c'de", "ab‘c‘de".porter2_tidy +21: assert_equal "''abacde", "’‘abacde".porter2_tidy +22: end
diff --git a/doc/created.rid b/doc/created.rid index 3424fc2..fd2cc82 100644 --- a/doc/created.rid +++ b/doc/created.rid @@ -1,5 +1,6 @@ -Tue, 04 Jan 2011 16:27:15 +0000 -./test/tc_porter2_parts.rb Sun, 02 Jan 2011 23:49:23 +0000 +Wed, 05 Jan 2011 11:40:35 +0000 +./test/tc_porter2_parts.rb Wed, 05 Jan 2011 11:38:33 +0000 ./test/ts_porter2.rb Mon, 03 Jan 2011 00:20:11 +0000 -./test/tc_porter2_full.rb Sun, 02 Jan 2011 23:49:55 +0000 -./lib/porter2.rb Tue, 04 Jan 2011 16:27:11 +0000 +./test/tc_porter2_full.rb Wed, 05 Jan 2011 11:35:59 +0000 +./lib/porter2_module.rb Wed, 05 Jan 2011 11:34:03 +0000 +./lib/porter2.rb Wed, 05 Jan 2011 11:33:59 +0000 diff --git a/doc/index.html b/doc/index.html index 6eb787f..6c609f9 100644 --- a/doc/index.html +++ b/doc/index.html @@ -31,7 +31,7 @@

Classes/Modules