X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=lib%2Fporter2.rb;fp=lib%2Fporter2.rb;h=3f43303a65e3e11499bd263f72cc1ee3369a8e34;hb=a222cc928502d3bb6ecd3d03c532ce7f9804acb4;hp=2236b12272ad81b2db73a637176d693406b0c761;hpb=9dd2505747fbbd6040e9516dd0ae2e8ec5c6a4d2;p=porter2stemmer.git

diff --git a/lib/porter2.rb b/lib/porter2.rb
index 2236b12..3f43303 100644
--- a/lib/porter2.rb
+++ b/lib/porter2.rb
@@ -1,377 +1,354 @@
-# coding: utf-8
-
-# Porter 2 stemmer in Ruby.
-#
-# This is the Porter 2 stemming algorithm, as described at 
-# http://snowball.tartarus.org/algorithms/english/stemmer.html
-# The original paper is:
-#
-# Porter, 1980, "An algorithm for suffix stripping", _Program_, Vol. 14,
-# no. 3, pp 130-137
-
-module Stemmable
-  # A non-vowel
-  C = "[^aeiouy]"
-
-  # A vowel
-  V = "[aeiouy]"
-
-  # A non-vowel other than w, x, or Y
-  CW = "[^aeiouywxY]"
-
-  # Doubles created when added a suffix: these are undoubled when stemmed
-  Double = "(bb|dd|ff|gg|mm|nn|pp|rr|tt)"
-
-  # A valid letter that can come before 'li'
-  Valid_LI = "[cdeghkmnrt]"
-
-  # A specification for a short syllable
-  SHORT_SYLLABLE = "((#{C}#{V}#{CW})|(^#{V}#{C}))"
-
-  # Suffix transformations used in Step 2.
-  # (ogi, li endings dealt with in procedure)
-  STEP_2_MAPS = {"tional" => "tion",
-		 "enci" => "ence",
-                 "anci" => "ance",
-                 "abli" => "able",
-                 "entli" => "ent",
-                 "ization" => "ize",
-                 "izer" => "ize",
-                 "ational" => "ate",
-                 "ation" => "ate",
-                 "ator" => "ate",
-                 "alism" => "al",
-                 "aliti" => "al",
-                 "alli" => "al",
-                 "fulness" => "ful",
-                 "ousli" => "ous",
-                 "ousness" => "ous",
-                 "iveness" => "ive",
-                 "iviti" => "ive",
-                 "biliti" => "ble",
-                 "bli" => "ble",
-                 "fulli" => "ful",
-                 "lessli" => "less" }
-
-  # Suffix transformations used in Step 3.
-  # (ative ending dealt with in procedure)  
-  STEP_3_MAPS = {"tional" => "tion",
-                 "ational" => "ate",
-                 "alize" => "al",
-                 "icate" => "ic",
-                 "iciti" => "ic",
-                 "ical" => "ic",
-                 "ful" => "",
-                 "ness" => "" }
-  
-  # Suffix transformations used in Step 4.
-  STEP_4_MAPS = {"al" => "",
-                 "ance" => "",
-                 "ence" => "",
-                 "er" => "",
-                 "ic" => "",
-                 "able" => "",
-                 "ible" => "",
-                 "ant" => "",
-                 "ement" => "",
-                 "ment" => "",
-                 "ent" => "",
-                 "ism" => "",
-                 "ate" => "",
-                 "iti" => "",
-                 "ous" => "",
-                 "ive" => "",
-                 "ize" => "" }
-  
-  # Special-case stemmings 
-  SPECIAL_CASES = {"skis" => "ski",
-                   "skies" => "sky",
-                    
-                   "dying" => "die",
-                   "lying" => "lie",
-                   "tying" => "tie",
-                   "idly" =>  "idl",
-                   "gently" => "gentl",
-                   "ugly" => "ugli",
-                   "early" => "earli",
-                   "only" => "onli",
-                   "singly" =>"singl",
-                    
-                   "sky" => "sky",
-                   "news" => "news",
-                   "howe" => "howe",
-                   "atlas" => "atlas",
-                   "cosmos" => "cosmos",
-                   "bias" => "bias",
-                   "andes" => "andes" }
-   
-  # Special case words to ignore after step 1a.
-  STEP_1A_SPECIAL_CASES = %w[ inning outing canning herring earring proceed exceed succeed ]
-
-  # Tidy up the word before we get down to the algorithm
-  def porter2_tidy
-    preword = self.to_s.strip.downcase
-    
-    # map apostrophe-like characters to apostrophes
-    preword.gsub!(/â/, "'")
-    preword.gsub!(/â/, "'")
-
-    preword
-  end
-     
-  def porter2_preprocess    
-    w = self.dup
-
-    # remove any initial apostrophe
-    w.gsub!(/^'*(.)/, '\1')
-    
-    # set initial y, or y after a vowel, to Y
-    w.gsub!(/^y/, "Y")
-    w.gsub!(/(#{V})y/, '\1Y')
-    
-    w
-  end
-    
-  # The word after the first non-vowel after the first vowel
-  def porter2_r1
-    if self =~ /^(gener|commun|arsen)(?<r1>.*)/
-      Regexp.last_match(:r1)
-    else
-      self =~ /#{V}#{C}(?<r1>.*)$/
-      Regexp.last_match(:r1) || ""
-    end
-  end
-  
-  # R1 after the first non-vowel after the first vowel
-  def porter2_r2
-    self.porter2_r1 =~ /#{V}#{C}(?<r2>.*)$/
-    Regexp.last_match(:r2) || ""
-  end
-  
-  # A short syllable in a word is either 
-  # 1. a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel, or 
-  # 2. a vowel at the beginning of the word followed by a non-vowel. 
-  def porter2_ends_with_short_syllable?
-    self =~ /#{SHORT_SYLLABLE}$/ ? true : false
-  end
-
-  # A word is short if it ends in a short syllable, and if R1 is null
-  def porter2_is_short_word?
-    self.porter2_ends_with_short_syllable? and self.porter2_r1.empty?
-  end
-  
-  # Search for the longest among the suffixes, 
-  # * '
-  # * 's
-  # * 's'
-  # and remove if found.
-  def step_0
-    self.sub!(/(.)('s'|'s|')$/, '\1') || self
-  end
-  
-  # Remove plural suffixes
-  def step_1a
-    if self =~ /sses$/
-      self.sub(/sses$/, 'ss')
-    elsif self =~ /..(ied|ies)$/
-      self.sub(/(ied|ies)$/, 'i')
-    elsif self =~ /(ied|ies)$/
-      self.sub(/(ied|ies)$/, 'ie')
-    elsif self =~ /(us|ss)$/
-      self
-    elsif self =~ /s$/
-      if self =~ /(#{V}.+)s$/
-        self.sub(/s$/, '') 
-      else
-        self
-      end
-    else
-      self
-    end
-  end
-  
-  def step_1b(gb_english = false)
-    if self =~ /(eed|eedly)$/
-      if self.porter2_r1 =~ /(eed|eedly)$/
-        self.sub(/(eed|eedly)$/, 'ee')
-      else
-        self
-      end
-    else
-      w = self.dup
-      if w =~ /#{V}.*(ed|edly|ing|ingly)$/
-        w.sub!(/(ed|edly|ing|ingly)$/, '')
-        if w =~ /(at|lb|iz)$/
-          w += 'e' 
-        elsif w =~ /is$/ and gb_english
-          w += 'e' 
-        elsif w =~ /#{Double}$/
-	  w.chop!
-        elsif w.porter2_is_short_word?
-          w += 'e'
-        end
-      end
-      w
-    end
-  end
-
-  
-  def step_1c
-    if self =~ /.+#{C}(y|Y)$/
-      self.sub(/(y|Y)$/, 'i')
-    else
-      self
-    end
-  end
-  
-
-  def step_2(gb_english = false)
-    r1 = self.porter2_r1
-    s2m = STEP_2_MAPS.dup
-    if gb_english
-      s2m["iser"] = "ise"
-      s2m["isation"] = "ise"
-    end
-    step_2_re = Regexp.union(s2m.keys.map {|r| Regexp.new(r + "$")})
-    if self =~ step_2_re
-      if r1 =~ /#{$&}$/
-        self.sub(/#{$&}$/, s2m[$&])
-      else
-        self
-      end
-    elsif r1 =~ /li$/ and self =~ /(#{Valid_LI})li$/
-      self.sub(/li$/, '')
-    elsif r1 =~ /ogi$/ and self =~ /logi$/
-      self.sub(/ogi$/, 'og')
-    else
-      self
-    end
-  end
-     
-  
-  def step_3(gb_english = false)
-    if self =~ /ative$/ and self.porter2_r2 =~ /ative$/
-      self.sub(/ative$/, '')
-    else
-      s3m = STEP_3_MAPS.dup
-      if gb_english
-	s3m["alise"] = "al"
-      end
-      step_3_re = Regexp.union(s3m.keys.map {|r| Regexp.new(r + "$")})
-      r1 = self.porter2_r1
-      if self =~ step_3_re and r1 =~ /#{$&}$/ 
-	self.sub(/#{$&}$/, s3m[$&])
-      else
-	self
-      end
-    end
-  end
-  
-  
-  def step_4(gb_english = false)
-    if self.porter2_r2 =~ /ion$/ and self =~ /(s|t)ion$/
-      self.sub(/ion$/, '')
-    else
-      s4m = STEP_4_MAPS.dup
-      if gb_english
-        s4m["ise"] = ""
-      end
-      step_4_re = Regexp.union(s4m.keys.map {|r| Regexp.new(r + "$")})
-      r2 = self.porter2_r2
-      if self =~ step_4_re
-        if r2 =~ /#{$&}/
-          self.sub(/#{$&}$/, s4m[$&])
-        else
-          self
-        end
-      else
-        self
-      end
-    end
-  end
-
-  
-  def step_5
-    if self =~ /ll$/ and self.porter2_r2 =~ /l$/
-      self.sub(/ll$/, 'l') 
-    elsif self =~ /e$/ and self.porter2_r2 =~ /e$/ 
-      self.sub(/e$/, '') 
-    else
-      r1 = self.porter2_r1
-      if self =~ /e$/ and r1 =~ /e$/ and not self =~ /#{SHORT_SYLLABLE}e$/
-        self.sub(/e$/, '')
-      else
-        self
-      end
-    end
-  end
-  
-  
-  def porter2_postprocess
-    self.gsub(/Y/, 'y')
-  end
-
-  
-  def porter2_stem(gb_english = false)
-    preword = self.porter2_tidy
-    return preword if preword.length <= 2
-
-    word = preword.porter2_preprocess
-    
-    if SPECIAL_CASES.has_key? word
-      SPECIAL_CASES[word]
-    else
-      w1a = word.step_0.step_1a
-      if STEP_1A_SPECIAL_CASES.include? w1a 
-	w1a
-      else
-        w1a.step_1b(gb_english).step_1c.step_2(gb_english).step_3(gb_english).step_4(gb_english).step_5.porter2_postprocess
-      end
-    end
-  end  
-  
-  def porter2_stem_verbose(gb_english = false)
-    preword = self.porter2_tidy
-    puts "Preword: #{preword}"
-    return preword if preword.length <= 2
-
-    word = preword.porter2_preprocess
-    puts "Preprocessed: #{word}"
-    
-    if SPECIAL_CASES.has_key? word
-      puts "Returning #{word} as special case #{SPECIAL_CASES[word]}"
-      SPECIAL_CASES[word]
-    else
-      r1 = word.porter2_r1
-      r2 = word.porter2_r2
-      puts "R1 = #{r1}, R2 = #{r2}"
-    
-      w0 = word.step_0 ; puts "After step 0:  #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})"
-      w1a = w0.step_1a ; puts "After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})"
-      
-      if STEP_1A_SPECIAL_CASES.include? w1a
-        puts "Returning #{w1a} as 1a special case"
-	w1a
-      else
-        w1b = w1a.step_1b(gb_english) ; puts "After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})"
-        w1c = w1b.step_1c ; puts "After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})"
-        w2 = w1c.step_2(gb_english) ; puts "After step 2:  #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})"
-        w3 = w2.step_3(gb_english) ; puts "After step 3:  #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})"
-        w4 = w3.step_4(gb_english) ; puts "After step 4:  #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})"
-        w5 = w4.step_5 ; puts "After step 5:  #{w5}"
-        wpost = w5.porter2_postprocess ; puts "After postprocess: #{wpost}"
-        wpost
-      end
-    end
-  end  
-  
-  alias stem porter2_stem
-
-end
-
-# Add stem method to all Strings
-class String
-  include Stemmable
-  
-  # private :porter2_preprocess, :porter2_r1, :porter2_r2
-end
+# coding: utf-8
+
+require 'porter2_module'
+
+# ==The Porter 2 stemmer
+#
+# This is the Porter 2 stemming algorithm, as described at 
+# http://snowball.tartarus.org/algorithms/english/stemmer.html
+# The original paper is:
+#
+# Porter, 1980, "An algorithm for suffix stripping", _Program_, Vol. 14,
+# no. 3, pp 130-137
+#
+# Constants for the stemmer are in the Porter2 module.
+#
+# Procedures that implement the stemmer are added to the String class.
+# 
+# The stemmer algorithm is implemented in the porter2_stem procedure.
+# 
+# ==Internationalisation
+# There isn't much, as this is a stemmer that only works for English.
+#
+# The +gb_english+ flag to the various procedures allows the stemmer to treat the British 
+# English '-ise' the same as the American English '-ize'.
+#
+# ==Longest suffixes
+# Several places in the algorithm require matching the longest suffix of a word. The 
+# regexp engine in Ruby 1.9 seems to handle alterntives in regexps by finding the 
+# alternative that matches at the first position in the string. As we're only talking 
+# about suffixes, that first match is also the longest suffix. If the regexp engine changes,
+# this behaviour may change and break the stemmer.
+
+class String
+  # Tidy up the word before we get down to the algorithm
+  def porter2_tidy
+    preword = self.to_s.strip.downcase
+    
+    # map apostrophe-like characters to apostrophes
+    preword.gsub!(/â/, "'")
+    preword.gsub!(/â/, "'")
+
+    preword
+  end
+     
+
+  # Preprocess the word. 
+  # Remove any initial ', if present. Then, set initial y, or y after a vowel, to Y
+  #
+  # (The comment to 'establish the regions R1 and R2' in the original description 
+  # is an implementation optimisation that identifies where the regions start. As
+  # no modifications are made to the word that affect those positions, you may want
+  # to cache them now. This implementation doesn't do that.)
+  def porter2_preprocess    
+    w = self.dup
+
+    # remove any initial apostrophe
+    w.gsub!(/^'*(.)/, '\1')
+    
+    # set initial y, or y after a vowel, to Y
+    w.gsub!(/^y/, "Y")
+    w.gsub!(/(#{Porter2::V})y/, '\1Y')
+    
+    w
+  end
+    
+
+  # R1 is the portion of the word after the first non-vowel after the first vowel
+  # (with words beginning 'gener-', 'commun-', and 'arsen-' treated as special cases
+  def porter2_r1
+    if self =~ /^(gener|commun|arsen)(?<r1>.*)/
+      Regexp.last_match(:r1)
+    else
+      self =~ /#{Porter2::V}#{Porter2::C}(?<r1>.*)$/
+      Regexp.last_match(:r1) || ""
+    end
+  end
+
+
+  # R2 is the portion of R1 (porter2_r1) after the first non-vowel after the first vowel
+  def porter2_r2
+    self.porter2_r1 =~ /#{Porter2::V}#{Porter2::C}(?<r2>.*)$/
+    Regexp.last_match(:r2) || ""
+  end
+  
+
+  # Returns true if the word ends with a short syllable
+  def porter2_ends_with_short_syllable?
+    self =~ /#{Porter2::SHORT_SYLLABLE}$/ ? true : false
+  end
+
+
+  # A word is short if it ends in a short syllable, and R1 is null
+  def porter2_is_short_word?
+    self.porter2_ends_with_short_syllable? and self.porter2_r1.empty?
+  end
+  
+
+  # Search for the longest among the suffixes, 
+  # * '
+  # * 's
+  # * 's'
+  # and remove if found.
+  def porter2_step0
+    self.sub!(/(.)('s'|'s|')$/, '\1') || self
+  end
+  
+
+  # Search for the longest among the following suffixes, and perform the action indicated. 
+  # sses:: replace by ss 
+  # ied, ies:: replace by i if preceded by more than one letter, otherwise by ie
+  # s:: delete if the preceding word part contains a vowel not immediately before the s
+  # us, ss:: do nothing
+  def porter2_step1a
+    if self =~ /sses$/
+      self.sub(/sses$/, 'ss')
+    elsif self =~ /..(ied|ies)$/
+      self.sub(/(ied|ies)$/, 'i')
+    elsif self =~ /(ied|ies)$/
+      self.sub(/(ied|ies)$/, 'ie')
+    elsif self =~ /(us|ss)$/
+      self
+    elsif self =~ /s$/
+      if self =~ /(#{Porter2::V}.+)s$/
+        self.sub(/s$/, '') 
+      else
+        self
+      end
+    else
+      self
+    end
+  end
+  
+
+  # Search for the longest among the following suffixes, and perform the action indicated. 
+  # eed, eedly:: replace by ee if the suffix is also in R1 
+  # ed, edly, ing, ingly:: delete if the preceding word part contains a vowel and, 
+  #                        after the deletion:
+  #                        * if the word ends at, bl or iz: add e, or
+  #                        * if the word ends with a double: remove the last letter, or
+  #                        * if the word is short: add e
+  # 
+  # (If gb_english is +true+, treat the 'is' suffix as 'iz' above.)
+  def porter2_step1b(gb_english = false)
+    if self =~ /(eed|eedly)$/
+      if self.porter2_r1 =~ /(eed|eedly)$/
+        self.sub(/(eed|eedly)$/, 'ee')
+      else
+        self
+      end
+    else
+      w = self.dup
+      if w =~ /#{Porter2::V}.*(ed|edly|ing|ingly)$/
+        w.sub!(/(ed|edly|ing|ingly)$/, '')
+        if w =~ /(at|lb|iz)$/
+          w += 'e' 
+        elsif w =~ /is$/ and gb_english
+          w += 'e' 
+        elsif w =~ /#{Porter2::Double}$/
+	  w.chop!
+        elsif w.porter2_is_short_word?
+          w += 'e'
+        end
+      end
+      w
+    end
+  end
+
+
+  # Replace a suffix of y or Y by i if it is preceded by a non-vowel which is 
+  # not the first letter of the word.
+  def porter2_step1c
+    if self =~ /.+#{Porter2::C}(y|Y)$/
+      self.sub(/(y|Y)$/, 'i')
+    else
+      self
+    end
+  end
+  
+
+  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_2_MAPS. 
+  # If one is found and that suffix occurs in R1, replace it with the value 
+  # found in STEP_2_MAPS.
+  #
+  # (Suffixes 'ogi' and 'li' are treated as special cases in the procedure.)
+  # 
+  # (If gb_english is +true+, replace the 'iser' and 'isation' suffixes with
+  # 'ise', similarly to how 'izer' and 'ization' are treated.)
+  def porter2_step2(gb_english = false)
+    r1 = self.porter2_r1
+    s2m = Porter2::STEP_2_MAPS.dup
+    if gb_english
+      s2m["iser"] = "ise"
+      s2m["isation"] = "ise"
+    end
+    step_2_re = Regexp.union(s2m.keys.map {|r| Regexp.new(r + "$")})
+    if self =~ step_2_re
+      if r1 =~ /#{$&}$/
+        self.sub(/#{$&}$/, s2m[$&])
+      else
+        self
+      end
+    elsif r1 =~ /li$/ and self =~ /(#{Porter2::Valid_LI})li$/
+      self.sub(/li$/, '')
+    elsif r1 =~ /ogi$/ and self =~ /logi$/
+      self.sub(/ogi$/, 'og')
+    else
+      self
+    end
+  end
+     
+
+  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_3_MAPS. 
+  # If one is found and that suffix occurs in R1, replace it with the value 
+  # found in STEP_3_MAPS.
+  #
+  # (Suffix 'ative' is treated as a special case in the procedure.)
+  # 
+  # (If gb_english is +true+, replace the 'alise' suffix with
+  # 'al', similarly to how 'alize' is treated.)
+  def porter2_step3(gb_english = false)
+    if self =~ /ative$/ and self.porter2_r2 =~ /ative$/
+      self.sub(/ative$/, '')
+    else
+      s3m = Porter2::STEP_3_MAPS.dup
+      if gb_english
+	s3m["alise"] = "al"
+      end
+      step_3_re = Regexp.union(s3m.keys.map {|r| Regexp.new(r + "$")})
+      r1 = self.porter2_r1
+      if self =~ step_3_re and r1 =~ /#{$&}$/ 
+	self.sub(/#{$&}$/, s3m[$&])
+      else
+	self
+      end
+    end
+  end
+  
+
+  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_4_MAPS. 
+  # If one is found and that suffix occurs in R2, replace it with the value 
+  # found in STEP_4_MAPS.
+  #
+  # (Suffix 'ion' is treated as a special case in the procedure.)
+  # 
+  # (If gb_english is +true+, delete the 'ise' suffix if found.)
+  def porter2_step4(gb_english = false)
+    if self.porter2_r2 =~ /ion$/ and self =~ /(s|t)ion$/
+      self.sub(/ion$/, '')
+    else
+      s4m = Porter2::STEP_4_MAPS.dup
+      if gb_english
+        s4m["ise"] = ""
+      end
+      step_4_re = Regexp.union(s4m.keys.map {|r| Regexp.new(r + "$")})
+      r2 = self.porter2_r2
+      if self =~ step_4_re
+        if r2 =~ /#{$&}/
+          self.sub(/#{$&}$/, s4m[$&])
+        else
+          self
+        end
+      else
+        self
+      end
+    end
+  end
+
+
+  # Search for the the following suffixes, and, if found, perform the action indicated. 
+  # e:: delete if in R2, or in R1 and not preceded by a short syllable
+  # l:: delete if in R2 and preceded by l
+  def porter2_step5
+    if self =~ /ll$/ and self.porter2_r2 =~ /l$/
+      self.sub(/ll$/, 'l') 
+    elsif self =~ /e$/ and self.porter2_r2 =~ /e$/ 
+      self.sub(/e$/, '') 
+    else
+      r1 = self.porter2_r1
+      if self =~ /e$/ and r1 =~ /e$/ and not self =~ /#{Porter2::SHORT_SYLLABLE}e$/
+        self.sub(/e$/, '')
+      else
+        self
+      end
+    end
+  end
+  
+
+  # Turn all Y letters into y
+  def porter2_postprocess
+    self.gsub(/Y/, 'y')
+  end
+
+  public
+  
+  # Perform the stemming procedure. If +gb_english+ is true, treat '-ise' and similar suffixes
+  # as '-ize' in American English.
+  def porter2_stem(gb_english = false)
+    preword = self.porter2_tidy
+    return preword if preword.length <= 2
+
+    word = preword.porter2_preprocess
+    
+    if Porter2::SPECIAL_CASES.has_key? word
+      Porter2::SPECIAL_CASES[word]
+    else
+      w1a = word.porter2_step0.porter2_step1a
+      if Porter2::STEP_1A_SPECIAL_CASES.include? w1a 
+	w1a
+      else
+        w1a.porter2_step1b(gb_english).porter2_step1c.porter2_step2(gb_english).porter2_step3(gb_english).porter2_step4(gb_english).porter2_step5.porter2_postprocess
+      end
+    end
+  end  
+  
+  # A verbose version of porter2_stem that prints the output of each stage to STDOUT
+  def porter2_stem_verbose(gb_english = false)
+    preword = self.porter2_tidy
+    puts "Preword: #{preword}"
+    return preword if preword.length <= 2
+
+    word = preword.porter2_preprocess
+    puts "Preprocessed: #{word}"
+    
+    if Porter2::SPECIAL_CASES.has_key? word
+      puts "Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}"
+      Porter2::SPECIAL_CASES[word]
+    else
+      r1 = word.porter2_r1
+      r2 = word.porter2_r2
+      puts "R1 = #{r1}, R2 = #{r2}"
+    
+      w0 = word.porter2_step0 ; puts "After step 0:  #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})"
+      w1a = w0.porter2_step1a ; puts "After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})"
+      
+      if Porter2::STEP_1A_SPECIAL_CASES.include? w1a
+        puts "Returning #{w1a} as 1a special case"
+	w1a
+      else
+        w1b = w1a.porter2_step1b(gb_english) ; puts "After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})"
+        w1c = w1b.porter2_step1c ; puts "After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})"
+        w2 = w1c.porter2_step2(gb_english) ; puts "After step 2:  #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})"
+        w3 = w2.porter2_step3(gb_english) ; puts "After step 3:  #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})"
+        w4 = w3.porter2_step4(gb_english) ; puts "After step 4:  #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})"
+        w5 = w4.porter2_step5 ; puts "After step 5:  #{w5}"
+        wpost = w5.porter2_postprocess ; puts "After postprocess: #{wpost}"
+        wpost
+      end
+    end
+  end  
+  
+  alias stem porter2_stem
+
+end
+