Updated documentation
author Neil Smith <neil.d1@njae.me.uk>
Fri, 11 Mar 2011 12:08:28 +0000 (12:08 +0000)
committer Neil Smith <neil.d1@njae.me.uk>
Fri, 11 Mar 2011 12:08:28 +0000 (12:08 +0000)
15 files changed:
Readme.rdoc [new file with mode: 0644]
doc/Porter2.html [new file with mode: 0644]
doc/Readme_rdoc.html [new file with mode: 0644]
doc/String.html
doc/TestPorter2.html
doc/created.rid
doc/index.html
doc/lib/porter2_constants_rb.html [new file with mode: 0644]
doc/lib/porter2_implementation_rb.html [new file with mode: 0644]
doc/lib/porter2_module_rb.html [new file with mode: 0644]
doc/lib/porter2_rb.html
doc/lib/porter2_string_rb.html [new file with mode: 0644]
lib/porter2.rb
lib/porter2_constants.rb [new file with mode: 0644]
lib/porter2_implementation.rb [new file with mode: 0644]

diff --git a/Readme.rdoc b/Readme.rdoc
new file mode 100644 (file)
index 0000000..4ed0e6f
--- /dev/null
@@ -0,0 +1,53 @@
+# coding: utf-8
+
+# ==The Porter 2 stemmer
+# This is the Porter 2 stemming algorithm, as described at 
+# http://snowball.tartarus.org/algorithms/english/stemmer.html
+# The original paper is:
+#
+# Porter, 1980, "An algorithm for suffix stripping", _Program_, Vol. 14,
+# no. 3, pp 130-137
+#
+# ==Features of this implementation
+# This stemmer is written in pure Ruby, making it easy to modify for language variants. 
+# For instance, the original Porter stemmer only works for American English and does
+# not recognise British English's '-ise' as an alternate spelling of '-ize'. This 
+# implementation has been extended to correctly handle British English.
+#
+# This stemmer also features a comprehensive test set of over 29,000 words, taken from the 
+# {Porter 2 stemmer website}[http://snowball.tartarus.org/algorithms/english/stemmer.html].
+#
+# ==Files
+# Constants for the stemmer are in the Porter2 module.
+#
+# Procedures that implement the stemmer are added to the String class.
+# 
+# The stemmer algorithm is implemented in the String#porter2_stem procedure.
+#
+# ==Internationalisation
+# There isn't much, as this is a stemmer that only works for English.
+#
+# The +gb_english+ flag to the various procedures allows the stemmer to treat the British 
+# English '-ise' the same as the American English '-ize'.
+#
+# ==Longest suffixes
+# Several places in the algorithm require matching the longest suffix of a word. The 
+# regexp engine in Ruby 1.9 seems to handle alternatives in regexps by finding the 
+# alternative that matches at the first position in the string. As we're only talking 
+# about suffixes, that first match is also the longest suffix. If the regexp engine changes,
+# this behaviour may change and break the stemmer.
+#
+# ==Usage
+# Call the String#porter2_stem or String#stem methods on a string to return its stem:
+#  "consistency".stem       # => "consist"
+#  "knitting".stem          # => "knit"
+#  "articulated".stem       # => "articul"
+#  "nationalize".stem       # => "nation"
+#  "nationalise".stem       # => "nationalis"
+#  "nationalise".stem(true) # => "nation"
+#
+# ==Author
+# The Porter 2 stemming algorithm was developed by 
+# {Martin Porter}[http://snowball.tartarus.org/algorithms/english/stemmer.html]. 
+# This implementation is by {Neil Smith}[http://www.njae.me.uk].
+
diff --git a/doc/Porter2.html b/doc/Porter2.html
new file mode 100644 (file)
index 0000000..9b9e84b
--- /dev/null
@@ -0,0 +1,251 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+       "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+       <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+       <title>Module: Porter2</title>
+
+       <link rel="stylesheet" href="./rdoc.css" type="text/css" media="screen" />
+
+       <script src="./js/jquery.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="./js/thickbox-compressed.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="./js/quicksearch.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="./js/darkfish.js" type="text/javascript"
+               charset="utf-8"></script>
+
+</head>
+<body class="module">
+
+       <div id="metadata">
+               <div id="home-metadata">
+                       <div id="home-section" class="section">
+        <h3 class="section-header">
+          <a href="./index.html">Home</a>
+          <a href="./index.html#classes">Classes</a>
+          <a href="./index.html#methods">Methods</a>
+        </h3>
+                       </div>
+               </div>
+
+               <div id="file-metadata">
+                       <div id="file-list-section" class="section">
+                               <h3 class="section-header">In Files</h3>
+                               <div class="section-body">
+                                       <ul>
+                                       
+                                               <li><a href="./lib/porter2_constants_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
+                                                       class="thickbox" title="lib/porter2_constants.rb">lib/porter2_constants.rb</a></li>
+                                       
+                                       </ul>
+                               </div>
+                       </div>
+
+                       
+               </div>
+
+               <div id="class-metadata">
+
+                       <!-- Parent Class -->
+                       
+
+                       <!-- Namespace Contents -->
+                       
+
+                       <!-- Method Quickref -->
+                       
+
+                       <!-- Included Modules -->
+                       
+               </div>
+
+               <div id="project-metadata">
+                       
+                       
+                       <div id="fileindex-section" class="section project-section">
+                               <h3 class="section-header">Files</h3>
+                               <ul>
+                               
+                                       <li class="file"><a href="./Readme_rdoc.html">Readme.rdoc</a></li>
+                               
+                               </ul>
+                       </div>
+                       
+
+                       <div id="classindex-section" class="section project-section">
+                               <h3 class="section-header">Class Index
+                                       <span class="search-toggle"><img src="./images/find.png"
+                                               height="16" width="16" alt="[+]"
+                                               title="show/hide quicksearch" /></span></h3>
+                               <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
+                               <fieldset>
+                                       <legend>Quicksearch</legend>
+                                       <input type="text" name="quicksearch" value=""
+                                               class="quicksearch-field" />
+                               </fieldset>
+                               </form>
+
+                               <ul class="link-list">
+                               
+                                       <li><a href="./Porter2.html">Porter2</a></li>
+                               
+                                       <li><a href="./String.html">String</a></li>
+                               
+                                       <li><a href="./TestPorter2.html">TestPorter2</a></li>
+                               
+                               </ul>
+                               <div id="no-class-search-results" style="display: none;">No matching classes.</div>
+                       </div>
+
+                       
+               </div>
+       </div>
+
+       <div id="documentation">
+               <h1 class="module">Porter2</h1>
+
+               <div id="description">
+                       <p>
+Constants for the Porter 2 stemmer
+</p>
+
+               </div>
+
+               <!-- Constants -->
+               
+               <div id="constants-list" class="section">
+                       <h3 class="section-header">Constants</h3>
+                       <dl>
+                       
+                               <dt><a name="C">C</a></dt>
+                               
+                               <dd class="description"><p>
+A non-vowel
+</p></dd>
+                               
+                       
+                               <dt><a name="V">V</a></dt>
+                               
+                               <dd class="description"><p>
+A vowel: a e i o u y
+</p></dd>
+                               
+                       
+                               <dt><a name="CW">CW</a></dt>
+                               
+                               <dd class="description"><p>
+A non-vowel other than w, x, or Y
+</p></dd>
+                               
+                       
+                               <dt><a name="Double">Double</a></dt>
+                               
+                               <dd class="description"><p>
+Doubles created when adding a suffix: these are undoubled when stemmed
+</p></dd>
+                               
+                       
+                               <dt><a name="Valid_LI">Valid_LI</a></dt>
+                               
+                               <dd class="description"><p>
+A valid letter that can come before &#8216;li&#8217; (or &#8216;ly&#8217;)
+</p></dd>
+                               
+                       
+                               <dt><a name="SHORT_SYLLABLE">SHORT_SYLLABLE</a></dt>
+                               
+                               <dd class="description"><p>
+A specification for a short syllable.
+</p>
+<p>
+A short syllable in a word is either:
+</p>
+<ol>
+<li><p>
+a vowel followed by a non-vowel other than w, x or Y and preceded by a
+non-vowel, or
+</p>
+</li>
+<li><p>
+a vowel at the beginning of the word followed by a non-vowel.
+</p>
+</li>
+</ol>
+<p>
+(The original document is silent on whether sequences of two or more
+non-vowels make a syllable long. But as this specification is only used to
+find sequences of non-vowel - vowel - non-vowel - end-of-word, this
+ambiguity does not have an effect.)
+</p></dd>
+                               
+                       
+                               <dt><a name="STEP_2_MAPS">STEP_2_MAPS</a></dt>
+                               
+                               <dd class="description"><p>
+Suffix transformations used in porter2_step2. (ogi, li endings dealt with
+in procedure)
+</p></dd>
+                               
+                       
+                               <dt><a name="STEP_3_MAPS">STEP_3_MAPS</a></dt>
+                               
+                               <dd class="description"><p>
+Suffix transformations used in porter2_step3. (ative ending dealt with in
+procedure)
+</p></dd>
+                               
+                       
+                               <dt><a name="STEP_4_MAPS">STEP_4_MAPS</a></dt>
+                               
+                               <dd class="description"><p>
+Suffix transformations used in porter2_step4. (ion ending dealt with in
+procedure)
+</p></dd>
+                               
+                       
+                               <dt><a name="SPECIAL_CASES">SPECIAL_CASES</a></dt>
+                               
+                               <dd class="description"><p>
+Special-case stemmings
+</p></dd>
+                               
+                       
+                               <dt><a name="STEP_1A_SPECIAL_CASES">STEP_1A_SPECIAL_CASES</a></dt>
+                               
+                               <dd class="description"><p>
+Special case words to stop processing after step 1a.
+</p></dd>
+                               
+                       
+                       </dl>
+               </div>
+               
+
+               <!-- Attributes -->
+               
+
+               <!-- Methods -->
+               
+
+       </div>
+
+
+       <div id="rdoc-debugging-section-dump" class="debugging-section">
+       
+               <p>Disabled; run with --debug to generate this.</p>
+       
+       </div>
+
+       <div id="validator-badges">
+               <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
+               <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
+                       Rdoc Generator</a> 1.1.6</small>.</p>
+       </div>
+
+</body>
+</html>
+
diff --git a/doc/Readme_rdoc.html b/doc/Readme_rdoc.html
new file mode 100644 (file)
index 0000000..ccb076c
--- /dev/null
@@ -0,0 +1,165 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+       "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+       <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+       <title>File: Readme.rdoc [RDoc Documentation]</title>
+
+       <link type="text/css" media="screen" href="./rdoc.css" rel="stylesheet" />
+
+       <script src="./js/jquery.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="./js/thickbox-compressed.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="./js/quicksearch.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="./js/darkfish.js" type="text/javascript"
+               charset="utf-8"></script>
+</head>
+
+<body class="file">
+       <div id="metadata">
+               <div id="home-metadata">
+                       <div id="home-section" class="section">
+        <h3 class="section-header">
+          <a href="./index.html">Home</a>
+          <a href="./index.html#classes">Classes</a>
+          <a href="./index.html#methods">Methods</a>
+        </h3>
+                       </div>
+               </div>
+
+               <div id="project-metadata">
+                       
+                       
+                       <div id="fileindex-section" class="section project-section">
+                               <h3 class="section-header">Files</h3>
+                               <ul>
+                               
+                                       <li class="file"><a href="./Readme_rdoc.html">Readme.rdoc</a></li>
+                               
+                               </ul>
+                       </div>
+                       
+
+                       <div id="classindex-section" class="section project-section">
+                               <h3 class="section-header">Class Index
+                                       <span class="search-toggle"><img src="./images/find.png"
+                                               height="16" width="16" alt="[+]"
+                                               title="show/hide quicksearch" /></span></h3>
+                               <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
+                               <fieldset>
+                                       <legend>Quicksearch</legend>
+                                       <input type="text" name="quicksearch" value=""
+                                               class="quicksearch-field" />
+                               </fieldset>
+                               </form>
+
+                               <ul class="link-list">
+                               
+                                       <li><a href="./Porter2.html">Porter2</a></li>
+                               
+                                       <li><a href="./String.html">String</a></li>
+                               
+                                       <li><a href="./TestPorter2.html">TestPorter2</a></li>
+                               
+                               </ul>
+                               <div id="no-class-search-results" style="display: none;">No matching classes.</div>
+                       </div>
+
+                       
+               </div>
+       </div>
+
+       <div id="documentation">
+               <h2>The Porter 2 stemmer</h2>
+<p>
+This is the Porter 2 stemming algorithm, as described at  <a
+href="http://snowball.tartarus.org/algorithms/english/stemmer.html">snowball.tartarus.org/algorithms/english/stemmer.html</a>
+The original paper is:
+</p>
+<p>
+Porter, 1980, &#8220;An algorithm for suffix stripping&#8221;,
+<em>Program</em>, Vol. 14, no. 3, pp 130-137
+</p>
+<h2>Features of this implementation</h2>
+<p>
+This stemmer is written in pure Ruby, making it easy to modify for language
+variants.  For instance, the original Porter stemmer only works for
+American English and does not recognise British English&#8217;s
+&#8217;-ise&#8217; as an alternate spelling of &#8217;-ize&#8217;. This 
+implementation has been extended to correctly handle British English.
+</p>
+<p>
+This stemmer also features a comprehensive test set of over 29,000 words,
+taken from the  <a
+href="http://snowball.tartarus.org/algorithms/english/stemmer.html">Porter
+2 stemmer website</a>.
+</p>
+<h2>Files</h2>
+<p>
+Constants for the stemmer are in the <a href="Porter2.html">Porter2</a>
+module.
+</p>
+<p>
+Procedures that implement the stemmer are added to the <a
+href="String.html">String</a> class.
+</p>
+<p>
+The stemmer algorithm is implemented in the <a
+href="String.html#method-i-porter2_stem">String#porter2_stem</a> procedure.
+</p>
+<h2>Internationalisation</h2>
+<p>
+There isn&#8217;t much, as this is a stemmer that only works for English.
+</p>
+<p>
+The <tt>gb_english</tt> flag to the various procedures allows the stemmer
+to treat the British  English &#8217;-ise&#8217; the same as the American
+English &#8217;-ize&#8217;.
+</p>
+<h2>Longest suffixes</h2>
+<p>
+Several places in the algorithm require matching the longest suffix of a
+word. The  regexp engine in Ruby 1.9 seems to handle alternatives in regexps
+by finding the  alternative that matches at the first position in the
+string. As we&#8217;re only talking  about suffixes, that first match is
+also the longest suffix. If the regexp engine changes, this behaviour may
+change and break the stemmer.
+</p>
+<h2>Usage</h2>
+<p>
+Call the <a
+href="String.html#method-i-porter2_stem">String#porter2_stem</a> or <a
+href="String.html#method-i-stem">String#stem</a> methods on a string to
+return its stem
+</p>
+<pre>
+ &quot;consistency&quot;.stem       # =&gt; &quot;consist&quot;
+ &quot;knitting&quot;.stem          # =&gt; &quot;knit&quot;
+ &quot;articulated&quot;.stem       # =&gt; &quot;articul&quot;
+ &quot;nationalize&quot;.stem       # =&gt; &quot;nation&quot;
+ &quot;nationalise&quot;.stem       # =&gt; &quot;nationalis&quot;
+ &quot;nationalise&quot;.stem(true) # =&gt; &quot;nation&quot;
+</pre>
+<h2>Author</h2>
+<p>
+The Porter 2 stemming algorithm was developed by  <a
+href="http://snowball.tartarus.org/algorithms/english/stemmer.html">Martin
+Porter</a>.  This implementation is by <a href="http://www.njae.me.uk">Neil
+Smith</a>.
+</p>
+
+       </div>
+
+       <div id="validator-badges">
+               <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
+               <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
+                       Rdoc Generator</a> 1.1.6</small>.</p>
+       </div>
+</body>
+</html>
+
index e0921af7f555c9e878e958a8eb0df0b0bf313e72..f04ae9aa7771b3591944e48aa30427256ac4f35d 100644 (file)
@@ -38,8 +38,8 @@
                                <div class="section-body">
                                        <ul>
                                        
-                                               <li><a href="./lib/porter2_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
-                                                       class="thickbox" title="lib/porter2.rb">lib/porter2.rb</a></li>
+                                               <li><a href="./lib/porter2_implementation_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
+                                                       class="thickbox" title="lib/porter2_implementation.rb">lib/porter2_implementation.rb</a></li>
                                        
                                        </ul>
                                </div>
                <div id="project-metadata">
                        
                        
+                       <div id="fileindex-section" class="section project-section">
+                               <h3 class="section-header">Files</h3>
+                               <ul>
+                               
+                                       <li class="file"><a href="./Readme_rdoc.html">Readme.rdoc</a></li>
+                               
+                               </ul>
+                       </div>
+                       
 
                        <div id="classindex-section" class="section project-section">
                                <h3 class="section-header">Class Index
                <h1 class="class">String</h1>
 
                <div id="description">
-                       <h2>The Porter 2 stemmer</h2>
-<p>
-This is the Porter 2 stemming algorithm, as described at  <a
-href="http://snowball.tartarus.org/algorithms/english/stemmer.html">snowball.tartarus.org/algorithms/english/stemmer.html</a>
-The original paper is:
-</p>
-<p>
-Porter, 1980, &#8220;An algorithm for suffix stripping&#8221;,
-<em>Program</em>, Vol. 14, no. 3, pp 130-137
-</p>
-<p>
-Constants for the stemmer are in the <a href="Porter2.html">Porter2</a>
-module.
-</p>
-<p>
-Procedures that implement the stemmer are added to the <a
-href="String.html">String</a> class.
-</p>
-<p>
-The stemmer algorithm is implemented in the <a
-href="String.html#method-i-porter2_stem">porter2_stem</a> procedure.
-</p>
-<h2>Internationalisation</h2>
-<p>
-There isn&#8217;t much, as this is a stemmer that only works for English.
-</p>
-<p>
-The <tt>gb_english</tt> flag to the various procedures allows the stemmer
-to treat the British  English &#8217;-ise&#8217; the same as the American
-English &#8217;-ize&#8217;.
-</p>
-<h2>Longest suffixes</h2>
-<p>
-Several places in the algorithm require matching the longest suffix of a
-word. The  regexp engine in Ruby 1.9 seems to handle alterntives in regexps
-by finding the  alternative that matches at the first position in the
-string. As we&#8217;re only talking  about suffixes, that first match is
-also the longest suffix. If the regexp engine changes, this behaviour may
-change and break the stemmer.
+                       <p>
+Implementation of the Porter 2 stemmer. <a
+href="String.html#method-i-porter2_stem">String#porter2_stem</a> is the
+main stemming procedure.
 </p>
 
                </div>
@@ -227,10 +201,10 @@ Returns true if the word ends with a short syllable
                                        <div class="method-source-code"
                                                id="porter-ends-with-short-syllable--source">
 <pre>
-    <span class="ruby-comment cmt"># File lib/porter2.rb, line 87</span>
-87:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_ends_with_short_syllable?</span>
-88:     <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}$/</span> <span class="ruby-operator">?</span> <span class="ruby-keyword kw">true</span> <span class="ruby-operator">:</span> <span class="ruby-keyword kw">false</span>
-89:   <span class="ruby-keyword kw">end</span></pre>
+    <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 59</span>
+59:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_ends_with_short_syllable?</span>
+60:     <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}$/</span> <span class="ruby-operator">?</span> <span class="ruby-keyword kw">true</span> <span class="ruby-operator">:</span> <span class="ruby-keyword kw">false</span>
+61:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -263,10 +237,10 @@ A word is short if it ends in a short syllable, and R1 is null
                                        <div class="method-source-code"
                                                id="porter-is-short-word--source">
 <pre>
-    <span class="ruby-comment cmt"># File lib/porter2.rb, line 93</span>
-93:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_is_short_word?</span>
-94:     <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>.<span class="ruby-identifier">empty?</span>
-95:   <span class="ruby-keyword kw">end</span></pre>
+    <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 65</span>
+65:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_is_short_word?</span>
+66:     <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>.<span class="ruby-identifier">empty?</span>
+67:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -299,10 +273,10 @@ Turn all Y letters into y
                                        <div class="method-source-code"
                                                id="porter-postprocess-source">
 <pre>
-     <span class="ruby-comment cmt"># File lib/porter2.rb, line 289</span>
-289:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_postprocess</span>
-290:     <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/Y/</span>, <span class="ruby-value str">'y'</span>)
-291:   <span class="ruby-keyword kw">end</span></pre>
+     <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 261</span>
+261:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_postprocess</span>
+262:     <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/Y/</span>, <span class="ruby-value str">'y'</span>)
+263:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -343,19 +317,19 @@ doesn&#8217;t do that.)
                                        <div class="method-source-code"
                                                id="porter-preprocess-source">
 <pre>
-    <span class="ruby-comment cmt"># File lib/porter2.rb, line 53</span>
-53:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_preprocess</span>    
-54:     <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
-55
-56:     <span class="ruby-comment cmt"># remove any initial apostrophe</span>
-57:     <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^'*(.)/</span>, <span class="ruby-value str">'\1'</span>)
-58:     
-59:     <span class="ruby-comment cmt"># set initial y, or y after a vowel, to Y</span>
-60:     <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^y/</span>, <span class="ruby-value str">&quot;Y&quot;</span>)
-61:     <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-node">/(#{Porter2::V})y/</span>, <span class="ruby-value str">'\1Y'</span>)
-62:     
-63:     <span class="ruby-identifier">w</span>
-64:   <span class="ruby-keyword kw">end</span></pre>
+    <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 25</span>
+25:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_preprocess</span>    
+26:     <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
+27
+28:     <span class="ruby-comment cmt"># remove any initial apostrophe</span>
+29:     <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^'*(.)/</span>, <span class="ruby-value str">'\1'</span>)
+30:     
+31:     <span class="ruby-comment cmt"># set initial y, or y after a vowel, to Y</span>
+32:     <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^y/</span>, <span class="ruby-value str">&quot;Y&quot;</span>)
+33:     <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-node">/(#{Porter2::V})y/</span>, <span class="ruby-value str">'\1Y'</span>)
+34:     
+35:     <span class="ruby-identifier">w</span>
+36:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -390,15 +364,15 @@ and &#8216;arsen-&#8217; treated as special cases
                                        <div class="method-source-code"
                                                id="porter-r--source">
 <pre>
-    <span class="ruby-comment cmt"># File lib/porter2.rb, line 69</span>
-69:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r1</span>
-70:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^(gener|commun|arsen)(?&lt;r1&gt;.*)/</span>
-71:       <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>)
-72:     <span class="ruby-keyword kw">else</span>
-73:       <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?&lt;r1&gt;.*)$/</span>
-74:       <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">&quot;&quot;</span>
-75:     <span class="ruby-keyword kw">end</span>
-76:   <span class="ruby-keyword kw">end</span></pre>
+    <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 41</span>
+41:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r1</span>
+42:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^(gener|commun|arsen)(?&lt;r1&gt;.*)/</span>
+43:       <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>)
+44:     <span class="ruby-keyword kw">else</span>
+45:       <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?&lt;r1&gt;.*)$/</span>
+46:       <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">&quot;&quot;</span>
+47:     <span class="ruby-keyword kw">end</span>
+48:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -433,11 +407,11 @@ non-vowel after the first vowel
                                        <div class="method-source-code"
                                                id="porter-r--source">
 <pre>
-    <span class="ruby-comment cmt"># File lib/porter2.rb, line 80</span>
-80:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r2</span>
-81:     <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?&lt;r2&gt;.*)$/</span>
-82:     <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r2</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">&quot;&quot;</span>
-83:   <span class="ruby-keyword kw">end</span></pre>
+    <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 52</span>
+52:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r2</span>
+53:     <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?&lt;r2&gt;.*)$/</span>
+54:     <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r2</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">&quot;&quot;</span>
+55:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -472,24 +446,24 @@ English.
                                        <div class="method-source-code"
                                                id="porter-stem-source">
 <pre>
-     <span class="ruby-comment cmt"># File lib/porter2.rb, line 297</span>
-297:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-298:     <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
-299:     <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;=</span> <span class="ruby-value">2</span>
-300
-301:     <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
-302:     
-303:     <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
-304:       <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
-305:     <span class="ruby-keyword kw">else</span>
-306:       <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span>.<span class="ruby-identifier">porter2_step1a</span>
-307:       <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span> 
-308:         <span class="ruby-identifier">w1a</span>
-309:       <span class="ruby-keyword kw">else</span>
-310:         <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step5</span>.<span class="ruby-identifier">porter2_postprocess</span>
-311:       <span class="ruby-keyword kw">end</span>
-312:     <span class="ruby-keyword kw">end</span>
-313:   <span class="ruby-keyword kw">end</span></pre>
+     <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 269</span>
+269:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+270:     <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
+271:     <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;=</span> <span class="ruby-value">2</span>
+272
+273:     <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
+274:     
+275:     <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
+276:       <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
+277:     <span class="ruby-keyword kw">else</span>
+278:       <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span>.<span class="ruby-identifier">porter2_step1a</span>
+279:       <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span> 
+280:         <span class="ruby-identifier">w1a</span>
+281:       <span class="ruby-keyword kw">else</span>
+282:         <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step5</span>.<span class="ruby-identifier">porter2_postprocess</span>
+283:       <span class="ruby-keyword kw">end</span>
+284:     <span class="ruby-keyword kw">end</span>
+285:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -528,41 +502,41 @@ output of each stage to STDOUT
                                        <div class="method-source-code"
                                                id="porter-stem-verbose-source">
 <pre>
-     <span class="ruby-comment cmt"># File lib/porter2.rb, line 316</span>
-316:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem_verbose</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-317:     <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
-318:     <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Preword: #{preword}&quot;</span>
-319:     <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;=</span> <span class="ruby-value">2</span>
-320
-321:     <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
-322:     <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Preprocessed: #{word}&quot;</span>
-323:     
-324:     <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
-325:       <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}&quot;</span>
-326:       <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
-327:     <span class="ruby-keyword kw">else</span>
-328:       <span class="ruby-identifier">r1</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r1</span>
-329:       <span class="ruby-identifier">r2</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r2</span>
-330:       <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;R1 = #{r1}, R2 = #{r2}&quot;</span>
-331:     
-332:       <span class="ruby-identifier">w0</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 0:  #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})&quot;</span>
-333:       <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">w0</span>.<span class="ruby-identifier">porter2_step1a</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})&quot;</span>
-334:       
-335:       <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
-336:         <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Returning #{w1a} as 1a special case&quot;</span>
-337:         <span class="ruby-identifier">w1a</span>
-338:       <span class="ruby-keyword kw">else</span>
-339:         <span class="ruby-identifier">w1b</span> = <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})&quot;</span>
-340:         <span class="ruby-identifier">w1c</span> = <span class="ruby-identifier">w1b</span>.<span class="ruby-identifier">porter2_step1c</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})&quot;</span>
-341:         <span class="ruby-identifier">w2</span> = <span class="ruby-identifier">w1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 2:  #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})&quot;</span>
-342:         <span class="ruby-identifier">w3</span> = <span class="ruby-identifier">w2</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 3:  #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})&quot;</span>
-343:         <span class="ruby-identifier">w4</span> = <span class="ruby-identifier">w3</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 4:  #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})&quot;</span>
-344:         <span class="ruby-identifier">w5</span> = <span class="ruby-identifier">w4</span>.<span class="ruby-identifier">porter2_step5</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 5:  #{w5}&quot;</span>
-345:         <span class="ruby-identifier">wpost</span> = <span class="ruby-identifier">w5</span>.<span class="ruby-identifier">porter2_postprocess</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After postprocess: #{wpost}&quot;</span>
-346:         <span class="ruby-identifier">wpost</span>
-347:       <span class="ruby-keyword kw">end</span>
-348:     <span class="ruby-keyword kw">end</span>
-349:   <span class="ruby-keyword kw">end</span></pre>
+     <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 288</span>
+288:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem_verbose</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+289:     <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
+290:     <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Preword: #{preword}&quot;</span>
+291:     <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;=</span> <span class="ruby-value">2</span>
+292
+293:     <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
+294:     <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Preprocessed: #{word}&quot;</span>
+295:     
+296:     <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
+297:       <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}&quot;</span>
+298:       <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
+299:     <span class="ruby-keyword kw">else</span>
+300:       <span class="ruby-identifier">r1</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r1</span>
+301:       <span class="ruby-identifier">r2</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r2</span>
+302:       <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;R1 = #{r1}, R2 = #{r2}&quot;</span>
+303:     
+304:       <span class="ruby-identifier">w0</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 0:  #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})&quot;</span>
+305:       <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">w0</span>.<span class="ruby-identifier">porter2_step1a</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})&quot;</span>
+306:       
+307:       <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
+308:         <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Returning #{w1a} as 1a special case&quot;</span>
+309:         <span class="ruby-identifier">w1a</span>
+310:       <span class="ruby-keyword kw">else</span>
+311:         <span class="ruby-identifier">w1b</span> = <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})&quot;</span>
+312:         <span class="ruby-identifier">w1c</span> = <span class="ruby-identifier">w1b</span>.<span class="ruby-identifier">porter2_step1c</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})&quot;</span>
+313:         <span class="ruby-identifier">w2</span> = <span class="ruby-identifier">w1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 2:  #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})&quot;</span>
+314:         <span class="ruby-identifier">w3</span> = <span class="ruby-identifier">w2</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 3:  #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})&quot;</span>
+315:         <span class="ruby-identifier">w4</span> = <span class="ruby-identifier">w3</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 4:  #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})&quot;</span>
+316:         <span class="ruby-identifier">w5</span> = <span class="ruby-identifier">w4</span>.<span class="ruby-identifier">porter2_step5</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 5:  #{w5}&quot;</span>
+317:         <span class="ruby-identifier">wpost</span> = <span class="ruby-identifier">w5</span>.<span class="ruby-identifier">porter2_postprocess</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After postprocess: #{wpost}&quot;</span>
+318:         <span class="ruby-identifier">wpost</span>
+319:       <span class="ruby-keyword kw">end</span>
+320:     <span class="ruby-keyword kw">end</span>
+321:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -612,10 +586,10 @@ and remove if found.
                                        <div class="method-source-code"
                                                id="porter-step--source">
 <pre>
-     <span class="ruby-comment cmt"># File lib/porter2.rb, line 103</span>
-103:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step0</span>
-104:     <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(.)('s'|'s|')$/</span>, <span class="ruby-value str">'\1'</span>) <span class="ruby-operator">||</span> <span class="ruby-keyword kw">self</span>
-105:   <span class="ruby-keyword kw">end</span></pre>
+    <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 75</span>
+75:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step0</span>
+76:     <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(.)('s'|'s|')$/</span>, <span class="ruby-value str">'\1'</span>) <span class="ruby-operator">||</span> <span class="ruby-keyword kw">self</span>
+77:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -668,26 +642,26 @@ do nothing
                                        <div class="method-source-code"
                                                id="porter-step-a-source">
 <pre>
-     <span class="ruby-comment cmt"># File lib/porter2.rb, line 113</span>
-113:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1a</span>
-114:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/sses$/</span>
-115:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/sses$/</span>, <span class="ruby-value str">'ss'</span>)
-116:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/..(ied|ies)$/</span>
-117:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'i'</span>)
-118:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ied|ies)$/</span>
-119:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'ie'</span>)
-120:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(us|ss)$/</span>
-121:       <span class="ruby-keyword kw">self</span>
-122:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/s$/</span>
-123:       <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::V}.+)s$/</span>
-124:         <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/s$/</span>, <span class="ruby-value str">''</span>) 
-125:       <span class="ruby-keyword kw">else</span>
-126:         <span class="ruby-keyword kw">self</span>
-127:       <span class="ruby-keyword kw">end</span>
-128:     <span class="ruby-keyword kw">else</span>
-129:       <span class="ruby-keyword kw">self</span>
-130:     <span class="ruby-keyword kw">end</span>
-131:   <span class="ruby-keyword kw">end</span></pre>
+     <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 85</span>
+ 85:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1a</span>
+ 86:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/sses$/</span>
+ 87:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/sses$/</span>, <span class="ruby-value str">'ss'</span>)
+ 88:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/..(ied|ies)$/</span>
+ 89:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'i'</span>)
+ 90:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ied|ies)$/</span>
+ 91:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'ie'</span>)
+ 92:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(us|ss)$/</span>
+ 93:       <span class="ruby-keyword kw">self</span>
+ 94:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/s$/</span>
+ 95:       <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::V}.+)s$/</span>
+ 96:         <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/s$/</span>, <span class="ruby-value str">''</span>) 
+ 97:       <span class="ruby-keyword kw">else</span>
+ 98:         <span class="ruby-keyword kw">self</span>
+ 99:       <span class="ruby-keyword kw">end</span>
+100:     <span class="ruby-keyword kw">else</span>
+101:       <span class="ruby-keyword kw">self</span>
+102:     <span class="ruby-keyword kw">end</span>
+103:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -754,31 +728,31 @@ if the word is short: add e
                                        <div class="method-source-code"
                                                id="porter-step-b-source">
 <pre>
-     <span class="ruby-comment cmt"># File lib/porter2.rb, line 143</span>
-143:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-144:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
-145:       <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
-146:         <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(eed|eedly)$/</span>, <span class="ruby-value str">'ee'</span>)
-147:       <span class="ruby-keyword kw">else</span>
-148:         <span class="ruby-keyword kw">self</span>
-149:       <span class="ruby-keyword kw">end</span>
-150:     <span class="ruby-keyword kw">else</span>
-151:       <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
-152:       <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}.*(ed|edly|ing|ingly)$/</span>
-153:         <span class="ruby-identifier">w</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(ed|edly|ing|ingly)$/</span>, <span class="ruby-value str">''</span>)
-154:         <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(at|lb|iz)$/</span>
-155:           <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span> 
-156:         <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/is$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">gb_english</span>
-157:           <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span> 
-158:         <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::Double}$/</span>
-159:           <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
-160:         <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span>
-161:           <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
-162:         <span class="ruby-keyword kw">end</span>
-163:       <span class="ruby-keyword kw">end</span>
-164:       <span class="ruby-identifier">w</span>
-165:     <span class="ruby-keyword kw">end</span>
-166:   <span class="ruby-keyword kw">end</span></pre>
+     <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 115</span>
+115:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+116:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
+117:       <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
+118:         <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(eed|eedly)$/</span>, <span class="ruby-value str">'ee'</span>)
+119:       <span class="ruby-keyword kw">else</span>
+120:         <span class="ruby-keyword kw">self</span>
+121:       <span class="ruby-keyword kw">end</span>
+122:     <span class="ruby-keyword kw">else</span>
+123:       <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
+124:       <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}.*(ed|edly|ing|ingly)$/</span>
+125:         <span class="ruby-identifier">w</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(ed|edly|ing|ingly)$/</span>, <span class="ruby-value str">''</span>)
+126:         <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(at|lb|iz)$/</span>
+127:           <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span> 
+128:         <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/is$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">gb_english</span>
+129:           <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span> 
+130:         <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::Double}$/</span>
+131:           <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
+132:         <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span>
+133:           <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
+134:         <span class="ruby-keyword kw">end</span>
+135:       <span class="ruby-keyword kw">end</span>
+136:       <span class="ruby-identifier">w</span>
+137:     <span class="ruby-keyword kw">end</span>
+138:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -812,14 +786,14 @@ not the first letter of the word.
                                        <div class="method-source-code"
                                                id="porter-step-c-source">
 <pre>
-     <span class="ruby-comment cmt"># File lib/porter2.rb, line 171</span>
-171:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1c</span>
-172:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/.+#{Porter2::C}(y|Y)$/</span>
-173:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(y|Y)$/</span>, <span class="ruby-value str">'i'</span>)
-174:     <span class="ruby-keyword kw">else</span>
-175:       <span class="ruby-keyword kw">self</span>
-176:     <span class="ruby-keyword kw">end</span>
-177:   <span class="ruby-keyword kw">end</span></pre>
+     <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 143</span>
+143:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1c</span>
+144:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/.+#{Porter2::C}(y|Y)$/</span>
+145:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(y|Y)$/</span>, <span class="ruby-value str">'i'</span>)
+146:     <span class="ruby-keyword kw">else</span>
+147:       <span class="ruby-keyword kw">self</span>
+148:     <span class="ruby-keyword kw">end</span>
+149:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -863,29 +837,29 @@ cases in the procedure.)
                                        <div class="method-source-code"
                                                id="porter-step--source">
 <pre>
-     <span class="ruby-comment cmt"># File lib/porter2.rb, line 188</span>
-188:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-189:     <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
-190:     <span class="ruby-identifier">s2m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_2_MAPS</span>.<span class="ruby-identifier">dup</span>
-191:     <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
-192:       <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">&quot;iser&quot;</span>] = <span class="ruby-value str">&quot;ise&quot;</span>
-193:       <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">&quot;isation&quot;</span>] = <span class="ruby-value str">&quot;ise&quot;</span>
-194:     <span class="ruby-keyword kw">end</span>
-195:     <span class="ruby-identifier">step_2_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s2m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
-196:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_2_re</span>
-197:       <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}$/</span>
-198:         <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s2m</span>[<span class="ruby-node">$&amp;</span>])
-199:       <span class="ruby-keyword kw">else</span>
-200:         <span class="ruby-keyword kw">self</span>
-201:       <span class="ruby-keyword kw">end</span>
-202:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/li$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::Valid_LI})li$/</span>
-203:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/li$/</span>, <span class="ruby-value str">''</span>)
-204:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ogi$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/logi$/</span>
-205:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ogi$/</span>, <span class="ruby-value str">'og'</span>)
-206:     <span class="ruby-keyword kw">else</span>
-207:       <span class="ruby-keyword kw">self</span>
-208:     <span class="ruby-keyword kw">end</span>
-209:   <span class="ruby-keyword kw">end</span></pre>
+     <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 160</span>
+160:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+161:     <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
+162:     <span class="ruby-identifier">s2m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_2_MAPS</span>.<span class="ruby-identifier">dup</span>
+163:     <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
+164:       <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">&quot;iser&quot;</span>] = <span class="ruby-value str">&quot;ise&quot;</span>
+165:       <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">&quot;isation&quot;</span>] = <span class="ruby-value str">&quot;ise&quot;</span>
+166:     <span class="ruby-keyword kw">end</span>
+167:     <span class="ruby-identifier">step_2_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s2m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
+168:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_2_re</span>
+169:       <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}$/</span>
+170:         <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s2m</span>[<span class="ruby-node">$&amp;</span>])
+171:       <span class="ruby-keyword kw">else</span>
+172:         <span class="ruby-keyword kw">self</span>
+173:       <span class="ruby-keyword kw">end</span>
+174:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/li$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::Valid_LI})li$/</span>
+175:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/li$/</span>, <span class="ruby-value str">''</span>)
+176:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ogi$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/logi$/</span>
+177:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ogi$/</span>, <span class="ruby-value str">'og'</span>)
+178:     <span class="ruby-keyword kw">else</span>
+179:       <span class="ruby-keyword kw">self</span>
+180:     <span class="ruby-keyword kw">end</span>
+181:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -927,24 +901,24 @@ with &#8216;al&#8217;, similarly to how &#8216;alize&#8217; is treated.)
                                        <div class="method-source-code"
                                                id="porter-step--source">
 <pre>
-     <span class="ruby-comment cmt"># File lib/porter2.rb, line 220</span>
-220:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-221:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span>
-222:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ative$/</span>, <span class="ruby-value str">''</span>)
-223:     <span class="ruby-keyword kw">else</span>
-224:       <span class="ruby-identifier">s3m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_3_MAPS</span>.<span class="ruby-identifier">dup</span>
-225:       <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
-226:         <span class="ruby-identifier">s3m</span>[<span class="ruby-value str">&quot;alise&quot;</span>] = <span class="ruby-value str">&quot;al&quot;</span>
-227:       <span class="ruby-keyword kw">end</span>
-228:       <span class="ruby-identifier">step_3_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s3m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
-229:       <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
-230:       <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_3_re</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}$/</span> 
-231:         <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s3m</span>[<span class="ruby-node">$&amp;</span>])
-232:       <span class="ruby-keyword kw">else</span>
-233:         <span class="ruby-keyword kw">self</span>
-234:       <span class="ruby-keyword kw">end</span>
-235:     <span class="ruby-keyword kw">end</span>
-236:   <span class="ruby-keyword kw">end</span></pre>
+     <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 192</span>
+192:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+193:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span>
+194:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ative$/</span>, <span class="ruby-value str">''</span>)
+195:     <span class="ruby-keyword kw">else</span>
+196:       <span class="ruby-identifier">s3m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_3_MAPS</span>.<span class="ruby-identifier">dup</span>
+197:       <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
+198:         <span class="ruby-identifier">s3m</span>[<span class="ruby-value str">&quot;alise&quot;</span>] = <span class="ruby-value str">&quot;al&quot;</span>
+199:       <span class="ruby-keyword kw">end</span>
+200:       <span class="ruby-identifier">step_3_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s3m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
+201:       <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
+202:       <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_3_re</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}$/</span> 
+203:         <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s3m</span>[<span class="ruby-node">$&amp;</span>])
+204:       <span class="ruby-keyword kw">else</span>
+205:         <span class="ruby-keyword kw">self</span>
+206:       <span class="ruby-keyword kw">end</span>
+207:     <span class="ruby-keyword kw">end</span>
+208:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -986,28 +960,28 @@ found.)
                                        <div class="method-source-code"
                                                id="porter-step--source">
 <pre>
-     <span class="ruby-comment cmt"># File lib/porter2.rb, line 246</span>
-246:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-247:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ion$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)ion$/</span>
-248:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ion$/</span>, <span class="ruby-value str">''</span>)
-249:     <span class="ruby-keyword kw">else</span>
-250:       <span class="ruby-identifier">s4m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_4_MAPS</span>.<span class="ruby-identifier">dup</span>
-251:       <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
-252:         <span class="ruby-identifier">s4m</span>[<span class="ruby-value str">&quot;ise&quot;</span>] = <span class="ruby-value str">&quot;&quot;</span>
-253:       <span class="ruby-keyword kw">end</span>
-254:       <span class="ruby-identifier">step_4_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s4m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
-255:       <span class="ruby-identifier">r2</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span>
-256:       <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_4_re</span>
-257:         <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r2</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}/</span>
-258:           <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s4m</span>[<span class="ruby-node">$&amp;</span>])
-259:         <span class="ruby-keyword kw">else</span>
-260:           <span class="ruby-keyword kw">self</span>
-261:         <span class="ruby-keyword kw">end</span>
-262:       <span class="ruby-keyword kw">else</span>
-263:         <span class="ruby-keyword kw">self</span>
-264:       <span class="ruby-keyword kw">end</span>
-265:     <span class="ruby-keyword kw">end</span>
-266:   <span class="ruby-keyword kw">end</span></pre>
+     <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 218</span>
+218:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+219:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ion$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)ion$/</span>
+220:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ion$/</span>, <span class="ruby-value str">''</span>)
+221:     <span class="ruby-keyword kw">else</span>
+222:       <span class="ruby-identifier">s4m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_4_MAPS</span>.<span class="ruby-identifier">dup</span>
+223:       <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
+224:         <span class="ruby-identifier">s4m</span>[<span class="ruby-value str">&quot;ise&quot;</span>] = <span class="ruby-value str">&quot;&quot;</span>
+225:       <span class="ruby-keyword kw">end</span>
+226:       <span class="ruby-identifier">step_4_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s4m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
+227:       <span class="ruby-identifier">r2</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span>
+228:       <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_4_re</span>
+229:         <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r2</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}/</span>
+230:           <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s4m</span>[<span class="ruby-node">$&amp;</span>])
+231:         <span class="ruby-keyword kw">else</span>
+232:           <span class="ruby-keyword kw">self</span>
+233:         <span class="ruby-keyword kw">end</span>
+234:       <span class="ruby-keyword kw">else</span>
+235:         <span class="ruby-keyword kw">self</span>
+236:       <span class="ruby-keyword kw">end</span>
+237:     <span class="ruby-keyword kw">end</span>
+238:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -1051,21 +1025,21 @@ delete if in R2 and preceded by l
                                        <div class="method-source-code"
                                                id="porter-step--source">
 <pre>
-     <span class="ruby-comment cmt"># File lib/porter2.rb, line 272</span>
-272:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step5</span>
-273:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/l$/</span>
-274:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ll$/</span>, <span class="ruby-value str">'l'</span>) 
-275:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> 
-276:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>) 
-277:     <span class="ruby-keyword kw">else</span>
-278:       <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
-279:       <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">not</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}e$/</span>
-280:         <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
-281:       <span class="ruby-keyword kw">else</span>
-282:         <span class="ruby-keyword kw">self</span>
-283:       <span class="ruby-keyword kw">end</span>
-284:     <span class="ruby-keyword kw">end</span>
-285:   <span class="ruby-keyword kw">end</span></pre>
+     <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 244</span>
+244:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step5</span>
+245:     <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/l$/</span>
+246:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ll$/</span>, <span class="ruby-value str">'l'</span>) 
+247:     <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> 
+248:       <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>) 
+249:     <span class="ruby-keyword kw">else</span>
+250:       <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
+251:       <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">not</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}e$/</span>
+252:         <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
+253:       <span class="ruby-keyword kw">else</span>
+254:         <span class="ruby-keyword kw">self</span>
+255:       <span class="ruby-keyword kw">end</span>
+256:     <span class="ruby-keyword kw">end</span>
+257:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
@@ -1098,16 +1072,16 @@ Tidy up the word before we get down to the algorithm
                                        <div class="method-source-code"
                                                id="porter-tidy-source">
 <pre>
-    <span class="ruby-comment cmt"># File lib/porter2.rb, line 35</span>
-35:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_tidy</span>
-36:     <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">downcase</span>
-37:     
-38:     <span class="ruby-comment cmt"># map apostrophe-like characters to apostrophes</span>
-39:     <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/‘/</span>, <span class="ruby-value str">&quot;'&quot;</span>)
-40:     <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/’/</span>, <span class="ruby-value str">&quot;'&quot;</span>)
-41
-42:     <span class="ruby-identifier">preword</span>
-43:   <span class="ruby-keyword kw">end</span></pre>
+    <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 7</span>
+ 7:   <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_tidy</span>
+ 8:     <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">downcase</span>
+ 9:     
+10:     <span class="ruby-comment cmt"># map apostrophe-like characters to apostrophes</span>
+11:     <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/‘/</span>, <span class="ruby-value str">&quot;'&quot;</span>)
+12:     <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/’/</span>, <span class="ruby-value str">&quot;'&quot;</span>)
+13
+14:     <span class="ruby-identifier">preword</span>
+15:   <span class="ruby-keyword kw">end</span></pre>
                                        </div>
                                        
                                </div>
index dab7cbe8e8d4c7a1c5fea5efa1ac14e6d16dc65c..137ff2ac7d9305da2491ec208559ae346a1c3570 100644 (file)
                <div id="project-metadata">
                        
                        
+                       <div id="fileindex-section" class="section project-section">
+                               <h3 class="section-header">Files</h3>
+                               <ul>
+                               
+                                       <li class="file"><a href="./Readme_rdoc.html">Readme.rdoc</a></li>
+                               
+                               </ul>
+                       </div>
+                       
 
                        <div id="classindex-section" class="section project-section">
                                <h3 class="section-header">Class Index
index d9b4f190a8492223149b1a9dec750d078102deba..a564bce4ef11df60887297a3286b132ae863a3c8 100644 (file)
@@ -1,6 +1,8 @@
-Fri, 07 Jan 2011 08:46:50 +0000
+Fri, 11 Feb 2011 13:56:05 +0000
 ./test/tc_porter2_parts.rb     Wed, 05 Jan 2011 11:38:33 +0000
 ./test/ts_porter2.rb   Mon, 03 Jan 2011 00:20:11 +0000
 ./test/tc_porter2_full.rb      Wed, 05 Jan 2011 11:35:59 +0000
-./lib/porter2.rb       Fri, 07 Jan 2011 08:46:31 +0000
-./lib/porter2_constants.rb     Fri, 07 Jan 2011 08:46:16 +0000
+./lib/porter2.rb       Sun, 09 Jan 2011 18:34:08 +0000
+./lib/porter2_constants.rb     Sun, 09 Jan 2011 09:20:05 +0000
+./lib/porter2_implementation.rb        Sat, 08 Jan 2011 10:20:57 +0000
+./Readme.rdoc  Fri, 11 Feb 2011 13:55:53 +0000
index 6c609f905f739f5d60ff1f6b609385f5fff6195e..fc697abdb31b4bb1302adb31693648755188fce1 100644 (file)
 
        
        
+       <h2>Files</h2>
+       <ul>
+               
+                       <li class="file"><a href="Readme_rdoc.html">Readme.rdoc</a></li>
+               
+       </ul>
+       
 
        <h2 id="classes">Classes/Modules</h2>
        <ul>
diff --git a/doc/lib/porter2_constants_rb.html b/doc/lib/porter2_constants_rb.html
new file mode 100644 (file)
index 0000000..e67bf34
--- /dev/null
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+       "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+       <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+       <title>File: porter2_constants.rb [RDoc Documentation]</title>
+
+       <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
+
+       <script src="../js/jquery.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/thickbox-compressed.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/quicksearch.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/darkfish.js" type="text/javascript"
+               charset="utf-8"></script>
+</head>
+
+<body class="file file-popup">
+       <div id="metadata">
+               <dl>
+                       <dt class="modified-date">Last Modified</dt>
+                       <dd class="modified-date">2011-01-09 09:20:05 +0000</dd>
+
+                       
+                       <dt class="requires">Requires</dt>
+                       <dd class="requires">
+                               <ul>
+                               
+                               </ul>
+                       </dd>
+                       
+
+                       
+               </dl>
+       </div>
+
+       <div id="documentation">
+               
+               <div class="description">
+                       <h2>Description</h2>
+                       <p>
+coding: utf-8\r
+</p>
+
+               </div>
+               
+       </div>
+</body>
+</html>
+
diff --git a/doc/lib/porter2_implementation_rb.html b/doc/lib/porter2_implementation_rb.html
new file mode 100644 (file)
index 0000000..234ea7a
--- /dev/null
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+       "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+       <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+       <title>File: porter2_implementation.rb [RDoc Documentation]</title>
+
+       <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
+
+       <script src="../js/jquery.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/thickbox-compressed.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/quicksearch.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/darkfish.js" type="text/javascript"
+               charset="utf-8"></script>
+</head>
+
+<body class="file file-popup">
+       <div id="metadata">
+               <dl>
+                       <dt class="modified-date">Last Modified</dt>
+                       <dd class="modified-date">2011-01-08 10:20:57 +0000</dd>
+
+                       
+                       <dt class="requires">Requires</dt>
+                       <dd class="requires">
+                               <ul>
+                               
+                               </ul>
+                       </dd>
+                       
+
+                       
+               </dl>
+       </div>
+
+       <div id="documentation">
+               
+               <div class="description">
+                       <h2>Description</h2>
+                       <p>
+coding: utf-8
+</p>
+
+               </div>
+               
+       </div>
+</body>
+</html>
+
diff --git a/doc/lib/porter2_module_rb.html b/doc/lib/porter2_module_rb.html
new file mode 100644 (file)
index 0000000..e633525
--- /dev/null
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+       "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+       <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+       <title>File: porter2_module.rb [RDoc Documentation]</title>
+
+       <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
+
+       <script src="../js/jquery.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/thickbox-compressed.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/quicksearch.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/darkfish.js" type="text/javascript"
+               charset="utf-8"></script>
+</head>
+
+<body class="file file-popup">
+       <div id="metadata">
+               <dl>
+                       <dt class="modified-date">Last Modified</dt>
+                       <dd class="modified-date">2011-01-05 11:34:03 +0000</dd>
+
+                       
+                       <dt class="requires">Requires</dt>
+                       <dd class="requires">
+                               <ul>
+                               
+                               </ul>
+                       </dd>
+                       
+
+                       
+               </dl>
+       </div>
+
+       <div id="documentation">
+               
+               <div class="description">
+                       <h2>Description</h2>
+                       <p>
+coding: utf-8\r
+</p>
+
+               </div>
+               
+       </div>
+</body>
+</html>
+
index d4d44d9c49d90a53196e662e49e3ca877c444d52..850b2f21a550ded5ae612f80aa58c2069b0b0e4b 100644 (file)
@@ -24,7 +24,7 @@
        <div id="metadata">
                <dl>
                        <dt class="modified-date">Last Modified</dt>
-                       <dd class="modified-date">2011-01-07 08:46:31 +0000</dd>
+                       <dd class="modified-date">2011-01-09 18:34:08 +0000</dd>
 
                        
                        <dt class="requires">Requires</dt>
@@ -33,6 +33,8 @@
                                
                                        <li>porter2_constants</li>
                                
+                                       <li>porter2_implementation</li>
+                               
                                </ul>
                        </dd>
                        
diff --git a/doc/lib/porter2_string_rb.html b/doc/lib/porter2_string_rb.html
new file mode 100644 (file)
index 0000000..e41e011
--- /dev/null
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+       "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+       <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+       <title>File: porter2_string.rb [RDoc Documentation]</title>
+
+       <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
+
+       <script src="../js/jquery.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/thickbox-compressed.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/quicksearch.js" type="text/javascript"
+               charset="utf-8"></script>
+       <script src="../js/darkfish.js" type="text/javascript"
+               charset="utf-8"></script>
+</head>
+
+<body class="file file-popup">
+       <div id="metadata">
+               <dl>
+                       <dt class="modified-date">Last Modified</dt>
+                       <dd class="modified-date">2011-01-05 11:24:47 +0000</dd>
+
+                       
+                       <dt class="requires">Requires</dt>
+                       <dd class="requires">
+                               <ul>
+                               
+                                       <li>porter2_module</li>
+                               
+                               </ul>
+                       </dd>
+                       
+
+                       
+               </dl>
+       </div>
+
+       <div id="documentation">
+               
+               <div class="description">
+                       <h2>Description</h2>
+                       <p>
+coding: utf-8
+</p>
+
+               </div>
+               
+       </div>
+</body>
+</html>
+
index e99e3588ff6c78dbcbe485cfcfc0b592d3f66e84..229128dc2751237b5cf4701a8a7d9add81d3058a 100644 (file)
@@ -1,354 +1,8 @@
 # coding: utf-8
 
-require 'porter2_constants'
-
 # ==The Porter 2 stemmer
-#
-# This is the Porter 2 stemming algorithm, as described at 
-# http://snowball.tartarus.org/algorithms/english/stemmer.html
-# The original paper is:
-#
-# Porter, 1980, "An algorithm for suffix stripping", _Program_, Vol. 14,
-# no. 3, pp 130-137
-#
-# Constants for the stemmer are in the Porter2 module.
-#
-# Procedures that implement the stemmer are added to the String class.
-# 
-# The stemmer algorithm is implemented in the porter2_stem procedure.
-# 
-# ==Internationalisation
-# There isn't much, as this is a stemmer that only works for English.
-#
-# The +gb_english+ flag to the various procedures allows the stemmer to treat the British 
-# English '-ise' the same as the American English '-ize'.
-#
-# ==Longest suffixes
-# Several places in the algorithm require matching the longest suffix of a word. The 
-# regexp engine in Ruby 1.9 seems to handle alterntives in regexps by finding the 
-# alternative that matches at the first position in the string. As we're only talking 
-# about suffixes, that first match is also the longest suffix. If the regexp engine changes,
-# this behaviour may change and break the stemmer.
-
-class String
-  # Tidy up the word before we get down to the algorithm
-  def porter2_tidy
-    preword = self.to_s.strip.downcase
-    
-    # map apostrophe-like characters to apostrophes
-    preword.gsub!(/‘/, "'")
-    preword.gsub!(/’/, "'")
-
-    preword
-  end
-     
-
-  # Preprocess the word. 
-  # Remove any initial ', if present. Then, set initial y, or y after a vowel, to Y
-  #
-  # (The comment to 'establish the regions R1 and R2' in the original description 
-  # is an implementation optimisation that identifies where the regions start. As
-  # no modifications are made to the word that affect those positions, you may want
-  # to cache them now. This implementation doesn't do that.)
-  def porter2_preprocess    
-    w = self.dup
-
-    # remove any initial apostrophe
-    w.gsub!(/^'*(.)/, '\1')
-    
-    # set initial y, or y after a vowel, to Y
-    w.gsub!(/^y/, "Y")
-    w.gsub!(/(#{Porter2::V})y/, '\1Y')
-    
-    w
-  end
-    
-
-  # R1 is the portion of the word after the first non-vowel after the first vowel
-  # (with words beginning 'gener-', 'commun-', and 'arsen-' treated as special cases
-  def porter2_r1
-    if self =~ /^(gener|commun|arsen)(?<r1>.*)/
-      Regexp.last_match(:r1)
-    else
-      self =~ /#{Porter2::V}#{Porter2::C}(?<r1>.*)$/
-      Regexp.last_match(:r1) || ""
-    end
-  end
-
-
-  # R2 is the portion of R1 (porter2_r1) after the first non-vowel after the first vowel
-  def porter2_r2
-    self.porter2_r1 =~ /#{Porter2::V}#{Porter2::C}(?<r2>.*)$/
-    Regexp.last_match(:r2) || ""
-  end
-  
-
-  # Returns true if the word ends with a short syllable
-  def porter2_ends_with_short_syllable?
-    self =~ /#{Porter2::SHORT_SYLLABLE}$/ ? true : false
-  end
-
-
-  # A word is short if it ends in a short syllable, and R1 is null
-  def porter2_is_short_word?
-    self.porter2_ends_with_short_syllable? and self.porter2_r1.empty?
-  end
-  
-
-  # Search for the longest among the suffixes, 
-  # * '
-  # * 's
-  # * 's'
-  # and remove if found.
-  def porter2_step0
-    self.sub!(/(.)('s'|'s|')$/, '\1') || self
-  end
-  
-
-  # Search for the longest among the following suffixes, and perform the action indicated. 
-  # sses:: replace by ss 
-  # ied, ies:: replace by i if preceded by more than one letter, otherwise by ie
-  # s:: delete if the preceding word part contains a vowel not immediately before the s
-  # us, ss:: do nothing
-  def porter2_step1a
-    if self =~ /sses$/
-      self.sub(/sses$/, 'ss')
-    elsif self =~ /..(ied|ies)$/
-      self.sub(/(ied|ies)$/, 'i')
-    elsif self =~ /(ied|ies)$/
-      self.sub(/(ied|ies)$/, 'ie')
-    elsif self =~ /(us|ss)$/
-      self
-    elsif self =~ /s$/
-      if self =~ /(#{Porter2::V}.+)s$/
-        self.sub(/s$/, '') 
-      else
-        self
-      end
-    else
-      self
-    end
-  end
-  
 
-  # Search for the longest among the following suffixes, and perform the action indicated. 
-  # eed, eedly:: replace by ee if the suffix is also in R1 
-  # ed, edly, ing, ingly:: delete if the preceding word part contains a vowel and, 
-  #                        after the deletion:
-  #                        * if the word ends at, bl or iz: add e, or
-  #                        * if the word ends with a double: remove the last letter, or
-  #                        * if the word is short: add e
-  # 
-  # (If gb_english is +true+, treat the 'is' suffix as 'iz' above.)
-  def porter2_step1b(gb_english = false)
-    if self =~ /(eed|eedly)$/
-      if self.porter2_r1 =~ /(eed|eedly)$/
-        self.sub(/(eed|eedly)$/, 'ee')
-      else
-        self
-      end
-    else
-      w = self.dup
-      if w =~ /#{Porter2::V}.*(ed|edly|ing|ingly)$/
-        w.sub!(/(ed|edly|ing|ingly)$/, '')
-        if w =~ /(at|lb|iz)$/
-          w += 'e' 
-        elsif w =~ /is$/ and gb_english
-          w += 'e' 
-        elsif w =~ /#{Porter2::Double}$/
-         w.chop!
-        elsif w.porter2_is_short_word?
-          w += 'e'
-        end
-      end
-      w
-    end
-  end
-
-
-  # Replace a suffix of y or Y by i if it is preceded by a non-vowel which is 
-  # not the first letter of the word.
-  def porter2_step1c
-    if self =~ /.+#{Porter2::C}(y|Y)$/
-      self.sub(/(y|Y)$/, 'i')
-    else
-      self
-    end
-  end
-  
-
-  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_2_MAPS. 
-  # If one is found and that suffix occurs in R1, replace it with the value 
-  # found in STEP_2_MAPS.
-  #
-  # (Suffixes 'ogi' and 'li' are treated as special cases in the procedure.)
-  # 
-  # (If gb_english is +true+, replace the 'iser' and 'isation' suffixes with
-  # 'ise', similarly to how 'izer' and 'ization' are treated.)
-  def porter2_step2(gb_english = false)
-    r1 = self.porter2_r1
-    s2m = Porter2::STEP_2_MAPS.dup
-    if gb_english
-      s2m["iser"] = "ise"
-      s2m["isation"] = "ise"
-    end
-    step_2_re = Regexp.union(s2m.keys.map {|r| Regexp.new(r + "$")})
-    if self =~ step_2_re
-      if r1 =~ /#{$&}$/
-        self.sub(/#{$&}$/, s2m[$&])
-      else
-        self
-      end
-    elsif r1 =~ /li$/ and self =~ /(#{Porter2::Valid_LI})li$/
-      self.sub(/li$/, '')
-    elsif r1 =~ /ogi$/ and self =~ /logi$/
-      self.sub(/ogi$/, 'og')
-    else
-      self
-    end
-  end
-     
-
-  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_3_MAPS. 
-  # If one is found and that suffix occurs in R1, replace it with the value 
-  # found in STEP_3_MAPS.
-  #
-  # (Suffix 'ative' is treated as a special case in the procedure.)
-  # 
-  # (If gb_english is +true+, replace the 'alise' suffix with
-  # 'al', similarly to how 'alize' is treated.)
-  def porter2_step3(gb_english = false)
-    if self =~ /ative$/ and self.porter2_r2 =~ /ative$/
-      self.sub(/ative$/, '')
-    else
-      s3m = Porter2::STEP_3_MAPS.dup
-      if gb_english
-       s3m["alise"] = "al"
-      end
-      step_3_re = Regexp.union(s3m.keys.map {|r| Regexp.new(r + "$")})
-      r1 = self.porter2_r1
-      if self =~ step_3_re and r1 =~ /#{$&}$/ 
-       self.sub(/#{$&}$/, s3m[$&])
-      else
-       self
-      end
-    end
-  end
-  
-
-  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_4_MAPS. 
-  # If one is found and that suffix occurs in R2, replace it with the value 
-  # found in STEP_4_MAPS.
-  #
-  # (Suffix 'ion' is treated as a special case in the procedure.)
-  # 
-  # (If gb_english is +true+, delete the 'ise' suffix if found.)
-  def porter2_step4(gb_english = false)
-    if self.porter2_r2 =~ /ion$/ and self =~ /(s|t)ion$/
-      self.sub(/ion$/, '')
-    else
-      s4m = Porter2::STEP_4_MAPS.dup
-      if gb_english
-        s4m["ise"] = ""
-      end
-      step_4_re = Regexp.union(s4m.keys.map {|r| Regexp.new(r + "$")})
-      r2 = self.porter2_r2
-      if self =~ step_4_re
-        if r2 =~ /#{$&}/
-          self.sub(/#{$&}$/, s4m[$&])
-        else
-          self
-        end
-      else
-        self
-      end
-    end
-  end
-
-
-  # Search for the the following suffixes, and, if found, perform the action indicated. 
-  # e:: delete if in R2, or in R1 and not preceded by a short syllable
-  # l:: delete if in R2 and preceded by l
-  def porter2_step5
-    if self =~ /ll$/ and self.porter2_r2 =~ /l$/
-      self.sub(/ll$/, 'l') 
-    elsif self =~ /e$/ and self.porter2_r2 =~ /e$/ 
-      self.sub(/e$/, '') 
-    else
-      r1 = self.porter2_r1
-      if self =~ /e$/ and r1 =~ /e$/ and not self =~ /#{Porter2::SHORT_SYLLABLE}e$/
-        self.sub(/e$/, '')
-      else
-        self
-      end
-    end
-  end
-  
-
-  # Turn all Y letters into y
-  def porter2_postprocess
-    self.gsub(/Y/, 'y')
-  end
-
-  public
-  
-  # Perform the stemming procedure. If +gb_english+ is true, treat '-ise' and similar suffixes
-  # as '-ize' in American English.
-  def porter2_stem(gb_english = false)
-    preword = self.porter2_tidy
-    return preword if preword.length <= 2
-
-    word = preword.porter2_preprocess
-    
-    if Porter2::SPECIAL_CASES.has_key? word
-      Porter2::SPECIAL_CASES[word]
-    else
-      w1a = word.porter2_step0.porter2_step1a
-      if Porter2::STEP_1A_SPECIAL_CASES.include? w1a 
-       w1a
-      else
-        w1a.porter2_step1b(gb_english).porter2_step1c.porter2_step2(gb_english).porter2_step3(gb_english).porter2_step4(gb_english).porter2_step5.porter2_postprocess
-      end
-    end
-  end  
-  
-  # A verbose version of porter2_stem that prints the output of each stage to STDOUT
-  def porter2_stem_verbose(gb_english = false)
-    preword = self.porter2_tidy
-    puts "Preword: #{preword}"
-    return preword if preword.length <= 2
-
-    word = preword.porter2_preprocess
-    puts "Preprocessed: #{word}"
-    
-    if Porter2::SPECIAL_CASES.has_key? word
-      puts "Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}"
-      Porter2::SPECIAL_CASES[word]
-    else
-      r1 = word.porter2_r1
-      r2 = word.porter2_r2
-      puts "R1 = #{r1}, R2 = #{r2}"
-    
-      w0 = word.porter2_step0 ; puts "After step 0:  #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})"
-      w1a = w0.porter2_step1a ; puts "After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})"
-      
-      if Porter2::STEP_1A_SPECIAL_CASES.include? w1a
-        puts "Returning #{w1a} as 1a special case"
-       w1a
-      else
-        w1b = w1a.porter2_step1b(gb_english) ; puts "After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})"
-        w1c = w1b.porter2_step1c ; puts "After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})"
-        w2 = w1c.porter2_step2(gb_english) ; puts "After step 2:  #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})"
-        w3 = w2.porter2_step3(gb_english) ; puts "After step 3:  #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})"
-        w4 = w3.porter2_step4(gb_english) ; puts "After step 4:  #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})"
-        w5 = w4.porter2_step5 ; puts "After step 5:  #{w5}"
-        wpost = w5.porter2_postprocess ; puts "After postprocess: #{wpost}"
-        wpost
-      end
-    end
-  end  
-  
-  alias stem porter2_stem
-
-end
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+require 'porter2_constants'
+require 'porter2_implementation'
 
diff --git a/lib/porter2_constants.rb b/lib/porter2_constants.rb
new file mode 100644 (file)
index 0000000..f123bc5
--- /dev/null
@@ -0,0 +1,114 @@
+# coding: utf-8\r
+\r
+# Constants for the Porter 2 stemmer\r
+module Porter2\r
+\r
+  # A non-vowel\r
+  C = "[^aeiouy]"\r
+\r
+  # A vowel: a e i o u y\r
+  V = "[aeiouy]"\r
+\r
+  # A non-vowel other than w, x, or Y\r
+  CW = "[^aeiouywxY]"\r
+\r
+  # Doubles created when adding a suffix: these are undoubled when stemmed\r
+  Double = "(bb|dd|ff|gg|mm|nn|pp|rr|tt)"\r
+\r
+  # A valid letter that can come before 'li' (or 'ly')\r
+  Valid_LI = "[cdeghkmnrt]"\r
+\r
+  # A specification for a short syllable.\r
+  #\r
+  # A short syllable in a word is either: \r
+  # 1. a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel, or \r
+  # 2. a vowel at the beginning of the word followed by a non-vowel.\r
+  #\r
+  # (The original document is silent on whether sequences of two or more non-vowels make a\r
+  # syllable long. But as this specification is only used to find sequences of non-vowel -\r
+  # vowel - non-vowel - end-of-word, this ambiguity does not have an effect.)\r
+  SHORT_SYLLABLE = "((#{C}#{V}#{CW})|(^#{V}#{C}))"\r
+\r
+  # Suffix transformations used in porter2_step2.\r
+  # (ogi, li endings dealt with in procedure)\r
+  STEP_2_MAPS = {"tional" => "tion",\r
+                "enci" => "ence",\r
+                 "anci" => "ance",\r
+                 "abli" => "able",\r
+                 "entli" => "ent",\r
+                 "ization" => "ize",\r
+                 "izer" => "ize",\r
+                 "ational" => "ate",\r
+                 "ation" => "ate",\r
+                 "ator" => "ate",\r
+                 "alism" => "al",\r
+                 "aliti" => "al",\r
+                 "alli" => "al",\r
+                 "fulness" => "ful",\r
+                 "ousli" => "ous",\r
+                 "ousness" => "ous",\r
+                 "iveness" => "ive",\r
+                 "iviti" => "ive",\r
+                 "biliti" => "ble",\r
+                 "bli" => "ble",\r
+                 "fulli" => "ful",\r
+                 "lessli" => "less" }\r
+\r
+  # Suffix transformations used in porter2_step3.\r
+  # (ative ending dealt with in procedure)  \r
+  STEP_3_MAPS = {"tional" => "tion",\r
+                 "ational" => "ate",\r
+                 "alize" => "al",\r
+                 "icate" => "ic",\r
+                 "iciti" => "ic",\r
+                 "ical" => "ic",\r
+                 "ful" => "",\r
+                 "ness" => "" }\r
+  \r
+  # Suffix transformations used in porter2_step4.\r
+  # (ion ending dealt with in procedure)\r
+  STEP_4_MAPS = {"al" => "",\r
+                 "ance" => "",\r
+                 "ence" => "",\r
+                 "er" => "",\r
+                 "ic" => "",\r
+                 "able" => "",\r
+                 "ible" => "",\r
+                 "ant" => "",\r
+                 "ement" => "",\r
+                 "ment" => "",\r
+                 "ent" => "",\r
+                 "ism" => "",\r
+                 "ate" => "",\r
+                 "iti" => "",\r
+                 "ous" => "",\r
+                 "ive" => "",\r
+                 "ize" => "" }\r
+  \r
+  # Special-case stemmings \r
+  SPECIAL_CASES = {"skis" => "ski",\r
+                   "skies" => "sky",\r
+                    \r
+                   "dying" => "die",\r
+                   "lying" => "lie",\r
+                   "tying" => "tie",\r
+                   "idly" =>  "idl",\r
+                   "gently" => "gentl",\r
+                   "ugly" => "ugli",\r
+                   "early" => "earli",\r
+                   "only" => "onli",\r
+                   "singly" =>"singl",\r
+                    \r
+                   "sky" => "sky",\r
+                   "news" => "news",\r
+                   "howe" => "howe",\r
+                   "atlas" => "atlas",\r
+                   "cosmos" => "cosmos",\r
+                   "bias" => "bias",\r
+                   "andes" => "andes" }\r
+   \r
+  # Special case words to stop processing after step 1a.\r
+  STEP_1A_SPECIAL_CASES = %w[ inning outing canning herring earring proceed exceed succeed ]\r
+\r
+end\r
+\r
diff --git a/lib/porter2_implementation.rb b/lib/porter2_implementation.rb
new file mode 100644 (file)
index 0000000..906f5bd
--- /dev/null
@@ -0,0 +1,326 @@
+# coding: utf-8
+
+# Implementation of the Porter 2 stemmer. String#porter2_stem is the main stemming procedure.
+
+class String
+  # Tidy up the word before we get down to the algorithm
+  def porter2_tidy
+    preword = self.to_s.strip.downcase
+    
+    # map apostrophe-like characters to apostrophes
+    preword.gsub!(/‘/, "'")
+    preword.gsub!(/’/, "'")
+
+    preword
+  end
+     
+
+  # Preprocess the word. 
+  # Remove any initial ', if present. Then, set initial y, or y after a vowel, to Y
+  #
+  # (The comment to 'establish the regions R1 and R2' in the original description 
+  # is an implementation optimisation that identifies where the regions start. As
+  # no modifications are made to the word that affect those positions, you may want
+  # to cache them now. This implementation doesn't do that.)
+  def porter2_preprocess    
+    w = self.dup
+
+    # remove any initial apostrophe
+    w.gsub!(/^'*(.)/, '\1')
+    
+    # set initial y, or y after a vowel, to Y
+    w.gsub!(/^y/, "Y")
+    w.gsub!(/(#{Porter2::V})y/, '\1Y')
+    
+    w
+  end
+    
+
+  # R1 is the portion of the word after the first non-vowel after the first vowel
+  # (with words beginning 'gener-', 'commun-', and 'arsen-' treated as special cases)
+  def porter2_r1
+    if self =~ /^(gener|commun|arsen)(?<r1>.*)/
+      Regexp.last_match(:r1)
+    else
+      self =~ /#{Porter2::V}#{Porter2::C}(?<r1>.*)$/
+      Regexp.last_match(:r1) || ""
+    end
+  end
+
+
+  # R2 is the portion of R1 (porter2_r1) after the first non-vowel after the first vowel
+  def porter2_r2
+    self.porter2_r1 =~ /#{Porter2::V}#{Porter2::C}(?<r2>.*)$/
+    Regexp.last_match(:r2) || ""
+  end
+  
+
+  # Returns true if the word ends with a short syllable
+  def porter2_ends_with_short_syllable?
+    self =~ /#{Porter2::SHORT_SYLLABLE}$/ ? true : false
+  end
+
+
+  # A word is short if it ends in a short syllable, and R1 is null
+  def porter2_is_short_word?
+    self.porter2_ends_with_short_syllable? and self.porter2_r1.empty?
+  end
+  
+
+  # Search for the longest among the suffixes, 
+  # * '
+  # * 's
+  # * 's'
+  # and remove if found.
+  def porter2_step0
+    self.sub!(/(.)('s'|'s|')$/, '\1') || self
+  end
+  
+
+  # Search for the longest among the following suffixes, and perform the action indicated. 
+  # sses:: replace by ss 
+  # ied, ies:: replace by i if preceded by more than one letter, otherwise by ie
+  # s:: delete if the preceding word part contains a vowel not immediately before the s
+  # us, ss:: do nothing
+  def porter2_step1a
+    if self =~ /sses$/
+      self.sub(/sses$/, 'ss')
+    elsif self =~ /..(ied|ies)$/
+      self.sub(/(ied|ies)$/, 'i')
+    elsif self =~ /(ied|ies)$/
+      self.sub(/(ied|ies)$/, 'ie')
+    elsif self =~ /(us|ss)$/
+      self
+    elsif self =~ /s$/
+      if self =~ /(#{Porter2::V}.+)s$/
+        self.sub(/s$/, '') 
+      else
+        self
+      end
+    else
+      self
+    end
+  end
+  
+
+  # Search for the longest among the following suffixes, and perform the action indicated. 
+  # eed, eedly:: replace by ee if the suffix is also in R1 
+  # ed, edly, ing, ingly:: delete if the preceding word part contains a vowel and, 
+  #                        after the deletion:
+  #                        * if the word ends at, bl or iz: add e, or
+  #                        * if the word ends with a double: remove the last letter, or
+  #                        * if the word is short: add e
+  # 
+  # (If gb_english is +true+, treat the 'is' suffix as 'iz' above.)
+  def porter2_step1b(gb_english = false)
+    if self =~ /(eed|eedly)$/
+      if self.porter2_r1 =~ /(eed|eedly)$/
+        self.sub(/(eed|eedly)$/, 'ee')
+      else
+        self
+      end
+    else
+      w = self.dup
+      if w =~ /#{Porter2::V}.*(ed|edly|ing|ingly)$/
+        w.sub!(/(ed|edly|ing|ingly)$/, '')
+        if w =~ /(at|lb|iz)$/
+          w += 'e' 
+        elsif w =~ /is$/ and gb_english
+          w += 'e' 
+        elsif w =~ /#{Porter2::Double}$/
+         w.chop!
+        elsif w.porter2_is_short_word?
+          w += 'e'
+        end
+      end
+      w
+    end
+  end
+
+
+  # Replace a suffix of y or Y by i if it is preceded by a non-vowel which is 
+  # not the first letter of the word.
+  def porter2_step1c
+    if self =~ /.+#{Porter2::C}(y|Y)$/
+      self.sub(/(y|Y)$/, 'i')
+    else
+      self
+    end
+  end
+  
+
+  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_2_MAPS. 
+  # If one is found and that suffix occurs in R1, replace it with the value 
+  # found in STEP_2_MAPS.
+  #
+  # (Suffixes 'ogi' and 'li' are treated as special cases in the procedure.)
+  # 
+  # (If gb_english is +true+, replace the 'iser' and 'isation' suffixes with
+  # 'ise', similarly to how 'izer' and 'ization' are treated.)
+  def porter2_step2(gb_english = false)
+    r1 = self.porter2_r1
+    s2m = Porter2::STEP_2_MAPS.dup
+    if gb_english
+      s2m["iser"] = "ise"
+      s2m["isation"] = "ise"
+    end
+    step_2_re = Regexp.union(s2m.keys.map {|r| Regexp.new(r + "$")})
+    if self =~ step_2_re
+      if r1 =~ /#{$&}$/
+        self.sub(/#{$&}$/, s2m[$&])
+      else
+        self
+      end
+    elsif r1 =~ /li$/ and self =~ /(#{Porter2::Valid_LI})li$/
+      self.sub(/li$/, '')
+    elsif r1 =~ /ogi$/ and self =~ /logi$/
+      self.sub(/ogi$/, 'og')
+    else
+      self
+    end
+  end
+     
+
+  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_3_MAPS. 
+  # If one is found and that suffix occurs in R1, replace it with the value 
+  # found in STEP_3_MAPS.
+  #
+  # (Suffix 'ative' is treated as a special case in the procedure.)
+  # 
+  # (If gb_english is +true+, replace the 'alise' suffix with
+  # 'al', similarly to how 'alize' is treated.)
+  def porter2_step3(gb_english = false)
+    if self =~ /ative$/ and self.porter2_r2 =~ /ative$/
+      self.sub(/ative$/, '')
+    else
+      s3m = Porter2::STEP_3_MAPS.dup
+      if gb_english
+       s3m["alise"] = "al"
+      end
+      step_3_re = Regexp.union(s3m.keys.map {|r| Regexp.new(r + "$")})
+      r1 = self.porter2_r1
+      if self =~ step_3_re and r1 =~ /#{$&}$/ 
+       self.sub(/#{$&}$/, s3m[$&])
+      else
+       self
+      end
+    end
+  end
+  
+
+  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_4_MAPS. 
+  # If one is found and that suffix occurs in R2, replace it with the value 
+  # found in STEP_4_MAPS.
+  #
+  # (Suffix 'ion' is treated as a special case in the procedure.)
+  # 
+  # (If gb_english is +true+, delete the 'ise' suffix if found.)
+  def porter2_step4(gb_english = false)
+    if self.porter2_r2 =~ /ion$/ and self =~ /(s|t)ion$/
+      self.sub(/ion$/, '')
+    else
+      s4m = Porter2::STEP_4_MAPS.dup
+      if gb_english
+        s4m["ise"] = ""
+      end
+      step_4_re = Regexp.union(s4m.keys.map {|r| Regexp.new(r + "$")})
+      r2 = self.porter2_r2
+      if self =~ step_4_re
+        if r2 =~ /#{$&}/
+          self.sub(/#{$&}$/, s4m[$&])
+        else
+          self
+        end
+      else
+        self
+      end
+    end
+  end
+
+
+  # Search for the following suffixes and, if found, perform the action indicated.
+  # e:: delete if in R2, or in R1 and not preceded by a short syllable
+  # l:: delete if in R2 and preceded by l
+  def porter2_step5
+    if self =~ /ll$/ and self.porter2_r2 =~ /l$/
+      self.sub(/ll$/, 'l') 
+    elsif self =~ /e$/ and self.porter2_r2 =~ /e$/ 
+      self.sub(/e$/, '') 
+    else
+      r1 = self.porter2_r1
+      if self =~ /e$/ and r1 =~ /e$/ and not self =~ /#{Porter2::SHORT_SYLLABLE}e$/
+        self.sub(/e$/, '')
+      else
+        self
+      end
+    end
+  end
+  
+
+  # Turn all Y letters into y
+  def porter2_postprocess
+    self.gsub(/Y/, 'y')
+  end
+
+  public
+  
+  # Perform the stemming procedure. If +gb_english+ is true, British English '-ise' and
+  # similar suffixes are treated in the same way as their American English '-ize' forms.
+  def porter2_stem(gb_english = false)
+    preword = self.porter2_tidy
+    return preword if preword.length <= 2
+
+    word = preword.porter2_preprocess
+    
+    if Porter2::SPECIAL_CASES.has_key? word
+      Porter2::SPECIAL_CASES[word]
+    else
+      w1a = word.porter2_step0.porter2_step1a
+      if Porter2::STEP_1A_SPECIAL_CASES.include? w1a 
+       w1a
+      else
+        w1a.porter2_step1b(gb_english).porter2_step1c.porter2_step2(gb_english).porter2_step3(gb_english).porter2_step4(gb_english).porter2_step5.porter2_postprocess
+      end
+    end
+  end  
+  
+  # A verbose version of porter2_stem that prints the output of each stage to STDOUT
+  def porter2_stem_verbose(gb_english = false)
+    preword = self.porter2_tidy
+    puts "Preword: #{preword}"
+    return preword if preword.length <= 2
+
+    word = preword.porter2_preprocess
+    puts "Preprocessed: #{word}"
+    
+    if Porter2::SPECIAL_CASES.has_key? word
+      puts "Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}"
+      Porter2::SPECIAL_CASES[word]
+    else
+      r1 = word.porter2_r1
+      r2 = word.porter2_r2
+      puts "R1 = #{r1}, R2 = #{r2}"
+    
+      w0 = word.porter2_step0 ; puts "After step 0:  #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})"
+      w1a = w0.porter2_step1a ; puts "After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})"
+      
+      if Porter2::STEP_1A_SPECIAL_CASES.include? w1a
+        puts "Returning #{w1a} as 1a special case"
+       w1a
+      else
+        w1b = w1a.porter2_step1b(gb_english) ; puts "After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})"
+        w1c = w1b.porter2_step1c ; puts "After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})"
+        w2 = w1c.porter2_step2(gb_english) ; puts "After step 2:  #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})"
+        w3 = w2.porter2_step3(gb_english) ; puts "After step 3:  #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})"
+        w4 = w3.porter2_step4(gb_english) ; puts "After step 4:  #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})"
+        w5 = w4.porter2_step5 ; puts "After step 5:  #{w5}"
+        wpost = w5.porter2_postprocess ; puts "After postprocess: #{wpost}"
+        wpost
+      end
+    end
+  end  
+  
+  alias stem porter2_stem
+
+end
+