--- /dev/null
+# coding: utf-8
+
+# ==The Porter 2 stemmer
+# This is the Porter 2 stemming algorithm, as described at
+# http://snowball.tartarus.org/algorithms/english/stemmer.html
+# The original paper is:
+#
+# Porter, 1980, "An algorithm for suffix stripping", _Program_, Vol. 14,
+# no. 3, pp 130-137
+#
+# ==Features of this implementation
+# This stemmer is written in pure Ruby, making it easy to modify for language variants.
+# For instance, the original Porter stemmer only works for American English and does
+# not recognise British English's '-ise' as an alternate spelling of '-ize'. This
+# implementation has been extended to handle British English correctly.
+#
+# This stemmer also features a comprehensive test set of over 29,000 words, taken from the
+# {Porter 2 stemmer website}[http://snowball.tartarus.org/algorithms/english/stemmer.html].
+#
+# ==Files
+# Constants for the stemmer are in the Porter2 module.
+#
+# Procedures that implement the stemmer are added to the String class.
+#
+# The stemmer algorithm is implemented in the String#porter2_stem procedure.
+#
+# ==Internationalisation
+# There isn't much, as this is a stemmer that only works for English.
+#
+# The +gb_english+ flag to the various procedures allows the stemmer to treat the British
+# English '-ise' the same as the American English '-ize'.
+#
+# ==Longest suffixes
+# Several places in the algorithm require matching the longest suffix of a word. The
+# regexp engine in Ruby 1.9 seems to handle alternatives in regexps by finding the
+# alternative that matches at the first position in the string. As we're only talking
+# about suffixes, that first match is also the longest suffix. If the regexp engine changes,
+# this behaviour may change and break the stemmer.
+#
+# ==Usage
+# Call the String#porter2_stem or String#stem methods on a string to return its stem:
+# "consistency".stem # => "consist"
+# "knitting".stem # => "knit"
+# "articulated".stem # => "articul"
+# "nationalize".stem # => "nation"
+# "nationalise".stem # => "nationalis"
+# "nationalise".stem(true) # => "nation"
+#
+# ==Author
+# The Porter 2 stemming algorithm was developed by
+# {Martin Porter}[http://snowball.tartarus.org/algorithms/english/stemmer.html].
+# This implementation is by {Neil Smith}[http://www.njae.me.uk].
+
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+ <title>Module: Porter2</title>
+
+ <link rel="stylesheet" href="./rdoc.css" type="text/css" media="screen" />
+
+ <script src="./js/jquery.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="./js/thickbox-compressed.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="./js/quicksearch.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="./js/darkfish.js" type="text/javascript"
+ charset="utf-8"></script>
+
+</head>
+<body class="module">
+
+ <div id="metadata">
+ <div id="home-metadata">
+ <div id="home-section" class="section">
+ <h3 class="section-header">
+ <a href="./index.html">Home</a>
+ <a href="./index.html#classes">Classes</a>
+ <a href="./index.html#methods">Methods</a>
+ </h3>
+ </div>
+ </div>
+
+ <div id="file-metadata">
+ <div id="file-list-section" class="section">
+ <h3 class="section-header">In Files</h3>
+ <div class="section-body">
+ <ul>
+
+ <li><a href="./lib/porter2_constants_rb.html?TB_iframe=true&height=550&width=785"
+ class="thickbox" title="lib/porter2_constants.rb">lib/porter2_constants.rb</a></li>
+
+ </ul>
+ </div>
+ </div>
+
+
+ </div>
+
+ <div id="class-metadata">
+
+ <!-- Parent Class -->
+
+
+ <!-- Namespace Contents -->
+
+
+ <!-- Method Quickref -->
+
+
+ <!-- Included Modules -->
+
+ </div>
+
+ <div id="project-metadata">
+
+
+ <div id="fileindex-section" class="section project-section">
+ <h3 class="section-header">Files</h3>
+ <ul>
+
+ <li class="file"><a href="./Readme_rdoc.html">Readme.rdoc</a></li>
+
+ </ul>
+ </div>
+
+
+ <div id="classindex-section" class="section project-section">
+ <h3 class="section-header">Class Index
+ <span class="search-toggle"><img src="./images/find.png"
+ height="16" width="16" alt="[+]"
+ title="show/hide quicksearch" /></span></h3>
+ <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
+ <fieldset>
+ <legend>Quicksearch</legend>
+ <input type="text" name="quicksearch" value=""
+ class="quicksearch-field" />
+ </fieldset>
+ </form>
+
+ <ul class="link-list">
+
+ <li><a href="./Porter2.html">Porter2</a></li>
+
+ <li><a href="./String.html">String</a></li>
+
+ <li><a href="./TestPorter2.html">TestPorter2</a></li>
+
+ </ul>
+ <div id="no-class-search-results" style="display: none;">No matching classes.</div>
+ </div>
+
+
+ </div>
+ </div>
+
+ <div id="documentation">
+ <h1 class="module">Porter2</h1>
+
+ <div id="description">
+ <p>
+Constants for the Porter 2 stemmer
+</p>
+
+ </div>
+
+ <!-- Constants -->
+
+ <div id="constants-list" class="section">
+ <h3 class="section-header">Constants</h3>
+ <dl>
+
+ <dt><a name="C">C</a></dt>
+
+ <dd class="description"><p>
+A non-vowel
+</p></dd>
+
+
+ <dt><a name="V">V</a></dt>
+
+ <dd class="description"><p>
+A vowel: a e i o u y
+</p></dd>
+
+
+ <dt><a name="CW">CW</a></dt>
+
+ <dd class="description"><p>
+A non-vowel other than w, x, or Y
+</p></dd>
+
+
+ <dt><a name="Double">Double</a></dt>
+
+ <dd class="description"><p>
+Doubles created when adding a suffix: these are undoubled when stemmed
+</p></dd>
+
+
+ <dt><a name="Valid_LI">Valid_LI</a></dt>
+
+ <dd class="description"><p>
+A valid letter that can come before ‘li’ (or ‘ly’)
+</p></dd>
+
+
+ <dt><a name="SHORT_SYLLABLE">SHORT_SYLLABLE</a></dt>
+
+ <dd class="description"><p>
+A specification for a short syllable.
+</p>
+<p>
+A short syllable in a word is either:
+</p>
+<ol>
+<li><p>
+a vowel followed by a non-vowel other than w, x or Y and preceded by a
+non-vowel, or
+</p>
+</li>
+<li><p>
+a vowel at the beginning of the word followed by a non-vowel.
+</p>
+</li>
+</ol>
+<p>
+(The original document is silent on whether sequences of two or more
+non-vowels make a syllable long. But as this specification is only used to
+find sequences of non-vowel - vowel - non-vowel - end-of-word, this
+ambiguity does not have an effect.)
+</p></dd>
+
+
+ <dt><a name="STEP_2_MAPS">STEP_2_MAPS</a></dt>
+
+ <dd class="description"><p>
+Suffix transformations used in porter2_step2. (ogi, li endings dealt with
+in procedure)
+</p></dd>
+
+
+ <dt><a name="STEP_3_MAPS">STEP_3_MAPS</a></dt>
+
+ <dd class="description"><p>
+Suffix transformations used in porter2_step3. (ative ending dealt with in
+procedure)
+</p></dd>
+
+
+ <dt><a name="STEP_4_MAPS">STEP_4_MAPS</a></dt>
+
+ <dd class="description"><p>
+Suffix transformations used in porter2_step4. (ion ending dealt with in
+procedure)
+</p></dd>
+
+
+ <dt><a name="SPECIAL_CASES">SPECIAL_CASES</a></dt>
+
+ <dd class="description"><p>
+Special-case stemmings
+</p></dd>
+
+
+ <dt><a name="STEP_1A_SPECIAL_CASES">STEP_1A_SPECIAL_CASES</a></dt>
+
+ <dd class="description"><p>
+Special case words to stop processing after step 1a.
+</p></dd>
+
+
+ </dl>
+ </div>
+
+
+ <!-- Attributes -->
+
+
+ <!-- Methods -->
+
+
+ </div>
+
+
+ <div id="rdoc-debugging-section-dump" class="debugging-section">
+
+ <p>Disabled; run with --debug to generate this.</p>
+
+ </div>
+
+ <div id="validator-badges">
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
+ <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
+ Rdoc Generator</a> 1.1.6</small>.</p>
+ </div>
+
+</body>
+</html>
+
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+ <title>File: Readme.rdoc [RDoc Documentation]</title>
+
+ <link type="text/css" media="screen" href="./rdoc.css" rel="stylesheet" />
+
+ <script src="./js/jquery.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="./js/thickbox-compressed.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="./js/quicksearch.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="./js/darkfish.js" type="text/javascript"
+ charset="utf-8"></script>
+</head>
+
+<body class="file">
+ <div id="metadata">
+ <div id="home-metadata">
+ <div id="home-section" class="section">
+ <h3 class="section-header">
+ <a href="./index.html">Home</a>
+ <a href="./index.html#classes">Classes</a>
+ <a href="./index.html#methods">Methods</a>
+ </h3>
+ </div>
+ </div>
+
+ <div id="project-metadata">
+
+
+ <div id="fileindex-section" class="section project-section">
+ <h3 class="section-header">Files</h3>
+ <ul>
+
+ <li class="file"><a href="./Readme_rdoc.html">Readme.rdoc</a></li>
+
+ </ul>
+ </div>
+
+
+ <div id="classindex-section" class="section project-section">
+ <h3 class="section-header">Class Index
+ <span class="search-toggle"><img src="./images/find.png"
+ height="16" width="16" alt="[+]"
+ title="show/hide quicksearch" /></span></h3>
+ <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
+ <fieldset>
+ <legend>Quicksearch</legend>
+ <input type="text" name="quicksearch" value=""
+ class="quicksearch-field" />
+ </fieldset>
+ </form>
+
+ <ul class="link-list">
+
+ <li><a href="./Porter2.html">Porter2</a></li>
+
+ <li><a href="./String.html">String</a></li>
+
+ <li><a href="./TestPorter2.html">TestPorter2</a></li>
+
+ </ul>
+ <div id="no-class-search-results" style="display: none;">No matching classes.</div>
+ </div>
+
+
+ </div>
+ </div>
+
+ <div id="documentation">
+ <h2>The Porter 2 stemmer</h2>
+<p>
+This is the Porter 2 stemming algorithm, as described at <a
+href="http://snowball.tartarus.org/algorithms/english/stemmer.html">snowball.tartarus.org/algorithms/english/stemmer.html</a>
+The original paper is:
+</p>
+<p>
+Porter, 1980, “An algorithm for suffix stripping”,
+<em>Program</em>, Vol. 14, no. 3, pp 130-137
+</p>
+<h2>Features of this implementation</h2>
+<p>
+This stemmer is written in pure Ruby, making it easy to modify for language
+variants. For instance, the original Porter stemmer only works for
+American English and does not recognise British English’s
+‘-ise’ as an alternate spelling of ‘-ize’. This
+implementation has been extended to handle British English correctly.
+</p>
+<p>
+This stemmer also features a comprehensive test set of over 29,000 words,
+taken from the <a
+href="http://snowball.tartarus.org/algorithms/english/stemmer.html">Porter
+2 stemmer website</a>.
+</p>
+<h2>Files</h2>
+<p>
+Constants for the stemmer are in the <a href="Porter2.html">Porter2</a>
+module.
+</p>
+<p>
+Procedures that implement the stemmer are added to the <a
+href="String.html">String</a> class.
+</p>
+<p>
+The stemmer algorithm is implemented in the <a
+href="String.html#method-i-porter2_stem">String#porter2_stem</a> procedure.
+</p>
+<h2>Internationalisation</h2>
+<p>
+There isn’t much, as this is a stemmer that only works for English.
+</p>
+<p>
+The <tt>gb_english</tt> flag to the various procedures allows the stemmer
+to treat the British English ‘-ise’ the same as the American
+English ‘-ize’.
+</p>
+<h2>Longest suffixes</h2>
+<p>
+Several places in the algorithm require matching the longest suffix of a
+word. The regexp engine in Ruby 1.9 seems to handle alternatives in regexps
+by finding the alternative that matches at the first position in the
+string. As we’re only talking about suffixes, that first match is
+also the longest suffix. If the regexp engine changes, this behaviour may
+change and break the stemmer.
+</p>
+<h2>Usage</h2>
+<p>
+Call the <a
+href="String.html#method-i-porter2_stem">String#porter2_stem</a> or <a
+href="String.html#method-i-stem">String#stem</a> methods on a string to
+return its stem
+</p>
+<pre>
+ "consistency".stem # => "consist"
+ "knitting".stem # => "knit"
+ "articulated".stem # => "articul"
+ "nationalize".stem # => "nation"
+ "nationalise".stem # => "nationalis"
+ "nationalise".stem(true) # => "nation"
+</pre>
+<h2>Author</h2>
+<p>
+The Porter 2 stemming algorithm was developed by <a
+href="http://snowball.tartarus.org/algorithms/english/stemmer.html">Martin
+Porter</a>. This implementation is by <a href="http://www.njae.me.uk">Neil
+Smith</a>.
+</p>
+
+ </div>
+
+ <div id="validator-badges">
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
+ <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
+ Rdoc Generator</a> 1.1.6</small>.</p>
+ </div>
+</body>
+</html>
+
<div class="section-body">
<ul>
- <li><a href="./lib/porter2_rb.html?TB_iframe=true&height=550&width=785"
- class="thickbox" title="lib/porter2.rb">lib/porter2.rb</a></li>
+ <li><a href="./lib/porter2_implementation_rb.html?TB_iframe=true&height=550&width=785"
+ class="thickbox" title="lib/porter2_implementation.rb">lib/porter2_implementation.rb</a></li>
</ul>
</div>
<div id="project-metadata">
+ <div id="fileindex-section" class="section project-section">
+ <h3 class="section-header">Files</h3>
+ <ul>
+
+ <li class="file"><a href="./Readme_rdoc.html">Readme.rdoc</a></li>
+
+ </ul>
+ </div>
+
<div id="classindex-section" class="section project-section">
<h3 class="section-header">Class Index
<h1 class="class">String</h1>
<div id="description">
- <h2>The Porter 2 stemmer</h2>
-<p>
-This is the Porter 2 stemming algorithm, as described at <a
-href="http://snowball.tartarus.org/algorithms/english/stemmer.html">snowball.tartarus.org/algorithms/english/stemmer.html</a>
-The original paper is:
-</p>
-<p>
-Porter, 1980, “An algorithm for suffix stripping”,
-<em>Program</em>, Vol. 14, no. 3, pp 130-137
-</p>
-<p>
-Constants for the stemmer are in the <a href="Porter2.html">Porter2</a>
-module.
-</p>
-<p>
-Procedures that implement the stemmer are added to the <a
-href="String.html">String</a> class.
-</p>
-<p>
-The stemmer algorithm is implemented in the <a
-href="String.html#method-i-porter2_stem">porter2_stem</a> procedure.
-</p>
-<h2>Internationalisation</h2>
-<p>
-There isn’t much, as this is a stemmer that only works for English.
-</p>
-<p>
-The <tt>gb_english</tt> flag to the various procedures allows the stemmer
-to treat the British English ’-ise’ the same as the American
-English ’-ize’.
-</p>
-<h2>Longest suffixes</h2>
-<p>
-Several places in the algorithm require matching the longest suffix of a
-word. The regexp engine in Ruby 1.9 seems to handle alterntives in regexps
-by finding the alternative that matches at the first position in the
-string. As we’re only talking about suffixes, that first match is
-also the longest suffix. If the regexp engine changes, this behaviour may
-change and break the stemmer.
+ <p>
+Implementation of the Porter 2 stemmer. <a
+href="String.html#method-i-porter2_stem">String#porter2_stem</a> is the
+main stemming procedure.
</p>
</div>
<div class="method-source-code"
id="porter-ends-with-short-syllable--source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 87</span>
-87: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_ends_with_short_syllable?</span>
-88: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}$/</span> <span class="ruby-operator">?</span> <span class="ruby-keyword kw">true</span> <span class="ruby-operator">:</span> <span class="ruby-keyword kw">false</span>
-89: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 59</span>
+59: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_ends_with_short_syllable?</span>
+60: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}$/</span> <span class="ruby-operator">?</span> <span class="ruby-keyword kw">true</span> <span class="ruby-operator">:</span> <span class="ruby-keyword kw">false</span>
+61: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-is-short-word--source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 93</span>
-93: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_is_short_word?</span>
-94: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>.<span class="ruby-identifier">empty?</span>
-95: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 65</span>
+65: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_is_short_word?</span>
+66: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>.<span class="ruby-identifier">empty?</span>
+67: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-postprocess-source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 289</span>
-289: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_postprocess</span>
-290: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/Y/</span>, <span class="ruby-value str">'y'</span>)
-291: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 261</span>
+261: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_postprocess</span>
+262: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/Y/</span>, <span class="ruby-value str">'y'</span>)
+263: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-preprocess-source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 53</span>
-53: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_preprocess</span>
-54: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
-55:
-56: <span class="ruby-comment cmt"># remove any initial apostrophe</span>
-57: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^'*(.)/</span>, <span class="ruby-value str">'\1'</span>)
-58:
-59: <span class="ruby-comment cmt"># set initial y, or y after a vowel, to Y</span>
-60: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^y/</span>, <span class="ruby-value str">"Y"</span>)
-61: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-node">/(#{Porter2::V})y/</span>, <span class="ruby-value str">'\1Y'</span>)
-62:
-63: <span class="ruby-identifier">w</span>
-64: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 25</span>
+25: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_preprocess</span>
+26: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
+27:
+28: <span class="ruby-comment cmt"># remove any initial apostrophe</span>
+29: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^'*(.)/</span>, <span class="ruby-value str">'\1'</span>)
+30:
+31: <span class="ruby-comment cmt"># set initial y, or y after a vowel, to Y</span>
+32: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^y/</span>, <span class="ruby-value str">"Y"</span>)
+33: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-node">/(#{Porter2::V})y/</span>, <span class="ruby-value str">'\1Y'</span>)
+34:
+35: <span class="ruby-identifier">w</span>
+36: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-r--source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 69</span>
-69: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r1</span>
-70: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^(gener|commun|arsen)(?<r1>.*)/</span>
-71: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>)
-72: <span class="ruby-keyword kw">else</span>
-73: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?<r1>.*)$/</span>
-74: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">""</span>
-75: <span class="ruby-keyword kw">end</span>
-76: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 41</span>
+41: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r1</span>
+42: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^(gener|commun|arsen)(?<r1>.*)/</span>
+43: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>)
+44: <span class="ruby-keyword kw">else</span>
+45: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?<r1>.*)$/</span>
+46: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">""</span>
+47: <span class="ruby-keyword kw">end</span>
+48: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-r--source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 80</span>
-80: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r2</span>
-81: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?<r2>.*)$/</span>
-82: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r2</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">""</span>
-83: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 52</span>
+52: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r2</span>
+53: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?<r2>.*)$/</span>
+54: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r2</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">""</span>
+55: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-stem-source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 297</span>
-297: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-298: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
-299: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><=</span> <span class="ruby-value">2</span>
-300:
-301: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
-302:
-303: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
-304: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
-305: <span class="ruby-keyword kw">else</span>
-306: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span>.<span class="ruby-identifier">porter2_step1a</span>
-307: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
-308: <span class="ruby-identifier">w1a</span>
-309: <span class="ruby-keyword kw">else</span>
-310: <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step5</span>.<span class="ruby-identifier">porter2_postprocess</span>
-311: <span class="ruby-keyword kw">end</span>
-312: <span class="ruby-keyword kw">end</span>
-313: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 269</span>
+269: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+270: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
+271: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><=</span> <span class="ruby-value">2</span>
+272:
+273: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
+274:
+275: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
+276: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
+277: <span class="ruby-keyword kw">else</span>
+278: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span>.<span class="ruby-identifier">porter2_step1a</span>
+279: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
+280: <span class="ruby-identifier">w1a</span>
+281: <span class="ruby-keyword kw">else</span>
+282: <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step5</span>.<span class="ruby-identifier">porter2_postprocess</span>
+283: <span class="ruby-keyword kw">end</span>
+284: <span class="ruby-keyword kw">end</span>
+285: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-stem-verbose-source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 316</span>
-316: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem_verbose</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-317: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
-318: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Preword: #{preword}"</span>
-319: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><=</span> <span class="ruby-value">2</span>
-320:
-321: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
-322: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Preprocessed: #{word}"</span>
-323:
-324: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
-325: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}"</span>
-326: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
-327: <span class="ruby-keyword kw">else</span>
-328: <span class="ruby-identifier">r1</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r1</span>
-329: <span class="ruby-identifier">r2</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r2</span>
-330: <span class="ruby-identifier">puts</span> <span class="ruby-node">"R1 = #{r1}, R2 = #{r2}"</span>
-331:
-332: <span class="ruby-identifier">w0</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 0: #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})"</span>
-333: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">w0</span>.<span class="ruby-identifier">porter2_step1a</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})"</span>
-334:
-335: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
-336: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Returning #{w1a} as 1a special case"</span>
-337: <span class="ruby-identifier">w1a</span>
-338: <span class="ruby-keyword kw">else</span>
-339: <span class="ruby-identifier">w1b</span> = <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})"</span>
-340: <span class="ruby-identifier">w1c</span> = <span class="ruby-identifier">w1b</span>.<span class="ruby-identifier">porter2_step1c</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})"</span>
-341: <span class="ruby-identifier">w2</span> = <span class="ruby-identifier">w1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 2: #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})"</span>
-342: <span class="ruby-identifier">w3</span> = <span class="ruby-identifier">w2</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 3: #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})"</span>
-343: <span class="ruby-identifier">w4</span> = <span class="ruby-identifier">w3</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 4: #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})"</span>
-344: <span class="ruby-identifier">w5</span> = <span class="ruby-identifier">w4</span>.<span class="ruby-identifier">porter2_step5</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 5: #{w5}"</span>
-345: <span class="ruby-identifier">wpost</span> = <span class="ruby-identifier">w5</span>.<span class="ruby-identifier">porter2_postprocess</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After postprocess: #{wpost}"</span>
-346: <span class="ruby-identifier">wpost</span>
-347: <span class="ruby-keyword kw">end</span>
-348: <span class="ruby-keyword kw">end</span>
-349: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 288</span>
+288: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem_verbose</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+289: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
+290: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Preword: #{preword}"</span>
+291: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><=</span> <span class="ruby-value">2</span>
+292:
+293: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
+294: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Preprocessed: #{word}"</span>
+295:
+296: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
+297: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}"</span>
+298: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
+299: <span class="ruby-keyword kw">else</span>
+300: <span class="ruby-identifier">r1</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r1</span>
+301: <span class="ruby-identifier">r2</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r2</span>
+302: <span class="ruby-identifier">puts</span> <span class="ruby-node">"R1 = #{r1}, R2 = #{r2}"</span>
+303:
+304: <span class="ruby-identifier">w0</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 0: #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})"</span>
+305: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">w0</span>.<span class="ruby-identifier">porter2_step1a</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})"</span>
+306:
+307: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
+308: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Returning #{w1a} as 1a special case"</span>
+309: <span class="ruby-identifier">w1a</span>
+310: <span class="ruby-keyword kw">else</span>
+311: <span class="ruby-identifier">w1b</span> = <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})"</span>
+312: <span class="ruby-identifier">w1c</span> = <span class="ruby-identifier">w1b</span>.<span class="ruby-identifier">porter2_step1c</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})"</span>
+313: <span class="ruby-identifier">w2</span> = <span class="ruby-identifier">w1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 2: #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})"</span>
+314: <span class="ruby-identifier">w3</span> = <span class="ruby-identifier">w2</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 3: #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})"</span>
+315: <span class="ruby-identifier">w4</span> = <span class="ruby-identifier">w3</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 4: #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})"</span>
+316: <span class="ruby-identifier">w5</span> = <span class="ruby-identifier">w4</span>.<span class="ruby-identifier">porter2_step5</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 5: #{w5}"</span>
+317: <span class="ruby-identifier">wpost</span> = <span class="ruby-identifier">w5</span>.<span class="ruby-identifier">porter2_postprocess</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After postprocess: #{wpost}"</span>
+318: <span class="ruby-identifier">wpost</span>
+319: <span class="ruby-keyword kw">end</span>
+320: <span class="ruby-keyword kw">end</span>
+321: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-step--source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 103</span>
-103: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step0</span>
-104: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(.)('s'|'s|')$/</span>, <span class="ruby-value str">'\1'</span>) <span class="ruby-operator">||</span> <span class="ruby-keyword kw">self</span>
-105: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 75</span>
+75: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step0</span>
+76: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(.)('s'|'s|')$/</span>, <span class="ruby-value str">'\1'</span>) <span class="ruby-operator">||</span> <span class="ruby-keyword kw">self</span>
+77: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-step-a-source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 113</span>
-113: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1a</span>
-114: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/sses$/</span>
-115: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/sses$/</span>, <span class="ruby-value str">'ss'</span>)
-116: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/..(ied|ies)$/</span>
-117: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'i'</span>)
-118: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ied|ies)$/</span>
-119: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'ie'</span>)
-120: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(us|ss)$/</span>
-121: <span class="ruby-keyword kw">self</span>
-122: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/s$/</span>
-123: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::V}.+)s$/</span>
-124: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/s$/</span>, <span class="ruby-value str">''</span>)
-125: <span class="ruby-keyword kw">else</span>
-126: <span class="ruby-keyword kw">self</span>
-127: <span class="ruby-keyword kw">end</span>
-128: <span class="ruby-keyword kw">else</span>
-129: <span class="ruby-keyword kw">self</span>
-130: <span class="ruby-keyword kw">end</span>
-131: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 85</span>
+ 85: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1a</span>
+ 86: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/sses$/</span>
+ 87: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/sses$/</span>, <span class="ruby-value str">'ss'</span>)
+ 88: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/..(ied|ies)$/</span>
+ 89: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'i'</span>)
+ 90: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ied|ies)$/</span>
+ 91: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'ie'</span>)
+ 92: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(us|ss)$/</span>
+ 93: <span class="ruby-keyword kw">self</span>
+ 94: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/s$/</span>
+ 95: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::V}.+)s$/</span>
+ 96: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/s$/</span>, <span class="ruby-value str">''</span>)
+ 97: <span class="ruby-keyword kw">else</span>
+ 98: <span class="ruby-keyword kw">self</span>
+ 99: <span class="ruby-keyword kw">end</span>
+100: <span class="ruby-keyword kw">else</span>
+101: <span class="ruby-keyword kw">self</span>
+102: <span class="ruby-keyword kw">end</span>
+103: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-step-b-source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 143</span>
-143: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-144: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
-145: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
-146: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(eed|eedly)$/</span>, <span class="ruby-value str">'ee'</span>)
-147: <span class="ruby-keyword kw">else</span>
-148: <span class="ruby-keyword kw">self</span>
-149: <span class="ruby-keyword kw">end</span>
-150: <span class="ruby-keyword kw">else</span>
-151: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
-152: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}.*(ed|edly|ing|ingly)$/</span>
-153: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(ed|edly|ing|ingly)$/</span>, <span class="ruby-value str">''</span>)
-154: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(at|lb|iz)$/</span>
-155: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
-156: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/is$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">gb_english</span>
-157: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
-158: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::Double}$/</span>
-159: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
-160: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span>
-161: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
-162: <span class="ruby-keyword kw">end</span>
-163: <span class="ruby-keyword kw">end</span>
-164: <span class="ruby-identifier">w</span>
-165: <span class="ruby-keyword kw">end</span>
-166: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 115</span>
+115: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+116: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
+117: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
+118: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(eed|eedly)$/</span>, <span class="ruby-value str">'ee'</span>)
+119: <span class="ruby-keyword kw">else</span>
+120: <span class="ruby-keyword kw">self</span>
+121: <span class="ruby-keyword kw">end</span>
+122: <span class="ruby-keyword kw">else</span>
+123: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
+124: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}.*(ed|edly|ing|ingly)$/</span>
+125: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(ed|edly|ing|ingly)$/</span>, <span class="ruby-value str">''</span>)
+126: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(at|lb|iz)$/</span>
+127: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
+128: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/is$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">gb_english</span>
+129: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
+130: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::Double}$/</span>
+131: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
+132: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span>
+133: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
+134: <span class="ruby-keyword kw">end</span>
+135: <span class="ruby-keyword kw">end</span>
+136: <span class="ruby-identifier">w</span>
+137: <span class="ruby-keyword kw">end</span>
+138: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-step-c-source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 171</span>
-171: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1c</span>
-172: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/.+#{Porter2::C}(y|Y)$/</span>
-173: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(y|Y)$/</span>, <span class="ruby-value str">'i'</span>)
-174: <span class="ruby-keyword kw">else</span>
-175: <span class="ruby-keyword kw">self</span>
-176: <span class="ruby-keyword kw">end</span>
-177: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 143</span>
+143: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1c</span>
+144: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/.+#{Porter2::C}(y|Y)$/</span>
+145: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(y|Y)$/</span>, <span class="ruby-value str">'i'</span>)
+146: <span class="ruby-keyword kw">else</span>
+147: <span class="ruby-keyword kw">self</span>
+148: <span class="ruby-keyword kw">end</span>
+149: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-step--source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 188</span>
-188: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-189: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
-190: <span class="ruby-identifier">s2m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_2_MAPS</span>.<span class="ruby-identifier">dup</span>
-191: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
-192: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">"iser"</span>] = <span class="ruby-value str">"ise"</span>
-193: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">"isation"</span>] = <span class="ruby-value str">"ise"</span>
-194: <span class="ruby-keyword kw">end</span>
-195: <span class="ruby-identifier">step_2_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s2m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)})
-196: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_2_re</span>
-197: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&}$/</span>
-198: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s2m</span>[<span class="ruby-node">$&</span>])
-199: <span class="ruby-keyword kw">else</span>
-200: <span class="ruby-keyword kw">self</span>
-201: <span class="ruby-keyword kw">end</span>
-202: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/li$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::Valid_LI})li$/</span>
-203: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/li$/</span>, <span class="ruby-value str">''</span>)
-204: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ogi$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/logi$/</span>
-205: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ogi$/</span>, <span class="ruby-value str">'og'</span>)
-206: <span class="ruby-keyword kw">else</span>
-207: <span class="ruby-keyword kw">self</span>
-208: <span class="ruby-keyword kw">end</span>
-209: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 160</span>
+160: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+161: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
+162: <span class="ruby-identifier">s2m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_2_MAPS</span>.<span class="ruby-identifier">dup</span>
+163: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
+164: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">"iser"</span>] = <span class="ruby-value str">"ise"</span>
+165: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">"isation"</span>] = <span class="ruby-value str">"ise"</span>
+166: <span class="ruby-keyword kw">end</span>
+167: <span class="ruby-identifier">step_2_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s2m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)})
+168: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_2_re</span>
+169: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&}$/</span>
+170: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s2m</span>[<span class="ruby-node">$&</span>])
+171: <span class="ruby-keyword kw">else</span>
+172: <span class="ruby-keyword kw">self</span>
+173: <span class="ruby-keyword kw">end</span>
+174: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/li$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::Valid_LI})li$/</span>
+175: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/li$/</span>, <span class="ruby-value str">''</span>)
+176: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ogi$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/logi$/</span>
+177: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ogi$/</span>, <span class="ruby-value str">'og'</span>)
+178: <span class="ruby-keyword kw">else</span>
+179: <span class="ruby-keyword kw">self</span>
+180: <span class="ruby-keyword kw">end</span>
+181: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-step--source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 220</span>
-220: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-221: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span>
-222: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ative$/</span>, <span class="ruby-value str">''</span>)
-223: <span class="ruby-keyword kw">else</span>
-224: <span class="ruby-identifier">s3m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_3_MAPS</span>.<span class="ruby-identifier">dup</span>
-225: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
-226: <span class="ruby-identifier">s3m</span>[<span class="ruby-value str">"alise"</span>] = <span class="ruby-value str">"al"</span>
-227: <span class="ruby-keyword kw">end</span>
-228: <span class="ruby-identifier">step_3_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s3m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)})
-229: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
-230: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_3_re</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&}$/</span>
-231: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s3m</span>[<span class="ruby-node">$&</span>])
-232: <span class="ruby-keyword kw">else</span>
-233: <span class="ruby-keyword kw">self</span>
-234: <span class="ruby-keyword kw">end</span>
-235: <span class="ruby-keyword kw">end</span>
-236: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 192</span>
+192: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+193: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span>
+194: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ative$/</span>, <span class="ruby-value str">''</span>)
+195: <span class="ruby-keyword kw">else</span>
+196: <span class="ruby-identifier">s3m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_3_MAPS</span>.<span class="ruby-identifier">dup</span>
+197: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
+198: <span class="ruby-identifier">s3m</span>[<span class="ruby-value str">"alise"</span>] = <span class="ruby-value str">"al"</span>
+199: <span class="ruby-keyword kw">end</span>
+200: <span class="ruby-identifier">step_3_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s3m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)})
+201: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
+202: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_3_re</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&}$/</span>
+203: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s3m</span>[<span class="ruby-node">$&</span>])
+204: <span class="ruby-keyword kw">else</span>
+205: <span class="ruby-keyword kw">self</span>
+206: <span class="ruby-keyword kw">end</span>
+207: <span class="ruby-keyword kw">end</span>
+208: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-step--source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 246</span>
-246: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
-247: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ion$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)ion$/</span>
-248: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ion$/</span>, <span class="ruby-value str">''</span>)
-249: <span class="ruby-keyword kw">else</span>
-250: <span class="ruby-identifier">s4m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_4_MAPS</span>.<span class="ruby-identifier">dup</span>
-251: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
-252: <span class="ruby-identifier">s4m</span>[<span class="ruby-value str">"ise"</span>] = <span class="ruby-value str">""</span>
-253: <span class="ruby-keyword kw">end</span>
-254: <span class="ruby-identifier">step_4_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s4m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)})
-255: <span class="ruby-identifier">r2</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span>
-256: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_4_re</span>
-257: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r2</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&}/</span>
-258: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s4m</span>[<span class="ruby-node">$&</span>])
-259: <span class="ruby-keyword kw">else</span>
-260: <span class="ruby-keyword kw">self</span>
-261: <span class="ruby-keyword kw">end</span>
-262: <span class="ruby-keyword kw">else</span>
-263: <span class="ruby-keyword kw">self</span>
-264: <span class="ruby-keyword kw">end</span>
-265: <span class="ruby-keyword kw">end</span>
-266: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 218</span>
+218: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
+219: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ion$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)ion$/</span>
+220: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ion$/</span>, <span class="ruby-value str">''</span>)
+221: <span class="ruby-keyword kw">else</span>
+222: <span class="ruby-identifier">s4m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_4_MAPS</span>.<span class="ruby-identifier">dup</span>
+223: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
+224: <span class="ruby-identifier">s4m</span>[<span class="ruby-value str">"ise"</span>] = <span class="ruby-value str">""</span>
+225: <span class="ruby-keyword kw">end</span>
+226: <span class="ruby-identifier">step_4_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s4m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)})
+227: <span class="ruby-identifier">r2</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span>
+228: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_4_re</span>
+229: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r2</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&}/</span>
+230: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s4m</span>[<span class="ruby-node">$&</span>])
+231: <span class="ruby-keyword kw">else</span>
+232: <span class="ruby-keyword kw">self</span>
+233: <span class="ruby-keyword kw">end</span>
+234: <span class="ruby-keyword kw">else</span>
+235: <span class="ruby-keyword kw">self</span>
+236: <span class="ruby-keyword kw">end</span>
+237: <span class="ruby-keyword kw">end</span>
+238: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-step--source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 272</span>
-272: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step5</span>
-273: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/l$/</span>
-274: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ll$/</span>, <span class="ruby-value str">'l'</span>)
-275: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span>
-276: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
-277: <span class="ruby-keyword kw">else</span>
-278: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
-279: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">not</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}e$/</span>
-280: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
-281: <span class="ruby-keyword kw">else</span>
-282: <span class="ruby-keyword kw">self</span>
-283: <span class="ruby-keyword kw">end</span>
-284: <span class="ruby-keyword kw">end</span>
-285: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 244</span>
+244: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step5</span>
+245: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/l$/</span>
+246: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ll$/</span>, <span class="ruby-value str">'l'</span>)
+247: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span>
+248: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
+249: <span class="ruby-keyword kw">else</span>
+250: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
+251: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">not</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}e$/</span>
+252: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
+253: <span class="ruby-keyword kw">else</span>
+254: <span class="ruby-keyword kw">self</span>
+255: <span class="ruby-keyword kw">end</span>
+256: <span class="ruby-keyword kw">end</span>
+257: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div class="method-source-code"
id="porter-tidy-source">
<pre>
- <span class="ruby-comment cmt"># File lib/porter2.rb, line 35</span>
-35: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_tidy</span>
-36: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">downcase</span>
-37:
-38: <span class="ruby-comment cmt"># map apostrophe-like characters to apostrophes</span>
-39: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/‘/</span>, <span class="ruby-value str">"'"</span>)
-40: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/’/</span>, <span class="ruby-value str">"'"</span>)
-41:
-42: <span class="ruby-identifier">preword</span>
-43: <span class="ruby-keyword kw">end</span></pre>
+ <span class="ruby-comment cmt"># File lib/porter2_implementation.rb, line 7</span>
+ 7: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_tidy</span>
+ 8: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">downcase</span>
+ 9:
+10: <span class="ruby-comment cmt"># map apostrophe-like characters to apostrophes</span>
+11: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/‘/</span>, <span class="ruby-value str">"'"</span>)
+12: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/’/</span>, <span class="ruby-value str">"'"</span>)
+13:
+14: <span class="ruby-identifier">preword</span>
+15: <span class="ruby-keyword kw">end</span></pre>
</div>
</div>
<div id="project-metadata">
+ <div id="fileindex-section" class="section project-section">
+ <h3 class="section-header">Files</h3>
+ <ul>
+
+ <li class="file"><a href="./Readme_rdoc.html">Readme.rdoc</a></li>
+
+ </ul>
+ </div>
+
<div id="classindex-section" class="section project-section">
<h3 class="section-header">Class Index
-Fri, 07 Jan 2011 08:46:50 +0000
+Fri, 11 Feb 2011 13:56:05 +0000
./test/tc_porter2_parts.rb Wed, 05 Jan 2011 11:38:33 +0000
./test/ts_porter2.rb Mon, 03 Jan 2011 00:20:11 +0000
./test/tc_porter2_full.rb Wed, 05 Jan 2011 11:35:59 +0000
-./lib/porter2.rb Fri, 07 Jan 2011 08:46:31 +0000
-./lib/porter2_constants.rb Fri, 07 Jan 2011 08:46:16 +0000
+./lib/porter2.rb Sun, 09 Jan 2011 18:34:08 +0000
+./lib/porter2_constants.rb Sun, 09 Jan 2011 09:20:05 +0000
+./lib/porter2_implementation.rb Sat, 08 Jan 2011 10:20:57 +0000
+./Readme.rdoc Fri, 11 Feb 2011 13:55:53 +0000
+ <h2>Files</h2>
+ <ul>
+
+ <li class="file"><a href="Readme_rdoc.html">Readme.rdoc</a></li>
+
+ </ul>
+
<h2 id="classes">Classes/Modules</h2>
<ul>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+ <title>File: porter2_constants.rb [RDoc Documentation]</title>
+
+ <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
+
+ <script src="../js/jquery.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/thickbox-compressed.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/quicksearch.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/darkfish.js" type="text/javascript"
+ charset="utf-8"></script>
+</head>
+
+<body class="file file-popup">
+ <div id="metadata">
+ <dl>
+ <dt class="modified-date">Last Modified</dt>
+ <dd class="modified-date">2011-01-09 09:20:05 +0000</dd>
+
+
+ <dt class="requires">Requires</dt>
+ <dd class="requires">
+ <ul>
+
+ </ul>
+ </dd>
+
+
+
+ </dl>
+ </div>
+
+ <div id="documentation">
+
+ <div class="description">
+ <h2>Description</h2>
+ <p>
+coding: utf-8\r
+</p>
+
+ </div>
+
+ </div>
+</body>
+</html>
+
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+ <title>File: porter2_implementation.rb [RDoc Documentation]</title>
+
+ <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
+
+ <script src="../js/jquery.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/thickbox-compressed.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/quicksearch.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/darkfish.js" type="text/javascript"
+ charset="utf-8"></script>
+</head>
+
+<body class="file file-popup">
+ <div id="metadata">
+ <dl>
+ <dt class="modified-date">Last Modified</dt>
+ <dd class="modified-date">2011-01-08 10:20:57 +0000</dd>
+
+
+ <dt class="requires">Requires</dt>
+ <dd class="requires">
+ <ul>
+
+ </ul>
+ </dd>
+
+
+
+ </dl>
+ </div>
+
+ <div id="documentation">
+
+ <div class="description">
+ <h2>Description</h2>
+ <p>
+coding: utf-8
+</p>
+
+ </div>
+
+ </div>
+</body>
+</html>
+
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+ <title>File: porter2_module.rb [RDoc Documentation]</title>
+
+ <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
+
+ <script src="../js/jquery.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/thickbox-compressed.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/quicksearch.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/darkfish.js" type="text/javascript"
+ charset="utf-8"></script>
+</head>
+
+<body class="file file-popup">
+ <div id="metadata">
+ <dl>
+ <dt class="modified-date">Last Modified</dt>
+ <dd class="modified-date">2011-01-05 11:34:03 +0000</dd>
+
+
+ <dt class="requires">Requires</dt>
+ <dd class="requires">
+ <ul>
+
+ </ul>
+ </dd>
+
+
+
+ </dl>
+ </div>
+
+ <div id="documentation">
+
+ <div class="description">
+ <h2>Description</h2>
+ <p>
+coding: utf-8\r
+</p>
+
+ </div>
+
+ </div>
+</body>
+</html>
+
<div id="metadata">
<dl>
<dt class="modified-date">Last Modified</dt>
- <dd class="modified-date">2011-01-07 08:46:31 +0000</dd>
+ <dd class="modified-date">2011-01-09 18:34:08 +0000</dd>
<dt class="requires">Requires</dt>
<li>porter2_constants</li>
+ <li>porter2_implementation</li>
+
</ul>
</dd>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
+
+ <title>File: porter2_string.rb [RDoc Documentation]</title>
+
+ <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
+
+ <script src="../js/jquery.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/thickbox-compressed.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/quicksearch.js" type="text/javascript"
+ charset="utf-8"></script>
+ <script src="../js/darkfish.js" type="text/javascript"
+ charset="utf-8"></script>
+</head>
+
+<body class="file file-popup">
+ <div id="metadata">
+ <dl>
+ <dt class="modified-date">Last Modified</dt>
+ <dd class="modified-date">2011-01-05 11:24:47 +0000</dd>
+
+
+ <dt class="requires">Requires</dt>
+ <dd class="requires">
+ <ul>
+
+ <li>porter2_module</li>
+
+ </ul>
+ </dd>
+
+
+
+ </dl>
+ </div>
+
+ <div id="documentation">
+
+ <div class="description">
+ <h2>Description</h2>
+ <p>
+coding: utf-8
+</p>
+
+ </div>
+
+ </div>
+</body>
+</html>
+
# coding: utf-8
-require 'porter2_constants'
-
# ==The Porter 2 stemmer
-#
-# This is the Porter 2 stemming algorithm, as described at
-# http://snowball.tartarus.org/algorithms/english/stemmer.html
-# The original paper is:
-#
-# Porter, 1980, "An algorithm for suffix stripping", _Program_, Vol. 14,
-# no. 3, pp 130-137
-#
-# Constants for the stemmer are in the Porter2 module.
-#
-# Procedures that implement the stemmer are added to the String class.
-#
-# The stemmer algorithm is implemented in the porter2_stem procedure.
-#
-# ==Internationalisation
-# There isn't much, as this is a stemmer that only works for English.
-#
-# The +gb_english+ flag to the various procedures allows the stemmer to treat the British
-# English '-ise' the same as the American English '-ize'.
-#
-# ==Longest suffixes
-# Several places in the algorithm require matching the longest suffix of a word. The
-# regexp engine in Ruby 1.9 seems to handle alternatives in regexps by finding the
-# alternative that matches at the first position in the string. As we're only talking
-# about suffixes, that first match is also the longest suffix. If the regexp engine changes,
-# this behaviour may change and break the stemmer.
-
-class String
- # Tidy up the word before we get down to the algorithm
- def porter2_tidy
- preword = self.to_s.strip.downcase
-
- # map apostrophe-like characters to apostrophes
- preword.gsub!(/‘/, "'")
- preword.gsub!(/’/, "'")
-
- preword
- end
-
-
- # Preprocess the word.
- # Remove any initial ', if present. Then, set initial y, or y after a vowel, to Y
- #
- # (The comment to 'establish the regions R1 and R2' in the original description
- # is an implementation optimisation that identifies where the regions start. As
- # no modifications are made to the word that affect those positions, you may want
- # to cache them now. This implementation doesn't do that.)
- def porter2_preprocess
- w = self.dup
-
- # remove any initial apostrophe
- w.gsub!(/^'*(.)/, '\1')
-
- # set initial y, or y after a vowel, to Y
- w.gsub!(/^y/, "Y")
- w.gsub!(/(#{Porter2::V})y/, '\1Y')
-
- w
- end
-
-
- # R1 is the portion of the word after the first non-vowel after the first vowel
- # (with words beginning 'gener-', 'commun-', and 'arsen-' treated as special cases).
- def porter2_r1
- if self =~ /^(gener|commun|arsen)(?<r1>.*)/
- Regexp.last_match(:r1)
- else
- self =~ /#{Porter2::V}#{Porter2::C}(?<r1>.*)$/
- Regexp.last_match(:r1) || ""
- end
- end
-
-
- # R2 is the portion of R1 (porter2_r1) after the first non-vowel after the first vowel
- def porter2_r2
- self.porter2_r1 =~ /#{Porter2::V}#{Porter2::C}(?<r2>.*)$/
- Regexp.last_match(:r2) || ""
- end
-
-
- # Returns true if the word ends with a short syllable
- def porter2_ends_with_short_syllable?
- self =~ /#{Porter2::SHORT_SYLLABLE}$/ ? true : false
- end
-
-
- # A word is short if it ends in a short syllable, and R1 is null
- def porter2_is_short_word?
- self.porter2_ends_with_short_syllable? and self.porter2_r1.empty?
- end
-
-
- # Search for the longest among the suffixes,
- # * '
- # * 's
- # * 's'
- # and remove if found.
- def porter2_step0
- self.sub!(/(.)('s'|'s|')$/, '\1') || self
- end
-
-
- # Search for the longest among the following suffixes, and perform the action indicated.
- # sses:: replace by ss
- # ied, ies:: replace by i if preceded by more than one letter, otherwise by ie
- # s:: delete if the preceding word part contains a vowel not immediately before the s
- # us, ss:: do nothing
- def porter2_step1a
- if self =~ /sses$/
- self.sub(/sses$/, 'ss')
- elsif self =~ /..(ied|ies)$/
- self.sub(/(ied|ies)$/, 'i')
- elsif self =~ /(ied|ies)$/
- self.sub(/(ied|ies)$/, 'ie')
- elsif self =~ /(us|ss)$/
- self
- elsif self =~ /s$/
- if self =~ /(#{Porter2::V}.+)s$/
- self.sub(/s$/, '')
- else
- self
- end
- else
- self
- end
- end
-
- # Search for the longest among the following suffixes, and perform the action indicated.
- # eed, eedly:: replace by ee if the suffix is also in R1
- # ed, edly, ing, ingly:: delete if the preceding word part contains a vowel and,
- # after the deletion:
- # * if the word ends at, bl or iz: add e, or
- # * if the word ends with a double: remove the last letter, or
- # * if the word is short: add e
- #
- # (If gb_english is +true+, treat the 'is' suffix as 'iz' above.)
- def porter2_step1b(gb_english = false)
- if self =~ /(eed|eedly)$/
- if self.porter2_r1 =~ /(eed|eedly)$/
- self.sub(/(eed|eedly)$/, 'ee')
- else
- self
- end
- else
- w = self.dup
- if w =~ /#{Porter2::V}.*(ed|edly|ing|ingly)$/
- w.sub!(/(ed|edly|ing|ingly)$/, '')
- if w =~ /(at|lb|iz)$/
- w += 'e'
- elsif w =~ /is$/ and gb_english
- w += 'e'
- elsif w =~ /#{Porter2::Double}$/
- w.chop!
- elsif w.porter2_is_short_word?
- w += 'e'
- end
- end
- w
- end
- end
-
-
- # Replace a suffix of y or Y by i if it is preceded by a non-vowel which is
- # not the first letter of the word.
- def porter2_step1c
- if self =~ /.+#{Porter2::C}(y|Y)$/
- self.sub(/(y|Y)$/, 'i')
- else
- self
- end
- end
-
-
- # Search for the longest among the suffixes listed in the keys of Porter2::STEP_2_MAPS.
- # If one is found and that suffix occurs in R1, replace it with the value
- # found in STEP_2_MAPS.
- #
- # (Suffixes 'ogi' and 'li' are treated as special cases in the procedure.)
- #
- # (If gb_english is +true+, replace the 'iser' and 'isation' suffixes with
- # 'ise', similarly to how 'izer' and 'ization' are treated.)
- def porter2_step2(gb_english = false)
- r1 = self.porter2_r1
- s2m = Porter2::STEP_2_MAPS.dup
- if gb_english
- s2m["iser"] = "ise"
- s2m["isation"] = "ise"
- end
- step_2_re = Regexp.union(s2m.keys.map {|r| Regexp.new(r + "$")})
- if self =~ step_2_re
- if r1 =~ /#{$&}$/
- self.sub(/#{$&}$/, s2m[$&])
- else
- self
- end
- elsif r1 =~ /li$/ and self =~ /(#{Porter2::Valid_LI})li$/
- self.sub(/li$/, '')
- elsif r1 =~ /ogi$/ and self =~ /logi$/
- self.sub(/ogi$/, 'og')
- else
- self
- end
- end
-
-
- # Search for the longest among the suffixes listed in the keys of Porter2::STEP_3_MAPS.
- # If one is found and that suffix occurs in R1, replace it with the value
- # found in STEP_3_MAPS.
- #
- # (Suffix 'ative' is treated as a special case in the procedure.)
- #
- # (If gb_english is +true+, replace the 'alise' suffix with
- # 'al', similarly to how 'alize' is treated.)
- def porter2_step3(gb_english = false)
- if self =~ /ative$/ and self.porter2_r2 =~ /ative$/
- self.sub(/ative$/, '')
- else
- s3m = Porter2::STEP_3_MAPS.dup
- if gb_english
- s3m["alise"] = "al"
- end
- step_3_re = Regexp.union(s3m.keys.map {|r| Regexp.new(r + "$")})
- r1 = self.porter2_r1
- if self =~ step_3_re and r1 =~ /#{$&}$/
- self.sub(/#{$&}$/, s3m[$&])
- else
- self
- end
- end
- end
-
-
- # Search for the longest among the suffixes listed in the keys of Porter2::STEP_4_MAPS.
- # If one is found and that suffix occurs in R2, replace it with the value
- # found in STEP_4_MAPS.
- #
- # (Suffix 'ion' is treated as a special case in the procedure.)
- #
- # (If gb_english is +true+, delete the 'ise' suffix if found.)
- def porter2_step4(gb_english = false)
- if self.porter2_r2 =~ /ion$/ and self =~ /(s|t)ion$/
- self.sub(/ion$/, '')
- else
- s4m = Porter2::STEP_4_MAPS.dup
- if gb_english
- s4m["ise"] = ""
- end
- step_4_re = Regexp.union(s4m.keys.map {|r| Regexp.new(r + "$")})
- r2 = self.porter2_r2
- if self =~ step_4_re
- if r2 =~ /#{$&}/
- self.sub(/#{$&}$/, s4m[$&])
- else
- self
- end
- else
- self
- end
- end
- end
-
-
- # Search for the following suffixes, and, if found, perform the action indicated.
- # e:: delete if in R2, or in R1 and not preceded by a short syllable
- # l:: delete if in R2 and preceded by l
- def porter2_step5
- if self =~ /ll$/ and self.porter2_r2 =~ /l$/
- self.sub(/ll$/, 'l')
- elsif self =~ /e$/ and self.porter2_r2 =~ /e$/
- self.sub(/e$/, '')
- else
- r1 = self.porter2_r1
- if self =~ /e$/ and r1 =~ /e$/ and not self =~ /#{Porter2::SHORT_SYLLABLE}e$/
- self.sub(/e$/, '')
- else
- self
- end
- end
- end
-
-
- # Turn all Y letters into y
- def porter2_postprocess
- self.gsub(/Y/, 'y')
- end
-
- public
-
- # Perform the stemming procedure. If +gb_english+ is true, treat '-ise' and similar suffixes
- # as '-ize' in American English.
- def porter2_stem(gb_english = false)
- preword = self.porter2_tidy
- return preword if preword.length <= 2
-
- word = preword.porter2_preprocess
-
- if Porter2::SPECIAL_CASES.has_key? word
- Porter2::SPECIAL_CASES[word]
- else
- w1a = word.porter2_step0.porter2_step1a
- if Porter2::STEP_1A_SPECIAL_CASES.include? w1a
- w1a
- else
- w1a.porter2_step1b(gb_english).porter2_step1c.porter2_step2(gb_english).porter2_step3(gb_english).porter2_step4(gb_english).porter2_step5.porter2_postprocess
- end
- end
- end
-
- # A verbose version of porter2_stem that prints the output of each stage to STDOUT
- def porter2_stem_verbose(gb_english = false)
- preword = self.porter2_tidy
- puts "Preword: #{preword}"
- return preword if preword.length <= 2
-
- word = preword.porter2_preprocess
- puts "Preprocessed: #{word}"
-
- if Porter2::SPECIAL_CASES.has_key? word
- puts "Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}"
- Porter2::SPECIAL_CASES[word]
- else
- r1 = word.porter2_r1
- r2 = word.porter2_r2
- puts "R1 = #{r1}, R2 = #{r2}"
-
- w0 = word.porter2_step0 ; puts "After step 0: #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})"
- w1a = w0.porter2_step1a ; puts "After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})"
-
- if Porter2::STEP_1A_SPECIAL_CASES.include? w1a
- puts "Returning #{w1a} as 1a special case"
- w1a
- else
- w1b = w1a.porter2_step1b(gb_english) ; puts "After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})"
- w1c = w1b.porter2_step1c ; puts "After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})"
- w2 = w1c.porter2_step2(gb_english) ; puts "After step 2: #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})"
- w3 = w2.porter2_step3(gb_english) ; puts "After step 3: #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})"
- w4 = w3.porter2_step4(gb_english) ; puts "After step 4: #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})"
- w5 = w4.porter2_step5 ; puts "After step 5: #{w5}"
- wpost = w5.porter2_postprocess ; puts "After postprocess: #{wpost}"
- wpost
- end
- end
- end
-
- alias stem porter2_stem
-
-end
+$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
+require 'porter2_constants'
+require 'porter2_implementation'
--- /dev/null
+# coding: utf-8\r
+\r
+# Constants for the Porter 2 stemmer\r
+module Porter2
+
+  # Constants for the Porter 2 stemmer.
+  #
+  # Every constant is frozen so that the shared tables cannot be altered by
+  # accident at run time. The freeze is shallow: code that needs a variant of
+  # a table should work on a +dup+ of it, which is mutable.
+  #
+  # The string constants are regexp fragments, intended to be interpolated
+  # into regular expressions.
+
+  # A non-vowel
+  C = "[^aeiouy]".freeze
+
+  # A vowel: a e i o u y
+  V = "[aeiouy]".freeze
+
+  # A non-vowel other than w, x, or Y
+  CW = "[^aeiouywxY]".freeze
+
+  # Doubles created when adding a suffix: these are undoubled when stemmed.
+  # (The mixed-case constant name is kept because the stemmer refers to it
+  # as Porter2::Double.)
+  Double = "(bb|dd|ff|gg|mm|nn|pp|rr|tt)".freeze
+
+  # A valid letter that can come before 'li' (or 'ly').
+  # (The mixed-case constant name is kept because the stemmer refers to it
+  # as Porter2::Valid_LI.)
+  Valid_LI = "[cdeghkmnrt]".freeze
+
+  # A specification for a short syllable.
+  #
+  # A short syllable in a word is either:
+  # 1. a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel, or
+  # 2. a vowel at the beginning of the word followed by a non-vowel.
+  #
+  # (The original document is silent on whether sequences of two or more non-vowels make a
+  # syllable long. But as this specification is only used to find sequences of non-vowel -
+  # vowel - non-vowel - end-of-word, this ambiguity does not have an effect.)
+  SHORT_SYLLABLE = "((#{C}#{V}#{CW})|(^#{V}#{C}))".freeze
+
+  # Suffix transformations used in porter2_step2.
+  # (ogi, li endings dealt with in procedure)
+  STEP_2_MAPS = {
+    "tional"  => "tion",
+    "enci"    => "ence",
+    "anci"    => "ance",
+    "abli"    => "able",
+    "entli"   => "ent",
+    "ization" => "ize",
+    "izer"    => "ize",
+    "ational" => "ate",
+    "ation"   => "ate",
+    "ator"    => "ate",
+    "alism"   => "al",
+    "aliti"   => "al",
+    "alli"    => "al",
+    "fulness" => "ful",
+    "ousli"   => "ous",
+    "ousness" => "ous",
+    "iveness" => "ive",
+    "iviti"   => "ive",
+    "biliti"  => "ble",
+    "bli"     => "ble",
+    "fulli"   => "ful",
+    "lessli"  => "less"
+  }.freeze
+
+  # Suffix transformations used in porter2_step3.
+  # (ative ending dealt with in procedure)
+  STEP_3_MAPS = {
+    "tional"  => "tion",
+    "ational" => "ate",
+    "alize"   => "al",
+    "icate"   => "ic",
+    "iciti"   => "ic",
+    "ical"    => "ic",
+    "ful"     => "",
+    "ness"    => ""
+  }.freeze
+
+  # Suffix transformations used in porter2_step4.
+  # (ion ending dealt with in procedure)
+  STEP_4_MAPS = {
+    "al"    => "",
+    "ance"  => "",
+    "ence"  => "",
+    "er"    => "",
+    "ic"    => "",
+    "able"  => "",
+    "ible"  => "",
+    "ant"   => "",
+    "ement" => "",
+    "ment"  => "",
+    "ent"   => "",
+    "ism"   => "",
+    "ate"   => "",
+    "iti"   => "",
+    "ous"   => "",
+    "ive"   => "",
+    "ize"   => ""
+  }.freeze
+
+  # Special-case stemmings
+  SPECIAL_CASES = {
+    "skis"   => "ski",
+    "skies"  => "sky",
+
+    "dying"  => "die",
+    "lying"  => "lie",
+    "tying"  => "tie",
+    "idly"   => "idl",
+    "gently" => "gentl",
+    "ugly"   => "ugli",
+    "early"  => "earli",
+    "only"   => "onli",
+    "singly" => "singl",
+
+    "sky"    => "sky",
+    "news"   => "news",
+    "howe"   => "howe",
+    "atlas"  => "atlas",
+    "cosmos" => "cosmos",
+    "bias"   => "bias",
+    "andes"  => "andes"
+  }.freeze
+
+  # Special case words to stop processing after step 1a.
+  STEP_1A_SPECIAL_CASES = %w[ inning outing canning herring earring proceed exceed succeed ].freeze
+
+end
+\r
--- /dev/null
+# coding: utf-8
+
+# Implementation of the Porter 2 stemmer. String#porter2_stem is the main stemming procedure.
+
+class String
+  # Tidy up the word before we get down to the algorithm: strip surrounding
+  # whitespace, downcase, and map typographic apostrophes to plain apostrophes.
+  # Returns a new string; the receiver is not modified.
+  def porter2_tidy
+    preword = self.to_s.strip.downcase
+
+    # map apostrophe-like characters to apostrophes
+    preword.gsub!(/‘/, "'")
+    preword.gsub!(/’/, "'")
+
+    preword
+  end
+
+
+  # Preprocess the word.
+  # Remove any initial ', if present. Then, set initial y, or y after a vowel, to Y
+  #
+  # (The comment to 'establish the regions R1 and R2' in the original description
+  # is an implementation optimisation that identifies where the regions start. As
+  # no modifications are made to the word that affect those positions, you may want
+  # to cache them now. This implementation doesn't do that.)
+  #
+  # Returns a new string; the receiver is not modified.
+  def porter2_preprocess
+    w = self.dup
+
+    # remove any initial apostrophe
+    w.gsub!(/^'*(.)/, '\1')
+
+    # set initial y, or y after a vowel, to Y
+    w.gsub!(/^y/, "Y")
+    w.gsub!(/(#{Porter2::V})y/, '\1Y')
+
+    w
+  end
+
+
+  # R1 is the portion of the word after the first non-vowel after the first vowel
+  # (with words beginning 'gener-', 'commun-', and 'arsen-' treated as special cases).
+  # Returns the empty string if there is no such portion.
+  def porter2_r1
+    if self =~ /^(gener|commun|arsen)(?<r1>.*)/
+      Regexp.last_match(:r1)
+    else
+      self =~ /#{Porter2::V}#{Porter2::C}(?<r1>.*)$/
+      # last_match is nil when the pattern did not match, hence the fallback
+      Regexp.last_match(:r1) || ""
+    end
+  end
+
+
+  # R2 is the portion of R1 (porter2_r1) after the first non-vowel after the first vowel.
+  # Returns the empty string if there is no such portion.
+  def porter2_r2
+    self.porter2_r1 =~ /#{Porter2::V}#{Porter2::C}(?<r2>.*)$/
+    Regexp.last_match(:r2) || ""
+  end
+
+
+  # Returns true if the word ends with a short syllable
+  def porter2_ends_with_short_syllable?
+    self =~ /#{Porter2::SHORT_SYLLABLE}$/ ? true : false
+  end
+
+
+  # A word is short if it ends in a short syllable, and R1 is null
+  def porter2_is_short_word?
+    self.porter2_ends_with_short_syllable? && self.porter2_r1.empty?
+  end
+
+
+  # Search for the longest among the suffixes,
+  # * '
+  # * 's
+  # * 's'
+  # and remove if found.
+  #
+  # Returns the resulting word; the receiver is not modified, so this is safe
+  # to call on frozen strings.
+  def porter2_step0
+    self.sub(/(.)('s'|'s|')$/, '\1')
+  end
+
+
+  # Search for the longest among the following suffixes, and perform the action indicated.
+  # sses:: replace by ss
+  # ied, ies:: replace by i if preceded by more than one letter, otherwise by ie
+  # s:: delete if the preceding word part contains a vowel not immediately before the s
+  # us, ss:: do nothing
+  def porter2_step1a
+    if self =~ /sses$/
+      self.sub(/sses$/, 'ss')
+    elsif self =~ /..(ied|ies)$/
+      self.sub(/(ied|ies)$/, 'i')
+    elsif self =~ /(ied|ies)$/
+      self.sub(/(ied|ies)$/, 'ie')
+    elsif self =~ /(us|ss)$/
+      self
+    elsif self =~ /#{Porter2::V}.+s$/
+      # a vowel followed by at least one other letter precedes the final s
+      self.sub(/s$/, '')
+    else
+      self
+    end
+  end
+
+
+  # Search for the longest among the following suffixes, and perform the action indicated.
+  # eed, eedly:: replace by ee if the suffix is also in R1
+  # ed, edly, ing, ingly:: delete if the preceding word part contains a vowel and,
+  #                        after the deletion:
+  #                        * if the word ends at, bl or iz: add e, or
+  #                        * if the word ends with a double: remove the last letter, or
+  #                        * if the word is short: add e
+  #
+  # (If gb_english is +true+, treat the 'is' suffix as 'iz' above.)
+  def porter2_step1b(gb_english = false)
+    if self =~ /(eed|eedly)$/
+      if self.porter2_r1 =~ /(eed|eedly)$/
+        self.sub(/(eed|eedly)$/, 'ee')
+      else
+        self
+      end
+    else
+      w = self.dup
+      if w =~ /#{Porter2::V}.*(ed|edly|ing|ingly)$/
+        w.sub!(/(ed|edly|ing|ingly)$/, '')
+        if w =~ /(at|bl|iz)$/
+          w += 'e'
+        elsif gb_english && w =~ /is$/
+          w += 'e'
+        elsif w =~ /#{Porter2::Double}$/
+          w.chop!
+        elsif w.porter2_is_short_word?
+          w += 'e'
+        end
+      end
+      w
+    end
+  end
+
+
+  # Replace a suffix of y or Y by i if it is preceded by a non-vowel which is
+  # not the first letter of the word.
+  def porter2_step1c
+    if self =~ /.+#{Porter2::C}(y|Y)$/
+      self.sub(/(y|Y)$/, 'i')
+    else
+      self
+    end
+  end
+
+
+  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_2_MAPS.
+  # If one is found and that suffix occurs in R1, replace it with the value
+  # found in STEP_2_MAPS.
+  #
+  # (Suffixes 'ogi' and 'li' are treated as special cases in the procedure.)
+  #
+  # (If gb_english is +true+, replace the 'iser' and 'isation' suffixes with
+  # 'ise', similarly to how 'izer' and 'ization' are treated.)
+  def porter2_step2(gb_english = false)
+    r1 = self.porter2_r1
+    # work on a copy so the shared table is never modified
+    s2m = Porter2::STEP_2_MAPS.dup
+    if gb_english
+      s2m["iser"] = "ise"
+      s2m["isation"] = "ise"
+    end
+    # every alternative is anchored at the end of the word, so the leftmost
+    # match found by the regexp engine is also the longest suffix
+    step_2_re = Regexp.union(s2m.keys.map { |key| Regexp.new(key + "$") })
+    if self =~ step_2_re
+      suffix = Regexp.last_match(0)
+      if r1 =~ /#{suffix}$/
+        self.sub(/#{suffix}$/, s2m[suffix])
+      else
+        self
+      end
+    elsif r1 =~ /li$/ && self =~ /(#{Porter2::Valid_LI})li$/
+      self.sub(/li$/, '')
+    elsif r1 =~ /ogi$/ && self =~ /logi$/
+      self.sub(/ogi$/, 'og')
+    else
+      self
+    end
+  end
+
+
+  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_3_MAPS.
+  # If one is found and that suffix occurs in R1, replace it with the value
+  # found in STEP_3_MAPS.
+  #
+  # (Suffix 'ative' is treated as a special case in the procedure.)
+  #
+  # (If gb_english is +true+, replace the 'alise' suffix with
+  # 'al', similarly to how 'alize' is treated.)
+  def porter2_step3(gb_english = false)
+    if self =~ /ative$/ && self.porter2_r2 =~ /ative$/
+      self.sub(/ative$/, '')
+    else
+      # work on a copy so the shared table is never modified
+      s3m = Porter2::STEP_3_MAPS.dup
+      s3m["alise"] = "al" if gb_english
+      step_3_re = Regexp.union(s3m.keys.map { |key| Regexp.new(key + "$") })
+      if self =~ step_3_re
+        suffix = Regexp.last_match(0)
+        if self.porter2_r1 =~ /#{suffix}$/
+          self.sub(/#{suffix}$/, s3m[suffix])
+        else
+          self
+        end
+      else
+        self
+      end
+    end
+  end
+
+
+  # Search for the longest among the suffixes listed in the keys of Porter2::STEP_4_MAPS.
+  # If one is found and that suffix occurs in R2, replace it with the value
+  # found in STEP_4_MAPS.
+  #
+  # (Suffix 'ion' is treated as a special case in the procedure.)
+  #
+  # (If gb_english is +true+, delete the 'ise' suffix if found.)
+  def porter2_step4(gb_english = false)
+    if self.porter2_r2 =~ /ion$/ && self =~ /(s|t)ion$/
+      self.sub(/ion$/, '')
+    else
+      # work on a copy so the shared table is never modified
+      s4m = Porter2::STEP_4_MAPS.dup
+      s4m["ise"] = "" if gb_english
+      step_4_re = Regexp.union(s4m.keys.map { |key| Regexp.new(key + "$") })
+      if self =~ step_4_re
+        suffix = Regexp.last_match(0)
+        # R2 is itself a suffix of the word, so it contains the matched
+        # suffix exactly when it ends with it
+        if self.porter2_r2 =~ /#{suffix}$/
+          self.sub(/#{suffix}$/, s4m[suffix])
+        else
+          self
+        end
+      else
+        self
+      end
+    end
+  end
+
+
+  # Search for the following suffixes, and, if found, perform the action indicated.
+  # e:: delete if in R2, or in R1 and not preceded by a short syllable
+  # l:: delete if in R2 and preceded by l
+  def porter2_step5
+    if self =~ /ll$/ && self.porter2_r2 =~ /l$/
+      self.sub(/ll$/, 'l')
+    elsif self =~ /e$/ && self.porter2_r2 =~ /e$/
+      self.sub(/e$/, '')
+    elsif self =~ /e$/ && self.porter2_r1 =~ /e$/ && self !~ /#{Porter2::SHORT_SYLLABLE}e$/
+      self.sub(/e$/, '')
+    else
+      self
+    end
+  end
+
+
+  # Turn all Y letters into y
+  def porter2_postprocess
+    self.gsub(/Y/, 'y')
+  end
+
+
+  # Perform the stemming procedure and return the stem as a new string.
+  # If +gb_english+ is true, British English '-ise' and similar suffixes are
+  # treated in the same way as the American English '-ize' forms.
+  #
+  # Words of one or two letters (after tidying) are returned unchanged.
+  def porter2_stem(gb_english = false)
+    preword = self.porter2_tidy
+    return preword if preword.length <= 2
+
+    word = preword.porter2_preprocess
+
+    if Porter2::SPECIAL_CASES.key? word
+      # hand back a copy so callers can never alter the shared table entry
+      Porter2::SPECIAL_CASES[word].dup
+    else
+      w1a = word.porter2_step0.porter2_step1a
+      if Porter2::STEP_1A_SPECIAL_CASES.include? w1a
+        w1a
+      else
+        w1a.porter2_step1b(gb_english)
+           .porter2_step1c
+           .porter2_step2(gb_english)
+           .porter2_step3(gb_english)
+           .porter2_step4(gb_english)
+           .porter2_step5
+           .porter2_postprocess
+      end
+    end
+  end
+
+  # A verbose version of porter2_stem that prints the output of each stage to STDOUT.
+  # Returns the same stem as porter2_stem.
+  def porter2_stem_verbose(gb_english = false)
+    preword = self.porter2_tidy
+    puts "Preword: #{preword}"
+    return preword if preword.length <= 2
+
+    word = preword.porter2_preprocess
+    puts "Preprocessed: #{word}"
+
+    if Porter2::SPECIAL_CASES.key? word
+      puts "Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}"
+      # hand back a copy so callers can never alter the shared table entry
+      Porter2::SPECIAL_CASES[word].dup
+    else
+      r1 = word.porter2_r1
+      r2 = word.porter2_r2
+      puts "R1 = #{r1}, R2 = #{r2}"
+
+      w0 = word.porter2_step0
+      puts "After step 0:  #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})"
+      w1a = w0.porter2_step1a
+      puts "After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})"
+
+      if Porter2::STEP_1A_SPECIAL_CASES.include? w1a
+        puts "Returning #{w1a} as 1a special case"
+        w1a
+      else
+        w1b = w1a.porter2_step1b(gb_english)
+        puts "After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})"
+        w1c = w1b.porter2_step1c
+        puts "After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})"
+        w2 = w1c.porter2_step2(gb_english)
+        puts "After step 2:  #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})"
+        w3 = w2.porter2_step3(gb_english)
+        puts "After step 3:  #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})"
+        w4 = w3.porter2_step4(gb_english)
+        puts "After step 4:  #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})"
+        w5 = w4.porter2_step5
+        puts "After step 5:  #{w5}"
+        wpost = w5.porter2_postprocess
+        puts "After postprocess: #{wpost}"
+        wpost
+      end
+    end
+  end
+
+  alias stem porter2_stem
+
+end
+