From: Neil Smith <neil.d1@njae.me.uk> Date: Wed, 5 Jan 2011 11:41:22 +0000 (+0000) Subject: Split constants into a module, procedures directly into String. X-Git-Tag: v1.0.0~9 X-Git-Url: https://git.njae.me.uk/?a=commitdiff_plain;h=a222cc928502d3bb6ecd3d03c532ce7f9804acb4;p=porter2stemmer.git Split constants into a module, procedures directly into String. --- diff --git a/doc/Stemmable.html b/doc/Stemmable.html deleted file mode 100644 index 9594c5a..0000000 --- a/doc/Stemmable.html +++ /dev/null @@ -1,1104 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" - "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> -<head> - <meta content="text/html; charset=utf-8" http-equiv="Content-Type" /> - - <title>Module: Stemmable</title> - - <link rel="stylesheet" href="./rdoc.css" type="text/css" media="screen" /> - - <script src="./js/jquery.js" type="text/javascript" - charset="utf-8"></script> - <script src="./js/thickbox-compressed.js" type="text/javascript" - charset="utf-8"></script> - <script src="./js/quicksearch.js" type="text/javascript" - charset="utf-8"></script> - <script src="./js/darkfish.js" type="text/javascript" - charset="utf-8"></script> - -</head> -<body class="module"> - - <div id="metadata"> - <div id="home-metadata"> - <div id="home-section" class="section"> - <h3 class="section-header"> - <a href="./index.html">Home</a> - <a href="./index.html#classes">Classes</a> - <a href="./index.html#methods">Methods</a> - </h3> - </div> - </div> - - <div id="file-metadata"> - <div id="file-list-section" class="section"> - <h3 class="section-header">In Files</h3> - <div class="section-body"> - <ul> - - <li><a href="./lib/porter2_rb.html?TB_iframe=true&height=550&width=785" - class="thickbox" title="lib/porter2.rb">lib/porter2.rb</a></li> - - </ul> - </div> - </div> - - - </div> - - <div id="class-metadata"> - - <!-- Parent Class --> - - - 
<!-- Namespace Contents --> - - - <!-- Method Quickref --> - - <div id="method-list-section" class="section"> - <h3 class="section-header">Methods</h3> - <ul class="link-list"> - - <li><a href="#method-i-porter2_ends_with_short_syllable%3F">#porter2_ends_with_short_syllable?</a></li> - - <li><a href="#method-i-porter2_is_short_word%3F">#porter2_is_short_word?</a></li> - - <li><a href="#method-i-porter2_postprocess">#porter2_postprocess</a></li> - - <li><a href="#method-i-porter2_preprocess">#porter2_preprocess</a></li> - - <li><a href="#method-i-porter2_r1">#porter2_r1</a></li> - - <li><a href="#method-i-porter2_r2">#porter2_r2</a></li> - - <li><a href="#method-i-porter2_stem">#porter2_stem</a></li> - - <li><a href="#method-i-porter2_stem_verbose">#porter2_stem_verbose</a></li> - - <li><a href="#method-i-porter2_tidy">#porter2_tidy</a></li> - - <li><a href="#method-i-stem">#stem</a></li> - - <li><a href="#method-i-step_0">#step_0</a></li> - - <li><a href="#method-i-step_1a">#step_1a</a></li> - - <li><a href="#method-i-step_1b">#step_1b</a></li> - - <li><a href="#method-i-step_1c">#step_1c</a></li> - - <li><a href="#method-i-step_2">#step_2</a></li> - - <li><a href="#method-i-step_3">#step_3</a></li> - - <li><a href="#method-i-step_4">#step_4</a></li> - - <li><a href="#method-i-step_5">#step_5</a></li> - - </ul> - </div> - - - <!-- Included Modules --> - - </div> - - <div id="project-metadata"> - - - - <div id="classindex-section" class="section project-section"> - <h3 class="section-header">Class Index - <span class="search-toggle"><img src="./images/find.png" - height="16" width="16" alt="[+]" - title="show/hide quicksearch" /></span></h3> - <form action="#" method="get" accept-charset="utf-8" class="initially-hidden"> - <fieldset> - <legend>Quicksearch</legend> - <input type="text" name="quicksearch" value="" - class="quicksearch-field" /> - </fieldset> - </form> - - <ul class="link-list"> - - <li><a href="./Stemmable.html">Stemmable</a></li> - - <li><a 
href="./String.html">String</a></li> - - <li><a href="./TestPorter2.html">TestPorter2</a></li> - - </ul> - <div id="no-class-search-results" style="display: none;">No matching classes.</div> - </div> - - - </div> - </div> - - <div id="documentation"> - <h1 class="module">Stemmable</h1> - - <div id="description"> - <p> -Porter stemmer in Ruby. -</p> -<p> -This is the Porter 2 stemming algorithm, as described at <a -href="http://snowball.tartarus.org/algorithms/english/stemmer.html">snowball.tartarus.org/algorithms/english/stemmer.html</a> -The original paper is: -</p> -<pre> - Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, - no. 3, pp 130-137 </pre> - - </div> - - <!-- Constants --> - - <div id="constants-list" class="section"> - <h3 class="section-header">Constants</h3> - <dl> - - <dt><a name="C">C</a></dt> - - <dd class="description"><p> -A non-vowel -</p></dd> - - - <dt><a name="V">V</a></dt> - - <dd class="description"><p> -A vowel -</p></dd> - - - <dt><a name="CW">CW</a></dt> - - <dd class="description"><p> -A non-vowel other than w, x, or Y -</p></dd> - - - <dt><a name="Double">Double</a></dt> - - <dd class="description"><p> -Doubles created when added a suffix: these are undoubled when stemmed -</p></dd> - - - <dt><a name="Valid_LI">Valid_LI</a></dt> - - <dd class="description"><p> -A valid letter that can come before ‘li’ -</p></dd> - - - <dt><a name="SHORT_SYLLABLE">SHORT_SYLLABLE</a></dt> - - <dd class="description"><p> -A specification for a short syllable -</p></dd> - - - <dt><a name="STEP_2_MAPS">STEP_2_MAPS</a></dt> - - <dd class="description"><p> -Suffix transformations used in Step 2. (ogi, li endings dealt with in -procedure) -</p></dd> - - - <dt><a name="STEP_3_MAPS">STEP_3_MAPS</a></dt> - - <dd class="description"><p> -Suffix transformations used in Step 3. 
(ative ending dealt with in -procedure) -</p></dd> - - - <dt><a name="STEP_4_MAPS">STEP_4_MAPS</a></dt> - - <dd class="description"><p> -Suffix transformations used in Step 4. -</p></dd> - - - <dt><a name="SPECIAL_CASES">SPECIAL_CASES</a></dt> - - <dd class="description"><p> -Special-case stemmings -</p></dd> - - - <dt><a name="STEP_1A_SPECIAL_CASES">STEP_1A_SPECIAL_CASES</a></dt> - - <dd class="description"><p> -Special case words to ignore after step 1a. -</p></dd> - - - </dl> - </div> - - - <!-- Attributes --> - - - <!-- Methods --> - - <div id="public-instance-method-details" class="method-section section"> - <h3 class="section-header">Public Instance Methods</h3> - - - <div id="porter-ends-with-short-syllable--method" class="method-detail "> - <a name="method-i-porter2_ends_with_short_syllable%3F"></a> - - <div class="method-heading"> - - <span class="method-name">porter2_ends_with_short_syllable?</span><span - class="method-args">()</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - <p> -A short syllable in a word is either -</p> -<ol> -<li><p> -a vowel followed by a non-vowel other than w, x or Y and preceded by -</p> -</li> -</ol> -<p> -a non-vowel, or -</p> -<ol> -<li><p> -a vowel at the beginning of the word followed by a non-vowel. 
-</p> -</li> -</ol> - - - - <div class="method-source-code" - id="porter-ends-with-short-syllable--source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 155</span> -155: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_ends_with_short_syllable?</span> -156: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{SHORT_SYLLABLE}$/</span> <span class="ruby-operator">?</span> <span class="ruby-keyword kw">true</span> <span class="ruby-operator">:</span> <span class="ruby-keyword kw">false</span> -157: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="porter-is-short-word--method" class="method-detail "> - <a name="method-i-porter2_is_short_word%3F"></a> - - <div class="method-heading"> - - <span class="method-name">porter2_is_short_word?</span><span - class="method-args">()</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - <p> -A word is short if it ends in a short syllable, and if R1 is null -</p> - - - - <div class="method-source-code" - id="porter-is-short-word--source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 160</span> -160: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_is_short_word?</span> -161: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>.<span class="ruby-identifier">empty?</span> -162: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="porter-postprocess-method" class="method-detail "> - <a name="method-i-porter2_postprocess"></a> - - <div class="method-heading"> - - <span class="method-name">porter2_postprocess</span><span - 
class="method-args">()</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - - - - - <div class="method-source-code" - id="porter-postprocess-source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 311</span> -311: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_postprocess</span> -312: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/Y/</span>, <span class="ruby-value str">'y'</span>) -313: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="porter-preprocess-method" class="method-detail "> - <a name="method-i-porter2_preprocess"></a> - - <div class="method-heading"> - - <span class="method-name">porter2_preprocess</span><span - class="method-args">()</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - - - - - <div class="method-source-code" - id="porter-preprocess-source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 122</span> -122: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_preprocess</span> -123: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span> -124: -125: <span class="ruby-comment cmt"># remove any initial apostrophe </span> -126: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^'*(.)/</span>, <span class="ruby-value str">'\1'</span>) -127: -128: <span class="ruby-comment cmt"># set initial y, or y after a vowel, to Y </span> -129: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^y/</span>, <span class="ruby-value str">"Y"</span>) -130: <span class="ruby-identifier">w</span>.<span 
class="ruby-identifier">gsub!</span>(<span class="ruby-node">/(#{V})y/</span>, <span class="ruby-value str">'\1Y'</span>) -131: -132: <span class="ruby-identifier">w</span> -133: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="porter-r--method" class="method-detail "> - <a name="method-i-porter2_r1"></a> - - <div class="method-heading"> - - <span class="method-name">porter2_r1</span><span - class="method-args">()</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - <p> -The word after the first non-vowel after the first vowel -</p> - - - - <div class="method-source-code" - id="porter-r--source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 136</span> -136: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r1</span> -137: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^(gener|commun|arsen)(?<r1>.*)/</span> -138: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>) -139: <span class="ruby-keyword kw">else</span> -140: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{V}#{C}(?<r1>.*)$/</span> -141: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">""</span> -142: <span class="ruby-keyword kw">end</span> -143: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="porter-r--method" class="method-detail "> - <a name="method-i-porter2_r2"></a> - - <div class="method-heading"> - - <span class="method-name">porter2_r2</span><span - class="method-args">()</span> - <span 
class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - <p> -R1 after the first non-vowel after the first vowel -</p> - - - - <div class="method-source-code" - id="porter-r--source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 146</span> -146: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r2</span> -147: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{V}#{C}(?<r2>.*)$/</span> -148: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r2</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">""</span> -149: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="porter-stem-method" class="method-detail "> - <a name="method-i-porter2_stem"></a> - - <div class="method-heading"> - - <span class="method-name">porter2_stem</span><span - class="method-args">(gb_english = false)</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - - - - - <div class="method-source-code" - id="porter-stem-source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 316</span> -316: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>) -317: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span> -318: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><=</span> 
<span class="ruby-value">2</span> -319: -320: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span> -321: -322: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span> -323: <span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>] -324: <span class="ruby-keyword kw">else</span> -325: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">step_0</span>.<span class="ruby-identifier">step_1a</span> -326: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span> -327: <span class="ruby-identifier">w1a</span> -328: <span class="ruby-keyword kw">else</span> -329: <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">step_1b</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">step_1c</span>.<span class="ruby-identifier">step_2</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">step_3</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">step_4</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">step_5</span>.<span class="ruby-identifier">porter2_postprocess</span> -330: <span class="ruby-keyword kw">end</span> -331: <span class="ruby-keyword kw">end</span> -332: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - <div class="aliases"> - Also aliased as: <a href="Stemmable.html#method-i-stem">stem</a> - </div> - - - - </div> - - - <div id="porter-stem-verbose-method" class="method-detail "> - <a name="method-i-porter2_stem_verbose"></a> - - <div class="method-heading"> - - <span 
class="method-name">porter2_stem_verbose</span><span - class="method-args">(gb_english = false)</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - - - - - <div class="method-source-code" - id="porter-stem-verbose-source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 334</span> -334: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem_verbose</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>) -335: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span> -336: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Preword: #{preword}"</span> -337: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><=</span> <span class="ruby-value">2</span> -338: -339: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span> -340: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Preprocessed: #{word}"</span> -341: -342: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span> -343: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Returning #{word} as special case #{SPECIAL_CASES[word]}"</span> -344: <span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>] -345: <span class="ruby-keyword kw">else</span> -346: <span class="ruby-identifier">r1</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r1</span> -347: <span 
class="ruby-identifier">r2</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r2</span> -348: <span class="ruby-identifier">puts</span> <span class="ruby-node">"R1 = #{r1}, R2 = #{r2}"</span> -349: -350: <span class="ruby-identifier">w0</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">step_0</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 0: #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})"</span> -351: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">w0</span>.<span class="ruby-identifier">step_1a</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})"</span> -352: -353: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span> -354: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Returning #{w1a} as 1a special case"</span> -355: <span class="ruby-identifier">w1a</span> -356: <span class="ruby-keyword kw">else</span> -357: <span class="ruby-identifier">w1b</span> = <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">step_1b</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})"</span> -358: <span class="ruby-identifier">w1c</span> = <span class="ruby-identifier">w1b</span>.<span class="ruby-identifier">step_1c</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})"</span> -359: <span class="ruby-identifier">w2</span> = <span class="ruby-identifier">w1c</span>.<span class="ruby-identifier">step_2</span>(<span 
class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 2: #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})"</span> -360: <span class="ruby-identifier">w3</span> = <span class="ruby-identifier">w2</span>.<span class="ruby-identifier">step_3</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 3: #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})"</span> -361: <span class="ruby-identifier">w4</span> = <span class="ruby-identifier">w3</span>.<span class="ruby-identifier">step_4</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 4: #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})"</span> -362: <span class="ruby-identifier">w5</span> = <span class="ruby-identifier">w4</span>.<span class="ruby-identifier">step_5</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 5: #{w5}"</span> -363: <span class="ruby-identifier">wpost</span> = <span class="ruby-identifier">w5</span>.<span class="ruby-identifier">porter2_postprocess</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After postprocess: #{wpost}"</span> -364: <span class="ruby-identifier">wpost</span> -365: <span class="ruby-keyword kw">end</span> -366: <span class="ruby-keyword kw">end</span> -367: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="porter-tidy-method" class="method-detail "> - <a name="method-i-porter2_tidy"></a> - - <div class="method-heading"> - - <span class="method-name">porter2_tidy</span><span - class="method-args">()</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - <p> -Tidy up the word before we get down to the algorithm -</p> - - - - <div class="method-source-code" - 
id="porter-tidy-source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 112</span> -112: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_tidy</span> -113: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">downcase</span> -114: -115: <span class="ruby-comment cmt"># map apostrophe-like characters to apostrophes </span> -116: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/â/</span>, <span class="ruby-value str">"'"</span>) -117: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/â/</span>, <span class="ruby-value str">"'"</span>) -118: -119: <span class="ruby-identifier">preword</span> -120: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="stem-method" class="method-detail method-alias"> - <a name="method-i-stem"></a> - - <div class="method-heading"> - - <span class="method-name">stem</span><span - class="method-args">(gb_english = false)</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - - - - - </div> - - - - - <div class="aliases"> - Alias for: <a href="Stemmable.html#method-i-porter2_stem">porter2_stem</a> - </div> - - </div> - - - <div id="step--method" class="method-detail "> - <a name="method-i-step_0"></a> - - <div class="method-heading"> - - <span class="method-name">step_0</span><span - class="method-args">()</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - <p> -Search for the longest among the suffixes, -</p> -<ul> -<li><p> -’ -</p> -</li> -<li><p> -’s -</p> -</li> -<li><p> -’s’ -</p> -</li> -</ul> -<p> -and remove if 
found. -</p> - - - - <div class="method-source-code" - id="step--source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 169</span> -169: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">step_0</span> -170: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(.)('s'|'s|')$/</span>, <span class="ruby-value str">'\1'</span>) <span class="ruby-operator">||</span> <span class="ruby-keyword kw">self</span> -171: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="step-a-method" class="method-detail "> - <a name="method-i-step_1a"></a> - - <div class="method-heading"> - - <span class="method-name">step_1a</span><span - class="method-args">()</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - <p> -Remove plural suffixes -</p> - - - - <div class="method-source-code" - id="step-a-source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 174</span> -174: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">step_1a</span> -175: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/sses$/</span> -176: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/sses$/</span>, <span class="ruby-value str">'ss'</span>) -177: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/..(ied|ies)$/</span> -178: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'i'</span>) -179: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span 
class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ied|ies)$/</span> -180: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'ie'</span>) -181: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(us|ss)$/</span> -182: <span class="ruby-keyword kw">self</span> -183: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/s$/</span> -184: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{V}.+)s$/</span> -185: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/s$/</span>, <span class="ruby-value str">''</span>) -186: <span class="ruby-keyword kw">else</span> -187: <span class="ruby-keyword kw">self</span> -188: <span class="ruby-keyword kw">end</span> -189: <span class="ruby-keyword kw">else</span> -190: <span class="ruby-keyword kw">self</span> -191: <span class="ruby-keyword kw">end</span> -192: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="step-b-method" class="method-detail "> - <a name="method-i-step_1b"></a> - - <div class="method-heading"> - - <span class="method-name">step_1b</span><span - class="method-args">(gb_english = false)</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - - - - - <div class="method-source-code" - id="step-b-source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 194</span> -194: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">step_1b</span>(<span class="ruby-identifier">gb_english</span> = 
<span class="ruby-keyword kw">false</span>) -195: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span> -196: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span> -197: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(eed|eedly)$/</span>, <span class="ruby-value str">'ee'</span>) -198: <span class="ruby-keyword kw">else</span> -199: <span class="ruby-keyword kw">self</span> -200: <span class="ruby-keyword kw">end</span> -201: <span class="ruby-keyword kw">else</span> -202: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span> -203: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{V}.*(ed|edly|ing|ingly)$/</span> -204: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(ed|edly|ing|ingly)$/</span>, <span class="ruby-value str">''</span>) -205: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(at|lb|iz)$/</span> -206: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span> -207: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/is$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">gb_english</span> -208: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span> 
-209: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Double}$/</span> -210: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span> -211: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span> -212: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span> -213: <span class="ruby-keyword kw">end</span> -214: <span class="ruby-keyword kw">end</span> -215: <span class="ruby-identifier">w</span> -216: <span class="ruby-keyword kw">end</span> -217: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="step-c-method" class="method-detail "> - <a name="method-i-step_1c"></a> - - <div class="method-heading"> - - <span class="method-name">step_1c</span><span - class="method-args">()</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - - - - - <div class="method-source-code" - id="step-c-source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 220</span> -220: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">step_1c</span> -221: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/.+#{C}(y|Y)$/</span> -222: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(y|Y)$/</span>, <span class="ruby-value str">'i'</span>) -223: <span class="ruby-keyword kw">else</span> -224: <span class="ruby-keyword kw">self</span> -225: <span class="ruby-keyword kw">end</span> -226: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="step--method" class="method-detail "> - <a 
name="method-i-step_2"></a> - - <div class="method-heading"> - - <span class="method-name">step_2</span><span - class="method-args">(gb_english = false)</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - - - - - <div class="method-source-code" - id="step--source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 229</span> -229: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">step_2</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>) -230: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> -231: <span class="ruby-identifier">s2m</span> = <span class="ruby-constant">STEP_2_MAPS</span>.<span class="ruby-identifier">dup</span> -232: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span> -233: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">"iser"</span>] = <span class="ruby-value str">"ise"</span> -234: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">"isation"</span>] = <span class="ruby-value str">"ise"</span> -235: <span class="ruby-keyword kw">end</span> -236: <span class="ruby-identifier">step_2_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s2m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)}) -237: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> 
<span class="ruby-identifier">step_2_re</span> -238: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&}$/</span> -239: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s2m</span>[<span class="ruby-node">$&</span>]) -240: <span class="ruby-keyword kw">else</span> -241: <span class="ruby-keyword kw">self</span> -242: <span class="ruby-keyword kw">end</span> -243: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/li$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Valid_LI})li$/</span> -244: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/li$/</span>, <span class="ruby-value str">''</span>) -245: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ogi$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/logi$/</span> -246: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ogi$/</span>, <span class="ruby-value str">'og'</span>) -247: <span class="ruby-keyword kw">else</span> -248: <span class="ruby-keyword kw">self</span> -249: <span class="ruby-keyword kw">end</span> -250: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="step--method" class="method-detail "> - <a name="method-i-step_3"></a> - - <div class="method-heading"> - - <span class="method-name">step_3</span><span - 
class="method-args">(gb_english = false)</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - - - - - <div class="method-source-code" - id="step--source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 253</span> -253: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">step_3</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>) -254: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span> -255: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ative$/</span>, <span class="ruby-value str">''</span>) -256: <span class="ruby-keyword kw">else</span> -257: <span class="ruby-identifier">s3m</span> = <span class="ruby-constant">STEP_3_MAPS</span>.<span class="ruby-identifier">dup</span> -258: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span> -259: <span class="ruby-identifier">s3m</span>[<span class="ruby-value str">"alise"</span>] = <span class="ruby-value str">"al"</span> -260: <span class="ruby-keyword kw">end</span> -261: <span class="ruby-identifier">step_3_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s3m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span 
class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)}) -262: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> -263: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_3_re</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&}$/</span> -264: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s3m</span>[<span class="ruby-node">$&</span>]) -265: <span class="ruby-keyword kw">else</span> -266: <span class="ruby-keyword kw">self</span> -267: <span class="ruby-keyword kw">end</span> -268: <span class="ruby-keyword kw">end</span> -269: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="step--method" class="method-detail "> - <a name="method-i-step_4"></a> - - <div class="method-heading"> - - <span class="method-name">step_4</span><span - class="method-args">(gb_english = false)</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - - - - - <div class="method-source-code" - id="step--source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 272</span> -272: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">step_4</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>) -273: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ion$/</span> <span class="ruby-keyword kw">and</span> <span 
class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)ion$/</span> -274: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ion$/</span>, <span class="ruby-value str">''</span>) -275: <span class="ruby-keyword kw">else</span> -276: <span class="ruby-identifier">s4m</span> = <span class="ruby-constant">STEP_4_MAPS</span>.<span class="ruby-identifier">dup</span> -277: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span> -278: <span class="ruby-identifier">s4m</span>[<span class="ruby-value str">"ise"</span>] = <span class="ruby-value str">""</span> -279: <span class="ruby-keyword kw">end</span> -280: <span class="ruby-identifier">step_4_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s4m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)}) -281: <span class="ruby-identifier">r2</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> -282: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_4_re</span> -283: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r2</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&}/</span> -284: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s4m</span>[<span 
class="ruby-node">$&</span>]) -285: <span class="ruby-keyword kw">else</span> -286: <span class="ruby-keyword kw">self</span> -287: <span class="ruby-keyword kw">end</span> -288: <span class="ruby-keyword kw">else</span> -289: <span class="ruby-keyword kw">self</span> -290: <span class="ruby-keyword kw">end</span> -291: <span class="ruby-keyword kw">end</span> -292: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - <div id="step--method" class="method-detail "> - <a name="method-i-step_5"></a> - - <div class="method-heading"> - - <span class="method-name">step_5</span><span - class="method-args">()</span> - <span class="method-click-advice">click to toggle source</span> - - </div> - - <div class="method-description"> - - - - - - <div class="method-source-code" - id="step--source"> -<pre> - <span class="ruby-comment cmt"># File lib/porter2.rb, line 295</span> -295: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">step_5</span> -296: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/l$/</span> -297: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ll$/</span>, <span class="ruby-value str">'l'</span>) -298: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> -299: <span class="ruby-keyword kw">self</span>.<span 
class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>) -300: <span class="ruby-keyword kw">else</span> -301: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> -302: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">not</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{SHORT_SYLLABLE}e$/</span> -303: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>) -304: <span class="ruby-keyword kw">else</span> -305: <span class="ruby-keyword kw">self</span> -306: <span class="ruby-keyword kw">end</span> -307: <span class="ruby-keyword kw">end</span> -308: <span class="ruby-keyword kw">end</span></pre> - </div> - - </div> - - - - - </div> - - - </div> - - - </div> - - - <div id="rdoc-debugging-section-dump" class="debugging-section"> - - <p>Disabled; run with --debug to generate this.</p> - - </div> - - <div id="validator-badges"> - <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> - <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish - Rdoc Generator</a> 1.1.6</small>.</p> - </div> - -</body> -</html> - diff --git a/doc/String.html b/doc/String.html index 9a5f5bb..e0921af 100644 --- a/doc/String.html +++ b/doc/String.html @@ -38,9 +38,6 @@ <div class="section-body"> <ul> - <li><a href="./test/tc_porter2_full_rb.html?TB_iframe=true&height=550&width=785" - 
class="thickbox" title="test/tc_porter2_full.rb">test/tc_porter2_full.rb</a></li> - <li><a href="./lib/porter2_rb.html?TB_iframe=true&height=550&width=785" class="thickbox" title="lib/porter2.rb">lib/porter2.rb</a></li> @@ -68,20 +65,52 @@ <!-- Method Quickref --> - - <!-- Included Modules --> - - <div id="includes-section" class="section"> - <h3 class="section-header">Included Modules</h3> + <div id="method-list-section" class="section"> + <h3 class="section-header">Methods</h3> <ul class="link-list"> - - - <li><a class="include" href="Stemmable.html">Stemmable</a></li> - - + + <li><a href="#method-i-porter2_ends_with_short_syllable%3F">#porter2_ends_with_short_syllable?</a></li> + + <li><a href="#method-i-porter2_is_short_word%3F">#porter2_is_short_word?</a></li> + + <li><a href="#method-i-porter2_postprocess">#porter2_postprocess</a></li> + + <li><a href="#method-i-porter2_preprocess">#porter2_preprocess</a></li> + + <li><a href="#method-i-porter2_r1">#porter2_r1</a></li> + + <li><a href="#method-i-porter2_r2">#porter2_r2</a></li> + + <li><a href="#method-i-porter2_stem">#porter2_stem</a></li> + + <li><a href="#method-i-porter2_stem_verbose">#porter2_stem_verbose</a></li> + + <li><a href="#method-i-porter2_step0">#porter2_step0</a></li> + + <li><a href="#method-i-porter2_step1a">#porter2_step1a</a></li> + + <li><a href="#method-i-porter2_step1b">#porter2_step1b</a></li> + + <li><a href="#method-i-porter2_step1c">#porter2_step1c</a></li> + + <li><a href="#method-i-porter2_step2">#porter2_step2</a></li> + + <li><a href="#method-i-porter2_step3">#porter2_step3</a></li> + + <li><a href="#method-i-porter2_step4">#porter2_step4</a></li> + + <li><a href="#method-i-porter2_step5">#porter2_step5</a></li> + + <li><a href="#method-i-porter2_tidy">#porter2_tidy</a></li> + + <li><a href="#method-i-stem">#stem</a></li> + </ul> </div> + + <!-- Included Modules --> + </div> <div id="project-metadata"> @@ -103,7 +132,7 @@ <ul class="link-list"> - <li><a 
href="./Stemmable.html">Stemmable</a></li> + <li><a href="./Porter2.html">Porter2</a></li> <li><a href="./String.html">String</a></li> @@ -121,8 +150,45 @@ <h1 class="class">String</h1> <div id="description"> - <p> -Add stem method to all Strings + <h2>The Porter 2 stemmer</h2> +<p> +This is the Porter 2 stemming algorithm, as described at <a +href="http://snowball.tartarus.org/algorithms/english/stemmer.html">snowball.tartarus.org/algorithms/english/stemmer.html</a>. +The original paper is: +</p> +<p> +Porter, 1980, “An algorithm for suffix stripping”, +<em>Program</em>, Vol. 14, no. 3, pp 130-137 +</p> +<p> +Constants for the stemmer are in the <a href="Porter2.html">Porter2</a> +module. +</p> +<p> +Procedures that implement the stemmer are added to the <a +href="String.html">String</a> class. +</p> +<p> +The stemmer algorithm is implemented in the <a +href="String.html#method-i-porter2_stem">porter2_stem</a> procedure. +</p> +<h2>Internationalisation</h2> +<p> +There isn’t much, as this is a stemmer that only works for English. +</p> +<p> +The <tt>gb_english</tt> flag to the various procedures allows the stemmer +to treat the British English ’-ise’ the same as the American +English ’-ize’. +</p> +<h2>Longest suffixes</h2> +<p> +Several places in the algorithm require matching the longest suffix of a +word. The regexp engine in Ruby 1.9 seems to handle alternatives in regexps +by finding the alternative that matches at the first position in the +string. As we’re only talking about suffixes, that first match is +also the longest suffix. If the regexp engine changes, this behaviour may +change and break the stemmer. 
</p> </div> @@ -135,6 +201,954 @@ Add stem method to all Strings <!-- Methods --> + <div id="public-instance-method-details" class="method-section section"> + <h3 class="section-header">Public Instance Methods</h3> + + + <div id="porter-ends-with-short-syllable--method" class="method-detail "> + <a name="method-i-porter2_ends_with_short_syllable%3F"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_ends_with_short_syllable?</span><span + class="method-args">()</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Returns true if the word ends with a short syllable +</p> + + + + <div class="method-source-code" + id="porter-ends-with-short-syllable--source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 87</span> +87: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_ends_with_short_syllable?</span> +88: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}$/</span> <span class="ruby-operator">?</span> <span class="ruby-keyword kw">true</span> <span class="ruby-operator">:</span> <span class="ruby-keyword kw">false</span> +89: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-is-short-word--method" class="method-detail "> + <a name="method-i-porter2_is_short_word%3F"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_is_short_word?</span><span + class="method-args">()</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +A word is short if it ends in a short syllable, and R1 is null +</p> + + + + <div class="method-source-code" + id="porter-is-short-word--source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 93</span> +93: <span class="ruby-keyword kw">def</span> 
<span class="ruby-identifier">porter2_is_short_word?</span> +94: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>.<span class="ruby-identifier">empty?</span> +95: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-postprocess-method" class="method-detail "> + <a name="method-i-porter2_postprocess"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_postprocess</span><span + class="method-args">()</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Turn all Y letters into y +</p> + + + + <div class="method-source-code" + id="porter-postprocess-source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 289</span> +289: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_postprocess</span> +290: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/Y/</span>, <span class="ruby-value str">'y'</span>) +291: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-preprocess-method" class="method-detail "> + <a name="method-i-porter2_preprocess"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_preprocess</span><span + class="method-args">()</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Preprocess the word. Remove any initial ’, if present. Then, set +initial y, or y after a vowel, to Y +</p> +<p> +(The comment to ‘establish the regions R1 and R2’ in the +original description is an implementation optimisation that identifies +where the regions start. 
As no modifications are made to the word that +affect those positions, you may want to cache them now. This implementation +doesn’t do that.) +</p> + + + + <div class="method-source-code" + id="porter-preprocess-source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 53</span> +53: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_preprocess</span> +54: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span> +55: +56: <span class="ruby-comment cmt"># remove any initial apostrophe</span> +57: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^'*(.)/</span>, <span class="ruby-value str">'\1'</span>) +58: +59: <span class="ruby-comment cmt"># set initial y, or y after a vowel, to Y</span> +60: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^y/</span>, <span class="ruby-value str">"Y"</span>) +61: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-node">/(#{Porter2::V})y/</span>, <span class="ruby-value str">'\1Y'</span>) +62: +63: <span class="ruby-identifier">w</span> +64: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-r--method" class="method-detail "> + <a name="method-i-porter2_r1"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_r1</span><span + class="method-args">()</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +R1 is the portion of the word after the first non-vowel after the first +vowel (with words beginning ‘gener-’, ‘commun-’, +and ‘arsen-’ treated as special cases). +</p> + + + + <div class="method-source-code" + id="porter-r--source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 
69</span> +69: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r1</span> +70: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^(gener|commun|arsen)(?<r1>.*)/</span> +71: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>) +72: <span class="ruby-keyword kw">else</span> +73: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?<r1>.*)$/</span> +74: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">""</span> +75: <span class="ruby-keyword kw">end</span> +76: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-r--method" class="method-detail "> + <a name="method-i-porter2_r2"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_r2</span><span + class="method-args">()</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +R2 is the portion of R1 (<a +href="String.html#method-i-porter2_r1">porter2_r1</a>) after the first +non-vowel after the first vowel +</p> + + + + <div class="method-source-code" + id="porter-r--source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 80</span> +80: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r2</span> +81: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?<r2>.*)$/</span> +82: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span 
class="ruby-value">:r2</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">""</span> +83: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-stem-method" class="method-detail "> + <a name="method-i-porter2_stem"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_stem</span><span + class="method-args">(gb_english = false)</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Perform the stemming procedure. If <tt>gb_english</tt> is true, treat +’-ise’ and similar suffixes as ’-ize’ in American +English. +</p> + + + + <div class="method-source-code" + id="porter-stem-source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 297</span> +297: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>) +298: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span> +299: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><=</span> <span class="ruby-value">2</span> +300: +301: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span> +302: +303: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span> +304: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span 
class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>] +305: <span class="ruby-keyword kw">else</span> +306: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span>.<span class="ruby-identifier">porter2_step1a</span> +307: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span> +308: <span class="ruby-identifier">w1a</span> +309: <span class="ruby-keyword kw">else</span> +310: <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step5</span>.<span class="ruby-identifier">porter2_postprocess</span> +311: <span class="ruby-keyword kw">end</span> +312: <span class="ruby-keyword kw">end</span> +313: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + <div class="aliases"> + Also aliased as: <a href="String.html#method-i-stem">stem</a> + </div> + + + + </div> + + + <div id="porter-stem-verbose-method" class="method-detail "> + <a name="method-i-porter2_stem_verbose"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_stem_verbose</span><span + class="method-args">(gb_english = false)</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +A verbose version of <a 
+href="String.html#method-i-porter2_stem">porter2_stem</a> that prints the +output of each stage to STDOUT +</p> + + + + <div class="method-source-code" + id="porter-stem-verbose-source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 316</span> +316: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem_verbose</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>) +317: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span> +318: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Preword: #{preword}"</span> +319: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><=</span> <span class="ruby-value">2</span> +320: +321: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span> +322: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Preprocessed: #{word}"</span> +323: +324: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span> +325: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}"</span> +326: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>] +327: <span class="ruby-keyword kw">else</span> +328: <span class="ruby-identifier">r1</span> = <span 
class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r1</span> +329: <span class="ruby-identifier">r2</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r2</span> +330: <span class="ruby-identifier">puts</span> <span class="ruby-node">"R1 = #{r1}, R2 = #{r2}"</span> +331: +332: <span class="ruby-identifier">w0</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 0: #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})"</span> +333: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">w0</span>.<span class="ruby-identifier">porter2_step1a</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})"</span> +334: +335: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span> +336: <span class="ruby-identifier">puts</span> <span class="ruby-node">"Returning #{w1a} as 1a special case"</span> +337: <span class="ruby-identifier">w1a</span> +338: <span class="ruby-keyword kw">else</span> +339: <span class="ruby-identifier">w1b</span> = <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})"</span> +340: <span class="ruby-identifier">w1c</span> = <span class="ruby-identifier">w1b</span>.<span class="ruby-identifier">porter2_step1c</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = 
#{w1c.porter2_r2})"</span> +341: <span class="ruby-identifier">w2</span> = <span class="ruby-identifier">w1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 2: #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})"</span> +342: <span class="ruby-identifier">w3</span> = <span class="ruby-identifier">w2</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 3: #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})"</span> +343: <span class="ruby-identifier">w4</span> = <span class="ruby-identifier">w3</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 4: #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})"</span> +344: <span class="ruby-identifier">w5</span> = <span class="ruby-identifier">w4</span>.<span class="ruby-identifier">porter2_step5</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After step 5: #{w5}"</span> +345: <span class="ruby-identifier">wpost</span> = <span class="ruby-identifier">w5</span>.<span class="ruby-identifier">porter2_postprocess</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">"After postprocess: #{wpost}"</span> +346: <span class="ruby-identifier">wpost</span> +347: <span class="ruby-keyword kw">end</span> +348: <span class="ruby-keyword kw">end</span> +349: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-step--method" class="method-detail "> + <a name="method-i-porter2_step0"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_step0</span><span + class="method-args">()</span> + <span 
class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Search for the longest among the suffixes, +</p> +<ul> +<li><p> +‘ +</p> +</li> +<li><p> +’s +</p> +</li> +<li><p> +’s’ +</p> +</li> +</ul> +<p> +and remove if found. +</p> + + + + <div class="method-source-code" + id="porter-step--source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 103</span> +103: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step0</span> +104: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(.)('s'|'s|')$/</span>, <span class="ruby-value str">'\1'</span>) <span class="ruby-operator">||</span> <span class="ruby-keyword kw">self</span> +105: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-step-a-method" class="method-detail "> + <a name="method-i-porter2_step1a"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_step1a</span><span + class="method-args">()</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Search for the longest among the following suffixes, and perform the action +indicated. 
+</p> +<table> +<tr><td valign="top">sses</td><td><p> +replace by ss +</p> +</td></tr> +<tr><td valign="top">ied, ies</td><td><p> +replace by i if preceded by more than one letter, otherwise by ie +</p> +</td></tr> +<tr><td valign="top">s</td><td><p> +delete if the preceding word part contains a vowel not immediately before +the s +</p> +</td></tr> +<tr><td valign="top">us, ss</td><td><p> +do nothing +</p> +</td></tr> +</table> + + + + <div class="method-source-code" + id="porter-step-a-source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 113</span> +113: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1a</span> +114: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/sses$/</span> +115: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/sses$/</span>, <span class="ruby-value str">'ss'</span>) +116: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/..(ied|ies)$/</span> +117: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'i'</span>) +118: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ied|ies)$/</span> +119: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'ie'</span>) +120: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(us|ss)$/</span> +121: <span class="ruby-keyword kw">self</span> +122: <span class="ruby-keyword 
kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/s$/</span> +123: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::V}.+)s$/</span> +124: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/s$/</span>, <span class="ruby-value str">''</span>) +125: <span class="ruby-keyword kw">else</span> +126: <span class="ruby-keyword kw">self</span> +127: <span class="ruby-keyword kw">end</span> +128: <span class="ruby-keyword kw">else</span> +129: <span class="ruby-keyword kw">self</span> +130: <span class="ruby-keyword kw">end</span> +131: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-step-b-method" class="method-detail "> + <a name="method-i-porter2_step1b"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_step1b</span><span + class="method-args">(gb_english = false)</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Search for the longest among the following suffixes, and perform the action +indicated. +</p> +<table> +<tr><td valign="top">eed, eedly</td><td><p> +replace by ee if the suffix is also in R1 +</p> +</td></tr> +<tr><td valign="top">ed, edly, ing, ingly</td><td><p> +delete if the preceding word part contains a vowel and, after the +deletion: +</p> +<ul> +<li><p> +if the word ends at, bl or iz: add e, or +</p> +</li> +</ul> +<ul> +<li><p> +if the word ends with a double: remove the last letter, or +</p> +</li> +</ul> +<ul> +<li><p> +if the word is short: add e +</p> +</li> +</ul> +</td></tr> +</table> +<p> +(If gb_english is <tt>true</tt>, treat the ‘is’ suffix as +‘iz’ above.) 
+</p> + + + + <div class="method-source-code" + id="porter-step-b-source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 143</span> +143: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>) +144: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span> +145: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span> +146: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(eed|eedly)$/</span>, <span class="ruby-value str">'ee'</span>) +147: <span class="ruby-keyword kw">else</span> +148: <span class="ruby-keyword kw">self</span> +149: <span class="ruby-keyword kw">end</span> +150: <span class="ruby-keyword kw">else</span> +151: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span> +152: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}.*(ed|edly|ing|ingly)$/</span> +153: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(ed|edly|ing|ingly)$/</span>, <span class="ruby-value str">''</span>) +154: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(at|bl|iz)$/</span> +155: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span> +156: <span class="ruby-keyword kw">elsif</span> <span 
class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/is$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">gb_english</span> +157: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span> +158: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::Double}$/</span> +159: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span> +160: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span> +161: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span> +162: <span class="ruby-keyword kw">end</span> +163: <span class="ruby-keyword kw">end</span> +164: <span class="ruby-identifier">w</span> +165: <span class="ruby-keyword kw">end</span> +166: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-step-c-method" class="method-detail "> + <a name="method-i-porter2_step1c"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_step1c</span><span + class="method-args">()</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Replace a suffix of y or Y by i if it is preceded by a non-vowel which is +not the first letter of the word. 
+</p> + + + + <div class="method-source-code" + id="porter-step-c-source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 171</span> +171: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1c</span> +172: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/.+#{Porter2::C}(y|Y)$/</span> +173: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(y|Y)$/</span>, <span class="ruby-value str">'i'</span>) +174: <span class="ruby-keyword kw">else</span> +175: <span class="ruby-keyword kw">self</span> +176: <span class="ruby-keyword kw">end</span> +177: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-step--method" class="method-detail "> + <a name="method-i-porter2_step2"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_step2</span><span + class="method-args">(gb_english = false)</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Search for the longest among the suffixes listed in the keys of +Porter2::STEP_2_MAPS. If one is found and that suffix occurs in R1, +replace it with the value found in STEP_2_MAPS. +</p> +<p> +(Suffixes ‘ogi’ and ‘li’ are treated as special +cases in the procedure.) +</p> +<p> +(If gb_english is <tt>true</tt>, replace the ‘iser’ and +‘isation’ suffixes with ‘ise’, similarly to how +‘izer’ and ‘ization’ are treated.) 
+</p> + + + + <div class="method-source-code" + id="porter-step--source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 188</span> +188: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>) +189: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> +190: <span class="ruby-identifier">s2m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_2_MAPS</span>.<span class="ruby-identifier">dup</span> +191: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span> +192: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">"iser"</span>] = <span class="ruby-value str">"ise"</span> +193: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">"isation"</span>] = <span class="ruby-value str">"ise"</span> +194: <span class="ruby-keyword kw">end</span> +195: <span class="ruby-identifier">step_2_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s2m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)}) +196: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_2_re</span> +197: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span 
class="ruby-node">/#{$&}$/</span> +198: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s2m</span>[<span class="ruby-node">$&</span>]) +199: <span class="ruby-keyword kw">else</span> +200: <span class="ruby-keyword kw">self</span> +201: <span class="ruby-keyword kw">end</span> +202: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/li$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::Valid_LI})li$/</span> +203: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/li$/</span>, <span class="ruby-value str">''</span>) +204: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ogi$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/logi$/</span> +205: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ogi$/</span>, <span class="ruby-value str">'og'</span>) +206: <span class="ruby-keyword kw">else</span> +207: <span class="ruby-keyword kw">self</span> +208: <span class="ruby-keyword kw">end</span> +209: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-step--method" class="method-detail "> + <a name="method-i-porter2_step3"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_step3</span><span + class="method-args">(gb_english = false)</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div 
class="method-description"> + + <p> +Search for the longest among the suffixes listed in the keys of +Porter2::STEP_3_MAPS. If one is found and that suffix occurs in R1, +replace it with the value found in STEP_3_MAPS. +</p> +<p> +(Suffix ‘ative’ is treated as a special case in the procedure.) +</p> +<p> +(If gb_english is <tt>true</tt>, replace the ‘alise’ suffix +with ‘al’, similarly to how ‘alize’ is treated.) +</p> + + + + <div class="method-source-code" + id="porter-step--source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 220</span> +220: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>) +221: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span> +222: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ative$/</span>, <span class="ruby-value str">''</span>) +223: <span class="ruby-keyword kw">else</span> +224: <span class="ruby-identifier">s3m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_3_MAPS</span>.<span class="ruby-identifier">dup</span> +225: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span> +226: <span class="ruby-identifier">s3m</span>[<span class="ruby-value str">"alise"</span>] = <span class="ruby-value str">"al"</span> +227: <span class="ruby-keyword kw">end</span> +228: <span class="ruby-identifier">step_3_re</span> = <span class="ruby-constant">Regexp</span>.<span 
class="ruby-identifier">union</span>(<span class="ruby-identifier">s3m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)}) +229: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> +230: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_3_re</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&}$/</span> +231: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s3m</span>[<span class="ruby-node">$&</span>]) +232: <span class="ruby-keyword kw">else</span> +233: <span class="ruby-keyword kw">self</span> +234: <span class="ruby-keyword kw">end</span> +235: <span class="ruby-keyword kw">end</span> +236: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-step--method" class="method-detail "> + <a name="method-i-porter2_step4"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_step4</span><span + class="method-args">(gb_english = false)</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Search for the longest among the suffixes listed in the keys of +Porter2::STEP_4_MAPS. If one is found and that suffix occurs in R2, +replace it with the value found in STEP_4_MAPS. 
+</p> +<p> +(Suffix ‘ion’ is treated as a special case in the procedure.) +</p> +<p> +(If gb_english is <tt>true</tt>, delete the ‘ise’ suffix if +found.) +</p> + + + + <div class="method-source-code" + id="porter-step--source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 246</span> +246: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>) +247: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ion$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)ion$/</span> +248: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ion$/</span>, <span class="ruby-value str">''</span>) +249: <span class="ruby-keyword kw">else</span> +250: <span class="ruby-identifier">s4m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_4_MAPS</span>.<span class="ruby-identifier">dup</span> +251: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span> +252: <span class="ruby-identifier">s4m</span>[<span class="ruby-value str">"ise"</span>] = <span class="ruby-value str">""</span> +253: <span class="ruby-keyword kw">end</span> +254: <span class="ruby-identifier">step_4_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s4m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span 
class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"$"</span>)}) +255: <span class="ruby-identifier">r2</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> +256: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_4_re</span> +257: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r2</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&}/</span> +258: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&}$/</span>, <span class="ruby-identifier">s4m</span>[<span class="ruby-node">$&</span>]) +259: <span class="ruby-keyword kw">else</span> +260: <span class="ruby-keyword kw">self</span> +261: <span class="ruby-keyword kw">end</span> +262: <span class="ruby-keyword kw">else</span> +263: <span class="ruby-keyword kw">self</span> +264: <span class="ruby-keyword kw">end</span> +265: <span class="ruby-keyword kw">end</span> +266: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-step--method" class="method-detail "> + <a name="method-i-porter2_step5"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_step5</span><span + class="method-args">()</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Search for the following suffixes, and, if found, perform the action +indicated. 
+</p> +<table> +<tr><td valign="top">e</td><td><p> +delete if in R2, or in R1 and not preceded by a short syllable +</p> +</td></tr> +<tr><td valign="top">l</td><td><p> +delete if in R2 and preceded by l +</p> +</td></tr> +</table> + + + + <div class="method-source-code" + id="porter-step--source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 272</span> +272: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step5</span> +273: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/l$/</span> +274: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ll$/</span>, <span class="ruby-value str">'l'</span>) +275: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> +276: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>) +277: <span class="ruby-keyword kw">else</span> +278: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> +279: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span 
class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">not</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}e$/</span> +280: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>) +281: <span class="ruby-keyword kw">else</span> +282: <span class="ruby-keyword kw">self</span> +283: <span class="ruby-keyword kw">end</span> +284: <span class="ruby-keyword kw">end</span> +285: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="porter-tidy-method" class="method-detail "> + <a name="method-i-porter2_tidy"></a> + + <div class="method-heading"> + + <span class="method-name">porter2_tidy</span><span + class="method-args">()</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + <p> +Tidy up the word before we get down to the algorithm +</p> + + + + <div class="method-source-code" + id="porter-tidy-source"> +<pre> + <span class="ruby-comment cmt"># File lib/porter2.rb, line 35</span> +35: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_tidy</span> +36: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">downcase</span> +37: +38: <span class="ruby-comment cmt"># map apostrophe-like characters to apostrophes</span> +39: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/â/</span>, <span class="ruby-value str">"'"</span>) +40: <span class="ruby-identifier">preword</span>.<span 
class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/â/</span>, <span class="ruby-value str">"'"</span>) +41: +42: <span class="ruby-identifier">preword</span> +43: <span class="ruby-keyword kw">end</span></pre> + </div> + + </div> + + + + + </div> + + + <div id="stem-method" class="method-detail method-alias"> + <a name="method-i-stem"></a> + + <div class="method-heading"> + + <span class="method-name">stem</span><span + class="method-args">(gb_english = false)</span> + <span class="method-click-advice">click to toggle source</span> + + </div> + + <div class="method-description"> + + + + + + </div> + + + + + <div class="aliases"> + Alias for: <a href="String.html#method-i-porter2_stem">porter2_stem</a> + </div> + + </div> + + + </div> + </div> diff --git a/doc/TestPorter2.html b/doc/TestPorter2.html index 1bd45c0..dab7cbe 100644 --- a/doc/TestPorter2.html +++ b/doc/TestPorter2.html @@ -72,11 +72,11 @@ <h3 class="section-header">Methods</h3> <ul class="link-list"> - <li><a href="#method-i-test_ends_with_short_syllable%3F">#test_ends_with_short_syllable?</a></li> + <li><a href="#method-i-test_R1">#test_R1</a></li> - <li><a href="#method-i-test_find_R1">#test_find_R1</a></li> + <li><a href="#method-i-test_R2">#test_R2</a></li> - <li><a href="#method-i-test_find_R2">#test_find_R2</a></li> + <li><a href="#method-i-test_ends_with_short_syllable%3F">#test_ends_with_short_syllable?</a></li> <li><a href="#method-i-test_is_short_word%3F">#test_is_short_word?</a></li> @@ -133,7 +133,7 @@ <ul class="link-list"> - <li><a href="./Stemmable.html">Stemmable</a></li> + <li><a href="./Porter2.html">Porter2</a></li> <li><a href="./String.html">String</a></li> @@ -151,16 +151,7 @@ <h1 class="class">TestPorter2</h1> <div id="description"> - <p> -class <a href="String.html">String</a> -</p> -<pre> - public :porter2_preprocess, :porter2_r1, :porter2_r2 -</pre> -<p> -end -</p> - + </div> <!-- Constants --> @@ -171,12 +162,18 @@ end <dt><a 
name="TEST_WORDS">TEST_WORDS</a></dt> - <dd class="description"></dd> + <dd class="description"><p> +The full set of test words from <a +href="http://snowball.tartarus.org/algorithms/english/stemmer.html">snowball.tartarus.org/algorithms/english/stemmer.html</a> +</p></dd> <dt><a name="TEST_WORDS_ENGB">TEST_WORDS_ENGB</a></dt> - <dd class="description"></dd> + <dd class="description"><p> +Test words with -ise suffixes (and similar), to test how British English is +stemmed +</p></dd> </dl> @@ -192,12 +189,12 @@ end <h3 class="section-header">Public Instance Methods</h3> - <div id="test-ends-with-short-syllable--method" class="method-detail "> - <a name="method-i-test_ends_with_short_syllable%3F"></a> + <div id="test--method" class="method-detail "> + <a name="method-i-test_R1"></a> <div class="method-heading"> - <span class="method-name">test_ends_with_short_syllable?</span><span + <span class="method-name">test_R1</span><span class="method-args">()</span> <span class="method-click-advice">click to toggle source</span> @@ -210,20 +207,35 @@ end <div class="method-source-code" - id="test-ends-with-short-syllable--source"> + id="test--source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 69</span> -69: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_ends_with_short_syllable?</span> -70: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-value str">"rap"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> -71: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-value str">"trap"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> -72: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-value str">"entrap"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> 
-73: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-value str">"ow"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> -74: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-value str">"on"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> -75: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-value str">"at"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> -76: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">false</span>, <span class="ruby-value str">"uproot"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> -77: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">false</span>, <span class="ruby-value str">"bestow"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> -78: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">false</span>, <span class="ruby-value str">"disturb"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> -79: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 38</span> +38: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_R1</span> +39: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"iful"</span>, <span class="ruby-value str">"beautiful"</span>.<span class="ruby-identifier">porter2_r1</span> +40: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"y"</span>, <span class="ruby-value str">"beauty"</span>.<span class="ruby-identifier">porter2_r1</span> +41: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">""</span>, <span 
class="ruby-value str">"beau"</span>.<span class="ruby-identifier">porter2_r1</span> +42: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"imadversion"</span>, <span class="ruby-value str">"animadversion"</span>.<span class="ruby-identifier">porter2_r1</span> +43: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"kled"</span>, <span class="ruby-value str">"sprinkled"</span>.<span class="ruby-identifier">porter2_r1</span> +44: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"harist"</span>, <span class="ruby-value str">"eucharist"</span>.<span class="ruby-identifier">porter2_r1</span> +45: +46: <span class="ruby-comment cmt"># special cases</span> +47: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ate"</span>, <span class="ruby-value str">"generate"</span>.<span class="ruby-identifier">porter2_r1</span> +48: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ates"</span>, <span class="ruby-value str">"generates"</span>.<span class="ruby-identifier">porter2_r1</span> +49: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ated"</span>, <span class="ruby-value str">"generated"</span>.<span class="ruby-identifier">porter2_r1</span> +50: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"al"</span>, <span class="ruby-value str">"general"</span>.<span class="ruby-identifier">porter2_r1</span> +51: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ally"</span>, <span class="ruby-value str">"generally"</span>.<span class="ruby-identifier">porter2_r1</span> +52: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ic"</span>, <span class="ruby-value str">"generic"</span>.<span class="ruby-identifier">porter2_r1</span> +53: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value 
str">"ically"</span>, <span class="ruby-value str">"generically"</span>.<span class="ruby-identifier">porter2_r1</span> +54: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ous"</span>, <span class="ruby-value str">"generous"</span>.<span class="ruby-identifier">porter2_r1</span> +55: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ously"</span>, <span class="ruby-value str">"generously"</span>.<span class="ruby-identifier">porter2_r1</span> +56: +57: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"al"</span>, <span class="ruby-value str">"communal"</span>.<span class="ruby-identifier">porter2_r1</span> +58: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ity"</span>, <span class="ruby-value str">"community"</span>.<span class="ruby-identifier">porter2_r1</span> +59: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"e"</span>, <span class="ruby-value str">"commune"</span>.<span class="ruby-identifier">porter2_r1</span> +60: +61: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ic"</span>, <span class="ruby-value str">"arsenic"</span>.<span class="ruby-identifier">porter2_r1</span> +62: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"al"</span>, <span class="ruby-value str">"arsenal"</span>.<span class="ruby-identifier">porter2_r1</span> +63: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -234,12 +246,12 @@ end </div> - <div id="test-find--method" class="method-detail "> - <a name="method-i-test_find_R1"></a> + <div id="test--method" class="method-detail "> + <a name="method-i-test_R2"></a> <div class="method-heading"> - <span class="method-name">test_find_R1</span><span + <span class="method-name">test_R2</span><span class="method-args">()</span> <span class="method-click-advice">click to toggle source</span> @@ -252,35 +264,17 @@ 
end <div class="method-source-code" - id="test-find--source"> + id="test--source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 42</span> -42: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_find_R1</span> -43: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"iful"</span>, <span class="ruby-value str">"beautiful"</span>.<span class="ruby-identifier">porter2_r1</span> -44: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"y"</span>, <span class="ruby-value str">"beauty"</span>.<span class="ruby-identifier">porter2_r1</span> -45: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">""</span>, <span class="ruby-value str">"beau"</span>.<span class="ruby-identifier">porter2_r1</span> -46: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"imadversion"</span>, <span class="ruby-value str">"animadversion"</span>.<span class="ruby-identifier">porter2_r1</span> -47: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"kled"</span>, <span class="ruby-value str">"sprinkled"</span>.<span class="ruby-identifier">porter2_r1</span> -48: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"harist"</span>, <span class="ruby-value str">"eucharist"</span>.<span class="ruby-identifier">porter2_r1</span> -49: -50: <span class="ruby-comment cmt"># special cases</span> -51: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ate"</span>, <span class="ruby-value str">"generate"</span>.<span class="ruby-identifier">porter2_r1</span> -52: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ates"</span>, <span class="ruby-value str">"generates"</span>.<span class="ruby-identifier">porter2_r1</span> -53: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ated"</span>, <span 
class="ruby-value str">"generated"</span>.<span class="ruby-identifier">porter2_r1</span> -54: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"al"</span>, <span class="ruby-value str">"general"</span>.<span class="ruby-identifier">porter2_r1</span> -55: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ally"</span>, <span class="ruby-value str">"generally"</span>.<span class="ruby-identifier">porter2_r1</span> -56: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ic"</span>, <span class="ruby-value str">"generic"</span>.<span class="ruby-identifier">porter2_r1</span> -57: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ically"</span>, <span class="ruby-value str">"generically"</span>.<span class="ruby-identifier">porter2_r1</span> -58: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ous"</span>, <span class="ruby-value str">"generous"</span>.<span class="ruby-identifier">porter2_r1</span> -59: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ously"</span>, <span class="ruby-value str">"generously"</span>.<span class="ruby-identifier">porter2_r1</span> -60: -61: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"al"</span>, <span class="ruby-value str">"communal"</span>.<span class="ruby-identifier">porter2_r1</span> -62: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ity"</span>, <span class="ruby-value str">"community"</span>.<span class="ruby-identifier">porter2_r1</span> -63: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"e"</span>, <span class="ruby-value str">"commune"</span>.<span class="ruby-identifier">porter2_r1</span> -64: -65: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ic"</span>, <span class="ruby-value str">"arsenic"</span>.<span 
class="ruby-identifier">porter2_r1</span> -66: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"al"</span>, <span class="ruby-value str">"arsenal"</span>.<span class="ruby-identifier">porter2_r1</span> -67: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 65</span> +65: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_R2</span> +66: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ul"</span>, <span class="ruby-value str">"beautiful"</span>.<span class="ruby-identifier">porter2_r2</span> +67: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">""</span>, <span class="ruby-value str">"beauty"</span>.<span class="ruby-identifier">porter2_r2</span> +68: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">""</span>, <span class="ruby-value str">"beau"</span>.<span class="ruby-identifier">porter2_r2</span> +69: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"adversion"</span>, <span class="ruby-value str">"animadversion"</span>.<span class="ruby-identifier">porter2_r2</span> +70: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">""</span>, <span class="ruby-value str">"sprinkled"</span>.<span class="ruby-identifier">porter2_r2</span> +71: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ist"</span>, <span class="ruby-value str">"eucharist"</span>.<span class="ruby-identifier">porter2_r2</span> +72: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -291,12 +285,12 @@ end </div> - <div id="test-find--method" class="method-detail "> - <a name="method-i-test_find_R2"></a> + <div id="test-ends-with-short-syllable--method" class="method-detail "> + <a name="method-i-test_ends_with_short_syllable%3F"></a> <div class="method-heading"> - <span 
class="method-name">test_find_R2</span><span + <span class="method-name">test_ends_with_short_syllable?</span><span class="method-args">()</span> <span class="method-click-advice">click to toggle source</span> @@ -309,17 +303,20 @@ end <div class="method-source-code" - id="test-find--source"> + id="test-ends-with-short-syllable--source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 96</span> - 96: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_find_R2</span> - 97: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ul"</span>, <span class="ruby-value str">"beautiful"</span>.<span class="ruby-identifier">porter2_r2</span> - 98: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">""</span>, <span class="ruby-value str">"beauty"</span>.<span class="ruby-identifier">porter2_r2</span> - 99: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">""</span>, <span class="ruby-value str">"beau"</span>.<span class="ruby-identifier">porter2_r2</span> -100: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"adversion"</span>, <span class="ruby-value str">"animadversion"</span>.<span class="ruby-identifier">porter2_r2</span> -101: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">""</span>, <span class="ruby-value str">"sprinkled"</span>.<span class="ruby-identifier">porter2_r2</span> -102: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ist"</span>, <span class="ruby-value str">"eucharist"</span>.<span class="ruby-identifier">porter2_r2</span> -103: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 74</span> +74: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_ends_with_short_syllable?</span> +75: <span class="ruby-identifier">assert_equal</span> <span 
class="ruby-keyword kw">true</span>, <span class="ruby-value str">"rap"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> +76: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-value str">"trap"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> +77: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-value str">"entrap"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> +78: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-value str">"ow"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> +79: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-value str">"on"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> +80: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-value str">"at"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> +81: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">false</span>, <span class="ruby-value str">"uproot"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> +82: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">false</span>, <span class="ruby-value str">"bestow"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> +83: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">false</span>, <span class="ruby-value str">"disturb"</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> +84: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -350,21 +347,21 @@ end <div class="method-source-code" 
id="test-is-short-word--source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 81</span> -81: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_is_short_word?</span> -82: <span class="ruby-identifier">short_words</span> = <span class="ruby-node">] bed shed shred hop ]</span> -83: <span class="ruby-identifier">long_words</span> = <span class="ruby-node">] bead embed beds ]</span> -84: <span class="ruby-identifier">short_words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> -85: <span class="ruby-identifier">r1</span> = <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_r1</span> -86: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span>, -87: <span class="ruby-node">"#{w} should be short but classified as long"</span> -88: <span class="ruby-keyword kw">end</span> -89: <span class="ruby-identifier">long_words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 86</span> +86: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_is_short_word?</span> +87: <span class="ruby-identifier">short_words</span> = <span class="ruby-node">] bed shed shred hop ]</span> +88: <span class="ruby-identifier">long_words</span> = <span class="ruby-node">] bead embed beds ]</span> +89: <span class="ruby-identifier">short_words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span 
class="ruby-identifier">w</span><span class="ruby-operator">|</span> 90: <span class="ruby-identifier">r1</span> = <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_r1</span> -91: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">false</span>, <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span>, -92: <span class="ruby-node">"#{w} should be long but classified as short"</span> +91: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">true</span>, <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span>, +92: <span class="ruby-node">"#{w} should be short but classified as long"</span> 93: <span class="ruby-keyword kw">end</span> -94: <span class="ruby-keyword kw">end</span></pre> +94: <span class="ruby-identifier">long_words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> +95: <span class="ruby-identifier">r1</span> = <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_r1</span> +96: <span class="ruby-identifier">assert_equal</span> <span class="ruby-keyword kw">false</span>, <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span>, +97: <span class="ruby-node">"#{w} should be long but classified as short"</span> +98: <span class="ruby-keyword kw">end</span> +99: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -395,14 +392,14 @@ end <div class="method-source-code" id="test-porter-postprocess-source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 303</span> -303: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_porter2_postprocess</span> -304: <span class="ruby-identifier">assert_equal</span> <span 
class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">porter2_postprocess</span> -305: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacy"</span>, <span class="ruby-value str">"abacy"</span>.<span class="ruby-identifier">porter2_postprocess</span> -306: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacy"</span>, <span class="ruby-value str">"abacY"</span>.<span class="ruby-identifier">porter2_postprocess</span> -307: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"aybcy"</span>, <span class="ruby-value str">"aYbcY"</span>.<span class="ruby-identifier">porter2_postprocess</span> -308: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"aybcy"</span>, <span class="ruby-value str">"aYbcy"</span>.<span class="ruby-identifier">porter2_postprocess</span> -309: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 299</span> +299: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_porter2_postprocess</span> +300: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">porter2_postprocess</span> +301: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacy"</span>, <span class="ruby-value str">"abacy"</span>.<span class="ruby-identifier">porter2_postprocess</span> +302: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacy"</span>, <span class="ruby-value str">"abacY"</span>.<span class="ruby-identifier">porter2_postprocess</span> +303: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"aybcy"</span>, <span class="ruby-value str">"aYbcY"</span>.<span class="ruby-identifier">porter2_postprocess</span> 
+304: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"aybcy"</span>, <span class="ruby-value str">"aYbcy"</span>.<span class="ruby-identifier">porter2_postprocess</span> +305: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -433,19 +430,20 @@ end <div class="method-source-code" id="test-preprocess-source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 29</span> -29: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_preprocess</span> -30: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">"abacde"</span>.<span class="ruby-identifier">porter2_preprocess</span> -31: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">"''abacde"</span>.<span class="ruby-identifier">porter2_preprocess</span> -32: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'c'de"</span>, <span class="ruby-value str">"'ab'c'de"</span>.<span class="ruby-identifier">porter2_preprocess</span> -33: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'c'de"</span>, <span class="ruby-value str">"''ab'c'de"</span>.<span class="ruby-identifier">porter2_preprocess</span> -34: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"Yabac"</span>, <span class="ruby-value str">"yabac"</span>.<span class="ruby-identifier">porter2_preprocess</span> -35: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"aYbc"</span>, <span class="ruby-value str">"aybc"</span>.<span class="ruby-identifier">porter2_preprocess</span> -36: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacdeY"</span>, <span class="ruby-value str">"abacdey"</span>.<span class="ruby-identifier">porter2_preprocess</span> -37: <span 
class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abaYde"</span>, <span class="ruby-value str">"abayde"</span>.<span class="ruby-identifier">porter2_preprocess</span> -38: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"kabaYde"</span>, <span class="ruby-value str">"kabayde"</span>.<span class="ruby-identifier">porter2_preprocess</span> -39: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"'"</span>, <span class="ruby-value str">"'''"</span>.<span class="ruby-identifier">porter2_preprocess</span> -40: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 24</span> +24: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_preprocess</span> +25: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">"abacde"</span>.<span class="ruby-identifier">porter2_preprocess</span> +26: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">"''abacde"</span>.<span class="ruby-identifier">porter2_preprocess</span> +27: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'c'de"</span>, <span class="ruby-value str">"'ab'c'de"</span>.<span class="ruby-identifier">porter2_preprocess</span> +28: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'c'de"</span>, <span class="ruby-value str">"''ab'c'de"</span>.<span class="ruby-identifier">porter2_preprocess</span> +29: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"Yabac"</span>, <span class="ruby-value str">"yabac"</span>.<span class="ruby-identifier">porter2_preprocess</span> +30: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"aYbc"</span>, <span class="ruby-value str">"aybc"</span>.<span 
class="ruby-identifier">porter2_preprocess</span> +31: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacdeY"</span>, <span class="ruby-value str">"abacdey"</span>.<span class="ruby-identifier">porter2_preprocess</span> +32: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abaYde"</span>, <span class="ruby-value str">"abayde"</span>.<span class="ruby-identifier">porter2_preprocess</span> +33: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"kabaYde"</span>, <span class="ruby-value str">"kabayde"</span>.<span class="ruby-identifier">porter2_preprocess</span> +34: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"kabyaYde"</span>, <span class="ruby-value str">"kabyayde"</span>.<span class="ruby-identifier">porter2_preprocess</span> +35: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"'"</span>, <span class="ruby-value str">"'''"</span>.<span class="ruby-identifier">porter2_preprocess</span> +36: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -476,13 +474,13 @@ end <div class="method-source-code" id="test-stemmer-source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_full.rb, line 29521</span> -29521: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_stemmer</span> -29522: <span class="ruby-constant">TEST_WORDS</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">original</span>, <span class="ruby-identifier">stemmed</span><span class="ruby-operator">|</span> -29523: <span class="ruby-identifier">assert_equal</span> <span class="ruby-identifier">stemmed</span>, <span class="ruby-identifier">original</span>.<span class="ruby-identifier">stem</span>, -29524: <span class="ruby-node">"#{original} should have stemmed to #{stemmed} but got #{original.stem} 
instead"</span> -29525: <span class="ruby-keyword kw">end</span> -29526: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_full.rb, line 29519</span> +29519: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_stemmer</span> +29520: <span class="ruby-constant">TEST_WORDS</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">original</span>, <span class="ruby-identifier">stemmed</span><span class="ruby-operator">|</span> +29521: <span class="ruby-identifier">assert_equal</span> <span class="ruby-identifier">stemmed</span>, <span class="ruby-identifier">original</span>.<span class="ruby-identifier">stem</span>, +29522: <span class="ruby-node">"#{original} should have stemmed to #{stemmed} but got #{original.stem} instead"</span> +29523: <span class="ruby-keyword kw">end</span> +29524: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -513,13 +511,13 @@ end <div class="method-source-code" id="test-stemmer-engb-source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_full.rb, line 29528</span> -29528: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_stemmer_engb</span> -29529: <span class="ruby-constant">TEST_WORDS_ENGB</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">original</span>, <span class="ruby-identifier">stemmed</span><span class="ruby-operator">|</span> -29530: <span class="ruby-identifier">assert_equal</span> <span class="ruby-identifier">stemmed</span>, <span class="ruby-identifier">original</span>.<span class="ruby-identifier">stem</span>(<span class="ruby-keyword kw">true</span>), -29531: <span class="ruby-node">"#{original} should have stemmed to #{stemmed} but got #{original.stem(true)} instead"</span> -29532: <span 
class="ruby-keyword kw">end</span> -29533: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_full.rb, line 29526</span> +29526: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_stemmer_engb</span> +29527: <span class="ruby-constant">TEST_WORDS_ENGB</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">original</span>, <span class="ruby-identifier">stemmed</span><span class="ruby-operator">|</span> +29528: <span class="ruby-identifier">assert_equal</span> <span class="ruby-identifier">stemmed</span>, <span class="ruby-identifier">original</span>.<span class="ruby-identifier">stem</span>(<span class="ruby-keyword kw">true</span>), +29529: <span class="ruby-node">"#{original} should have stemmed to #{stemmed} but got #{original.stem(true)} instead"</span> +29530: <span class="ruby-keyword kw">end</span> +29531: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -550,20 +548,20 @@ end <div class="method-source-code" id="test-step--source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 105</span> -105: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_0</span> -106: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">step_0</span> -107: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac'"</span>.<span class="ruby-identifier">step_0</span> -108: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac's"</span>.<span class="ruby-identifier">step_0</span> -109: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span 
class="ruby-value str">"abac's'"</span>.<span class="ruby-identifier">step_0</span> -110: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'c"</span>, <span class="ruby-value str">"ab'c"</span>.<span class="ruby-identifier">step_0</span> -111: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'sc"</span>, <span class="ruby-value str">"ab'sc"</span>.<span class="ruby-identifier">step_0</span> -112: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab's'c"</span>, <span class="ruby-value str">"ab's'c"</span>.<span class="ruby-identifier">step_0</span> -113: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'sc"</span>, <span class="ruby-value str">"ab'sc's"</span>.<span class="ruby-identifier">step_0</span> -114: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"'"</span>, <span class="ruby-value str">"'"</span>.<span class="ruby-identifier">step_0</span> -115: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"'s"</span>, <span class="ruby-value str">"'s"</span>.<span class="ruby-identifier">step_0</span> -116: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"'s"</span>, <span class="ruby-value str">"'s'"</span>.<span class="ruby-identifier">step_0</span> -117: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 101</span> +101: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_0</span> +102: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">porter2_step0</span> +103: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac'"</span>.<span 
class="ruby-identifier">porter2_step0</span> +104: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac's"</span>.<span class="ruby-identifier">porter2_step0</span> +105: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac's'"</span>.<span class="ruby-identifier">porter2_step0</span> +106: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'c"</span>, <span class="ruby-value str">"ab'c"</span>.<span class="ruby-identifier">porter2_step0</span> +107: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'sc"</span>, <span class="ruby-value str">"ab'sc"</span>.<span class="ruby-identifier">porter2_step0</span> +108: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab's'c"</span>, <span class="ruby-value str">"ab's'c"</span>.<span class="ruby-identifier">porter2_step0</span> +109: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'sc"</span>, <span class="ruby-value str">"ab'sc's"</span>.<span class="ruby-identifier">porter2_step0</span> +110: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"'"</span>, <span class="ruby-value str">"'"</span>.<span class="ruby-identifier">porter2_step0</span> +111: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"'s"</span>, <span class="ruby-value str">"'s"</span>.<span class="ruby-identifier">porter2_step0</span> +112: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"'s"</span>, <span class="ruby-value str">"'s'"</span>.<span class="ruby-identifier">porter2_step0</span> +113: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -594,21 +592,21 @@ end <div class="method-source-code" id="test-step-a-source"> <pre> - <span class="ruby-comment cmt"># File 
test/tc_porter2_parts.rb, line 119</span> -119: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_1a</span> -120: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">"abacde"</span>.<span class="ruby-identifier">step_1a</span> -121: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacess"</span>, <span class="ruby-value str">"abacesses"</span>.<span class="ruby-identifier">step_1a</span> -122: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"tie"</span>, <span class="ruby-value str">"ties"</span>.<span class="ruby-identifier">step_1a</span> -123: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"tie"</span>, <span class="ruby-value str">"tied"</span>.<span class="ruby-identifier">step_1a</span> -124: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"cri"</span>, <span class="ruby-value str">"cries"</span>.<span class="ruby-identifier">step_1a</span> -125: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"cri"</span>, <span class="ruby-value str">"cried"</span>.<span class="ruby-identifier">step_1a</span> -126: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"gas"</span>, <span class="ruby-value str">"gas"</span>.<span class="ruby-identifier">step_1a</span> -127: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"this"</span>, <span class="ruby-value str">"this"</span>.<span class="ruby-identifier">step_1a</span> -128: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"gap"</span>, <span class="ruby-value str">"gaps"</span>.<span class="ruby-identifier">step_1a</span> -129: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"kiwi"</span>, <span class="ruby-value str">"kiwis"</span>.<span 
class="ruby-identifier">step_1a</span> -130: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacus"</span>, <span class="ruby-value str">"abacus"</span>.<span class="ruby-identifier">step_1a</span> -131: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacess"</span>, <span class="ruby-value str">"abacess"</span>.<span class="ruby-identifier">step_1a</span> -132: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 115</span> +115: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_1a</span> +116: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">"abacde"</span>.<span class="ruby-identifier">porter2_step1a</span> +117: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacess"</span>, <span class="ruby-value str">"abacesses"</span>.<span class="ruby-identifier">porter2_step1a</span> +118: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"tie"</span>, <span class="ruby-value str">"ties"</span>.<span class="ruby-identifier">porter2_step1a</span> +119: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"tie"</span>, <span class="ruby-value str">"tied"</span>.<span class="ruby-identifier">porter2_step1a</span> +120: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"cri"</span>, <span class="ruby-value str">"cries"</span>.<span class="ruby-identifier">porter2_step1a</span> +121: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"cri"</span>, <span class="ruby-value str">"cried"</span>.<span class="ruby-identifier">porter2_step1a</span> +122: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"gas"</span>, <span class="ruby-value str">"gas"</span>.<span 
class="ruby-identifier">porter2_step1a</span> +123: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"this"</span>, <span class="ruby-value str">"this"</span>.<span class="ruby-identifier">porter2_step1a</span> +124: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"gap"</span>, <span class="ruby-value str">"gaps"</span>.<span class="ruby-identifier">porter2_step1a</span> +125: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"kiwi"</span>, <span class="ruby-value str">"kiwis"</span>.<span class="ruby-identifier">porter2_step1a</span> +126: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacus"</span>, <span class="ruby-value str">"abacus"</span>.<span class="ruby-identifier">porter2_step1a</span> +127: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacess"</span>, <span class="ruby-value str">"abacess"</span>.<span class="ruby-identifier">porter2_step1a</span> +128: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -639,28 +637,28 @@ end <div class="method-source-code" id="test-step-b-source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 134</span> -134: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_1b</span> -135: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">"abacde"</span>.<span class="ruby-identifier">step_1b</span> -136: <span class="ruby-identifier">words_non_gb</span> = {<span class="ruby-value str">"luxuriated"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"luxuriate"</span>, <span class="ruby-value str">"luxuriating"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"luxuriate"</span>, -137: <span class="ruby-value str">"hopping"</span> =<span class="ruby-operator">></span> <span 
class="ruby-value str">"hop"</span>, <span class="ruby-value str">"hopped"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hop"</span>, -138: <span class="ruby-value str">"hoped"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hope"</span>, <span class="ruby-value str">"hoping"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hope"</span>, -139: <span class="ruby-value str">"atomized"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"atomize"</span>, <span class="ruby-value str">"atomised"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"atomis"</span>, -140: <span class="ruby-value str">"addicted"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"addict"</span>, <span class="ruby-value str">"bleed"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"bleed"</span> } -141: <span class="ruby-identifier">words_non_gb</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">original</span>, <span class="ruby-identifier">stemmed</span><span class="ruby-operator">|</span> -142: <span class="ruby-identifier">assert_equal</span> <span class="ruby-identifier">stemmed</span>, <span class="ruby-identifier">original</span>.<span class="ruby-identifier">step_1b</span>, -143: <span class="ruby-node">"#{original} should have stemmed to #{stemmed} but got #{original.step_1b(original.porter2_r1)} instead"</span> -144: <span class="ruby-keyword kw">end</span> -145: <span class="ruby-identifier">words_gb</span> = {<span class="ruby-value str">"luxuriated"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"luxuriate"</span>, <span class="ruby-value str">"luxuriating"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"luxuriate"</span>, -146: <span class="ruby-value 
str">"hopping"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hop"</span>, <span class="ruby-value str">"hopped"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hop"</span>, -147: <span class="ruby-value str">"hoped"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hope"</span>, <span class="ruby-value str">"hoping"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hope"</span>, -148: <span class="ruby-value str">"atomized"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"atomize"</span>, <span class="ruby-value str">"atomised"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"atomise"</span>, -149: <span class="ruby-value str">"addicted"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"addict"</span>, <span class="ruby-value str">"bleed"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"bleed"</span> } -150: <span class="ruby-identifier">words_gb</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">original</span>, <span class="ruby-identifier">stemmed</span><span class="ruby-operator">|</span> -151: <span class="ruby-identifier">assert_equal</span> <span class="ruby-identifier">stemmed</span>, <span class="ruby-identifier">original</span>.<span class="ruby-identifier">step_1b</span>(<span class="ruby-keyword kw">true</span>), -152: <span class="ruby-node">"#{original} should have stemmed to #{stemmed} but got #{original.step_1b(original.porter2_r1)} instead"</span> -153: <span class="ruby-keyword kw">end</span> -154: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 130</span> +130: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_1b</span> +131: <span 
class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">"abacde"</span>.<span class="ruby-identifier">porter2_step1b</span> +132: <span class="ruby-identifier">words_non_gb</span> = {<span class="ruby-value str">"luxuriated"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"luxuriate"</span>, <span class="ruby-value str">"luxuriating"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"luxuriate"</span>, +133: <span class="ruby-value str">"hopping"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hop"</span>, <span class="ruby-value str">"hopped"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hop"</span>, +134: <span class="ruby-value str">"hoped"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hope"</span>, <span class="ruby-value str">"hoping"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hope"</span>, +135: <span class="ruby-value str">"atomized"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"atomize"</span>, <span class="ruby-value str">"atomised"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"atomis"</span>, +136: <span class="ruby-value str">"addicted"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"addict"</span>, <span class="ruby-value str">"bleed"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"bleed"</span> } +137: <span class="ruby-identifier">words_non_gb</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">original</span>, <span class="ruby-identifier">stemmed</span><span class="ruby-operator">|</span> +138: <span class="ruby-identifier">assert_equal</span> <span class="ruby-identifier">stemmed</span>, <span 
class="ruby-identifier">original</span>.<span class="ruby-identifier">porter2_step1b</span>, +139: <span class="ruby-node">"#{original} should have stemmed to #{stemmed} but got #{original.porter2_step1b(original.porter2_r1)} instead"</span> +140: <span class="ruby-keyword kw">end</span> +141: <span class="ruby-identifier">words_gb</span> = {<span class="ruby-value str">"luxuriated"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"luxuriate"</span>, <span class="ruby-value str">"luxuriating"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"luxuriate"</span>, +142: <span class="ruby-value str">"hopping"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hop"</span>, <span class="ruby-value str">"hopped"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hop"</span>, +143: <span class="ruby-value str">"hoped"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hope"</span>, <span class="ruby-value str">"hoping"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"hope"</span>, +144: <span class="ruby-value str">"atomized"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"atomize"</span>, <span class="ruby-value str">"atomised"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"atomise"</span>, +145: <span class="ruby-value str">"addicted"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"addict"</span>, <span class="ruby-value str">"bleed"</span> =<span class="ruby-operator">></span> <span class="ruby-value str">"bleed"</span> } +146: <span class="ruby-identifier">words_gb</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">original</span>, <span class="ruby-identifier">stemmed</span><span class="ruby-operator">|</span> +147: <span 
class="ruby-identifier">assert_equal</span> <span class="ruby-identifier">stemmed</span>, <span class="ruby-identifier">original</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-keyword kw">true</span>), +148: <span class="ruby-node">"#{original} should have stemmed to #{stemmed} but got #{original.porter2_step1b(original.porter2_r1)} instead"</span> +149: <span class="ruby-keyword kw">end</span> +150: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -691,13 +689,13 @@ end <div class="method-source-code" id="test-step-c-source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 156</span> -156: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_1c</span> -157: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"cri"</span>, <span class="ruby-value str">"cry"</span>.<span class="ruby-identifier">step_1c</span> -158: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"by"</span>, <span class="ruby-value str">"by"</span>.<span class="ruby-identifier">step_1c</span> -159: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"saY"</span>, <span class="ruby-value str">"saY"</span>.<span class="ruby-identifier">step_1c</span> -160: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abbeY"</span>, <span class="ruby-value str">"abbeY"</span>.<span class="ruby-identifier">step_1c</span> -161: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 152</span> +152: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_1c</span> +153: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"cri"</span>, <span class="ruby-value str">"cry"</span>.<span class="ruby-identifier">porter2_step1c</span> +154: <span class="ruby-identifier">assert_equal</span> <span 
class="ruby-value str">"by"</span>, <span class="ruby-value str">"by"</span>.<span class="ruby-identifier">porter2_step1c</span> +155: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"saY"</span>, <span class="ruby-value str">"saY"</span>.<span class="ruby-identifier">porter2_step1c</span> +156: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abbeY"</span>, <span class="ruby-value str">"abbeY"</span>.<span class="ruby-identifier">porter2_step1c</span> +157: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -728,66 +726,66 @@ end <div class="method-source-code" id="test-step--source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 163</span> -163: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_2</span> -164: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">step_2</span> -165: -166: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalization"</span>.<span class="ruby-identifier">step_2</span> -167: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalisate"</span>, <span class="ruby-value str">"nationalisation"</span>.<span class="ruby-identifier">step_2</span> -168: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalization"</span>.<span class="ruby-identifier">step_2</span>(<span class="ruby-keyword kw">true</span>) -169: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalise"</span>, <span class="ruby-value str">"nationalisation"</span>.<span class="ruby-identifier">step_2</span>(<span class="ruby-keyword kw">true</span>) -170: <span class="ruby-comment cmt"># Repeat 
the steps to ensure that the english-gb behaviour isn't sticky</span> -171: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalization"</span>.<span class="ruby-identifier">step_2</span>(<span class="ruby-keyword kw">false</span>) -172: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalisate"</span>, <span class="ruby-value str">"nationalisation"</span>.<span class="ruby-identifier">step_2</span>(<span class="ruby-keyword kw">false</span>) -173: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalization"</span>.<span class="ruby-identifier">step_2</span> -174: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalisate"</span>, <span class="ruby-value str">"nationalisation"</span>.<span class="ruby-identifier">step_2</span> -175: -176: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalizer"</span>.<span class="ruby-identifier">step_2</span> -177: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationaliser"</span>, <span class="ruby-value str">"nationaliser"</span>.<span class="ruby-identifier">step_2</span> -178: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalizer"</span>.<span class="ruby-identifier">step_2</span>(<span class="ruby-keyword kw">true</span>) -179: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalise"</span>, <span class="ruby-value str">"nationaliser"</span>.<span class="ruby-identifier">step_2</span>(<span class="ruby-keyword kw">true</span>) -180: -181: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abaction"</span>, <span 
class="ruby-value str">"abactional"</span>.<span class="ruby-identifier">step_2</span> -182: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacence"</span>, <span class="ruby-value str">"abacenci"</span>.<span class="ruby-identifier">step_2</span> -183: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacance"</span>, <span class="ruby-value str">"abacanci"</span>.<span class="ruby-identifier">step_2</span> -184: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacable"</span>, <span class="ruby-value str">"abacabli"</span>.<span class="ruby-identifier">step_2</span> -185: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacent"</span>, <span class="ruby-value str">"abacentli"</span>.<span class="ruby-identifier">step_2</span> -186: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacize"</span>, <span class="ruby-value str">"abacizer"</span>.<span class="ruby-identifier">step_2</span> -187: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacize"</span>, <span class="ruby-value str">"abacization"</span>.<span class="ruby-identifier">step_2</span> -188: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacate"</span>, <span class="ruby-value str">"abacational"</span>.<span class="ruby-identifier">step_2</span> -189: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacate"</span>, <span class="ruby-value str">"abacation"</span>.<span class="ruby-identifier">step_2</span> -190: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacate"</span>, <span class="ruby-value str">"abacator"</span>.<span class="ruby-identifier">step_2</span> -191: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacal"</span>, <span class="ruby-value str">"abacalism"</span>.<span 
class="ruby-identifier">step_2</span> -192: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacal"</span>, <span class="ruby-value str">"abacaliti"</span>.<span class="ruby-identifier">step_2</span> -193: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacal"</span>, <span class="ruby-value str">"abacalli"</span>.<span class="ruby-identifier">step_2</span> -194: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacful"</span>, <span class="ruby-value str">"abacfulness"</span>.<span class="ruby-identifier">step_2</span> -195: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacous"</span>, <span class="ruby-value str">"abacousli"</span>.<span class="ruby-identifier">step_2</span> -196: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacous"</span>, <span class="ruby-value str">"abacousness"</span>.<span class="ruby-identifier">step_2</span> -197: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacive"</span>, <span class="ruby-value str">"abaciveness"</span>.<span class="ruby-identifier">step_2</span> -198: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacive"</span>, <span class="ruby-value str">"abaciviti"</span>.<span class="ruby-identifier">step_2</span> -199: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abiliti"</span>, <span class="ruby-value str">"abiliti"</span>.<span class="ruby-identifier">step_2</span> -200: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacble"</span>, <span class="ruby-value str">"abacbiliti"</span>.<span class="ruby-identifier">step_2</span> -201: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacble"</span>, <span class="ruby-value str">"abacbli"</span>.<span class="ruby-identifier">step_2</span> -202: <span 
class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacful"</span>, <span class="ruby-value str">"abacfulli"</span>.<span class="ruby-identifier">step_2</span> -203: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacless"</span>, <span class="ruby-value str">"abaclessli"</span>.<span class="ruby-identifier">step_2</span> -204: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abaclog"</span>, <span class="ruby-value str">"abaclogi"</span>.<span class="ruby-identifier">step_2</span> -205: -206: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacli"</span>.<span class="ruby-identifier">step_2</span> -207: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abd"</span>, <span class="ruby-value str">"abdli"</span>.<span class="ruby-identifier">step_2</span> -208: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abe"</span>, <span class="ruby-value str">"abeli"</span>.<span class="ruby-identifier">step_2</span> -209: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abg"</span>, <span class="ruby-value str">"abgli"</span>.<span class="ruby-identifier">step_2</span> -210: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abh"</span>, <span class="ruby-value str">"abhli"</span>.<span class="ruby-identifier">step_2</span> -211: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abk"</span>, <span class="ruby-value str">"abkli"</span>.<span class="ruby-identifier">step_2</span> -212: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abm"</span>, <span class="ruby-value str">"abmli"</span>.<span class="ruby-identifier">step_2</span> -213: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abn"</span>, <span 
class="ruby-value str">"abnli"</span>.<span class="ruby-identifier">step_2</span> -214: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abr"</span>, <span class="ruby-value str">"abrli"</span>.<span class="ruby-identifier">step_2</span> -215: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abt"</span>, <span class="ruby-value str">"abtli"</span>.<span class="ruby-identifier">step_2</span> -216: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abali"</span>, <span class="ruby-value str">"abali"</span>.<span class="ruby-identifier">step_2</span> -217: -218: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"bad"</span>, <span class="ruby-value str">"badli"</span>.<span class="ruby-identifier">step_2</span> -219: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"fluentli"</span>, <span class="ruby-value str">"fluentli"</span>.<span class="ruby-identifier">step_2</span> -220: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"geolog"</span>, <span class="ruby-value str">"geologi"</span>.<span class="ruby-identifier">step_2</span> -221: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 159</span> +159: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_2</span> +160: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">porter2_step2</span> +161: +162: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalization"</span>.<span class="ruby-identifier">porter2_step2</span> +163: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalisate"</span>, <span 
class="ruby-value str">"nationalisation"</span>.<span class="ruby-identifier">porter2_step2</span> +164: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalization"</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-keyword kw">true</span>) +165: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalise"</span>, <span class="ruby-value str">"nationalisation"</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-keyword kw">true</span>) +166: <span class="ruby-comment cmt"># Repeat the steps to ensure that the english-gb behaviour isn't sticky</span> +167: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalization"</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-keyword kw">false</span>) +168: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalisate"</span>, <span class="ruby-value str">"nationalisation"</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-keyword kw">false</span>) +169: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalization"</span>.<span class="ruby-identifier">porter2_step2</span> +170: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalisate"</span>, <span class="ruby-value str">"nationalisation"</span>.<span class="ruby-identifier">porter2_step2</span> +171: +172: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalizer"</span>.<span class="ruby-identifier">porter2_step2</span> +173: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationaliser"</span>, <span 
class="ruby-value str">"nationaliser"</span>.<span class="ruby-identifier">porter2_step2</span> +174: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalize"</span>, <span class="ruby-value str">"nationalizer"</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-keyword kw">true</span>) +175: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalise"</span>, <span class="ruby-value str">"nationaliser"</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-keyword kw">true</span>) +176: +177: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abaction"</span>, <span class="ruby-value str">"abactional"</span>.<span class="ruby-identifier">porter2_step2</span> +178: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacence"</span>, <span class="ruby-value str">"abacenci"</span>.<span class="ruby-identifier">porter2_step2</span> +179: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacance"</span>, <span class="ruby-value str">"abacanci"</span>.<span class="ruby-identifier">porter2_step2</span> +180: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacable"</span>, <span class="ruby-value str">"abacabli"</span>.<span class="ruby-identifier">porter2_step2</span> +181: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacent"</span>, <span class="ruby-value str">"abacentli"</span>.<span class="ruby-identifier">porter2_step2</span> +182: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacize"</span>, <span class="ruby-value str">"abacizer"</span>.<span class="ruby-identifier">porter2_step2</span> +183: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacize"</span>, <span class="ruby-value str">"abacization"</span>.<span 
class="ruby-identifier">porter2_step2</span> +184: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacate"</span>, <span class="ruby-value str">"abacational"</span>.<span class="ruby-identifier">porter2_step2</span> +185: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacate"</span>, <span class="ruby-value str">"abacation"</span>.<span class="ruby-identifier">porter2_step2</span> +186: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacate"</span>, <span class="ruby-value str">"abacator"</span>.<span class="ruby-identifier">porter2_step2</span> +187: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacal"</span>, <span class="ruby-value str">"abacalism"</span>.<span class="ruby-identifier">porter2_step2</span> +188: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacal"</span>, <span class="ruby-value str">"abacaliti"</span>.<span class="ruby-identifier">porter2_step2</span> +189: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacal"</span>, <span class="ruby-value str">"abacalli"</span>.<span class="ruby-identifier">porter2_step2</span> +190: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacful"</span>, <span class="ruby-value str">"abacfulness"</span>.<span class="ruby-identifier">porter2_step2</span> +191: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacous"</span>, <span class="ruby-value str">"abacousli"</span>.<span class="ruby-identifier">porter2_step2</span> +192: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacous"</span>, <span class="ruby-value str">"abacousness"</span>.<span class="ruby-identifier">porter2_step2</span> +193: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacive"</span>, <span class="ruby-value 
str">"abaciveness"</span>.<span class="ruby-identifier">porter2_step2</span> +194: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacive"</span>, <span class="ruby-value str">"abaciviti"</span>.<span class="ruby-identifier">porter2_step2</span> +195: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abiliti"</span>, <span class="ruby-value str">"abiliti"</span>.<span class="ruby-identifier">porter2_step2</span> +196: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacble"</span>, <span class="ruby-value str">"abacbiliti"</span>.<span class="ruby-identifier">porter2_step2</span> +197: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacble"</span>, <span class="ruby-value str">"abacbli"</span>.<span class="ruby-identifier">porter2_step2</span> +198: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacful"</span>, <span class="ruby-value str">"abacfulli"</span>.<span class="ruby-identifier">porter2_step2</span> +199: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacless"</span>, <span class="ruby-value str">"abaclessli"</span>.<span class="ruby-identifier">porter2_step2</span> +200: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abaclog"</span>, <span class="ruby-value str">"abaclogi"</span>.<span class="ruby-identifier">porter2_step2</span> +201: +202: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacli"</span>.<span class="ruby-identifier">porter2_step2</span> +203: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abd"</span>, <span class="ruby-value str">"abdli"</span>.<span class="ruby-identifier">porter2_step2</span> +204: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abe"</span>, <span 
class="ruby-value str">"abeli"</span>.<span class="ruby-identifier">porter2_step2</span> +205: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abg"</span>, <span class="ruby-value str">"abgli"</span>.<span class="ruby-identifier">porter2_step2</span> +206: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abh"</span>, <span class="ruby-value str">"abhli"</span>.<span class="ruby-identifier">porter2_step2</span> +207: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abk"</span>, <span class="ruby-value str">"abkli"</span>.<span class="ruby-identifier">porter2_step2</span> +208: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abm"</span>, <span class="ruby-value str">"abmli"</span>.<span class="ruby-identifier">porter2_step2</span> +209: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abn"</span>, <span class="ruby-value str">"abnli"</span>.<span class="ruby-identifier">porter2_step2</span> +210: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abr"</span>, <span class="ruby-value str">"abrli"</span>.<span class="ruby-identifier">porter2_step2</span> +211: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abt"</span>, <span class="ruby-value str">"abtli"</span>.<span class="ruby-identifier">porter2_step2</span> +212: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abali"</span>, <span class="ruby-value str">"abali"</span>.<span class="ruby-identifier">porter2_step2</span> +213: +214: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"bad"</span>, <span class="ruby-value str">"badli"</span>.<span class="ruby-identifier">porter2_step2</span> +215: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"fluentli"</span>, <span class="ruby-value str">"fluentli"</span>.<span 
class="ruby-identifier">porter2_step2</span> +216: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"geolog"</span>, <span class="ruby-value str">"geologi"</span>.<span class="ruby-identifier">porter2_step2</span> +217: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -818,32 +816,32 @@ end <div class="method-source-code" id="test-step--source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 223</span> -223: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_3</span> -224: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">step_3</span>(<span class="ruby-value str">""</span>) -225: -226: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"national"</span>, <span class="ruby-value str">"nationalize"</span>.<span class="ruby-identifier">step_3</span> -227: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalise"</span>, <span class="ruby-value str">"nationalise"</span>.<span class="ruby-identifier">step_3</span> -228: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"national"</span>, <span class="ruby-value str">"nationalise"</span>.<span class="ruby-identifier">step_3</span>(<span class="ruby-keyword kw">true</span>) -229: <span class="ruby-comment cmt"># Repeat the steps to ensure that the english-gb behaviour isn't sticky</span> -230: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"national"</span>, <span class="ruby-value str">"nationalize"</span>.<span class="ruby-identifier">step_3</span>(<span class="ruby-keyword kw">false</span>) -231: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalise"</span>, <span class="ruby-value str">"nationalise"</span>.<span 
class="ruby-identifier">step_3</span>(<span class="ruby-keyword kw">false</span>) -232: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"national"</span>, <span class="ruby-value str">"nationalize"</span>.<span class="ruby-identifier">step_3</span> -233: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalise"</span>, <span class="ruby-value str">"nationalise"</span>.<span class="ruby-identifier">step_3</span> -234: -235: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abaction"</span>, <span class="ruby-value str">"abactional"</span>.<span class="ruby-identifier">step_3</span> -236: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacate"</span>, <span class="ruby-value str">"abacational"</span>.<span class="ruby-identifier">step_3</span> -237: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacic"</span>, <span class="ruby-value str">"abacicate"</span>.<span class="ruby-identifier">step_3</span> -238: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacic"</span>, <span class="ruby-value str">"abaciciti"</span>.<span class="ruby-identifier">step_3</span> -239: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacic"</span>, <span class="ruby-value str">"abacical"</span>.<span class="ruby-identifier">step_3</span> -240: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacful"</span>.<span class="ruby-identifier">step_3</span> -241: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacness"</span>.<span class="ruby-identifier">step_3</span> -242: -243: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacabac"</span>, <span class="ruby-value 
str">"abacabacative"</span>.<span class="ruby-identifier">step_3</span> -244: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacabac"</span>, <span class="ruby-value str">"abacabacative"</span>.<span class="ruby-identifier">step_3</span> -245: -246: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"dryness"</span>, <span class="ruby-value str">"dryness"</span>.<span class="ruby-identifier">step_3</span> -247: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 219</span> +219: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_3</span> +220: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-value str">""</span>) +221: +222: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"national"</span>, <span class="ruby-value str">"nationalize"</span>.<span class="ruby-identifier">porter2_step3</span> +223: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalise"</span>, <span class="ruby-value str">"nationalise"</span>.<span class="ruby-identifier">porter2_step3</span> +224: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"national"</span>, <span class="ruby-value str">"nationalise"</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-keyword kw">true</span>) +225: <span class="ruby-comment cmt"># Repeat the steps to ensure that the english-gb behaviour isn't sticky</span> +226: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"national"</span>, <span class="ruby-value str">"nationalize"</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-keyword kw">false</span>) +227: <span 
class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalise"</span>, <span class="ruby-value str">"nationalise"</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-keyword kw">false</span>) +228: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"national"</span>, <span class="ruby-value str">"nationalize"</span>.<span class="ruby-identifier">porter2_step3</span> +229: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationalise"</span>, <span class="ruby-value str">"nationalise"</span>.<span class="ruby-identifier">porter2_step3</span> +230: +231: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abaction"</span>, <span class="ruby-value str">"abactional"</span>.<span class="ruby-identifier">porter2_step3</span> +232: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacate"</span>, <span class="ruby-value str">"abacational"</span>.<span class="ruby-identifier">porter2_step3</span> +233: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacic"</span>, <span class="ruby-value str">"abacicate"</span>.<span class="ruby-identifier">porter2_step3</span> +234: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacic"</span>, <span class="ruby-value str">"abaciciti"</span>.<span class="ruby-identifier">porter2_step3</span> +235: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacic"</span>, <span class="ruby-value str">"abacical"</span>.<span class="ruby-identifier">porter2_step3</span> +236: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacful"</span>.<span class="ruby-identifier">porter2_step3</span> +237: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value 
str">"abacness"</span>.<span class="ruby-identifier">porter2_step3</span> +238: +239: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacabac"</span>, <span class="ruby-value str">"abacabacative"</span>.<span class="ruby-identifier">porter2_step3</span> +240: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacabac"</span>, <span class="ruby-value str">"abacabacative"</span>.<span class="ruby-identifier">porter2_step3</span> +241: +242: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"dryness"</span>, <span class="ruby-value str">"dryness"</span>.<span class="ruby-identifier">porter2_step3</span> +243: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -874,47 +872,47 @@ end <div class="method-source-code" id="test-step--source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 249</span> -249: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_4</span> -250: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">step_4</span>(<span class="ruby-value str">""</span>) -251: -252: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nation"</span>, <span class="ruby-value str">"nationize"</span>.<span class="ruby-identifier">step_4</span> -253: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationise"</span>, <span class="ruby-value str">"nationise"</span>.<span class="ruby-identifier">step_4</span> -254: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nation"</span>, <span class="ruby-value str">"nationize"</span>.<span class="ruby-identifier">step_4</span>(<span class="ruby-keyword kw">true</span>) -255: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nation"</span>, 
<span class="ruby-value str">"nationise"</span>.<span class="ruby-identifier">step_4</span>(<span class="ruby-keyword kw">true</span>) -256: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nation"</span>, <span class="ruby-value str">"nationize"</span>.<span class="ruby-identifier">step_4</span>(<span class="ruby-keyword kw">false</span>) -257: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationise"</span>, <span class="ruby-value str">"nationise"</span>.<span class="ruby-identifier">step_4</span>(<span class="ruby-keyword kw">false</span>) -258: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nation"</span>, <span class="ruby-value str">"nationize"</span>.<span class="ruby-identifier">step_4</span>() -259: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationise"</span>, <span class="ruby-value str">"nationise"</span>.<span class="ruby-identifier">step_4</span>() -260: -261: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacal"</span>.<span class="ruby-identifier">step_4</span> -262: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacance"</span>.<span class="ruby-identifier">step_4</span> -263: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacence"</span>.<span class="ruby-identifier">step_4</span> -264: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacer"</span>.<span class="ruby-identifier">step_4</span> -265: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacic"</span>.<span class="ruby-identifier">step_4</span> -266: <span 
class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacer"</span>, <span class="ruby-value str">"abacerable"</span>.<span class="ruby-identifier">step_4</span> -267: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacible"</span>.<span class="ruby-identifier">step_4</span> -268: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacant"</span>.<span class="ruby-identifier">step_4</span> -269: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacement"</span>.<span class="ruby-identifier">step_4</span> <span class="ruby-comment cmt"># Check we handle overlapping suffixes properly</span> -270: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacac"</span>, <span class="ruby-value str">"abacacement"</span>.<span class="ruby-identifier">step_4</span> -271: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacac"</span>, <span class="ruby-value str">"abacacment"</span>.<span class="ruby-identifier">step_4</span> -272: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacment"</span>.<span class="ruby-identifier">step_4</span> -273: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacent"</span>.<span class="ruby-identifier">step_4</span> -274: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacism"</span>.<span class="ruby-identifier">step_4</span> -275: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacate"</span>.<span class="ruby-identifier">step_4</span> -276: 
<span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abaciti"</span>.<span class="ruby-identifier">step_4</span> -277: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacous"</span>.<span class="ruby-identifier">step_4</span> -278: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacive"</span>.<span class="ruby-identifier">step_4</span> -279: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacize"</span>.<span class="ruby-identifier">step_4</span> -280: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacion"</span>, <span class="ruby-value str">"abacion"</span>.<span class="ruby-identifier">step_4</span> -281: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacs"</span>, <span class="ruby-value str">"abacsion"</span>.<span class="ruby-identifier">step_4</span> -282: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abact"</span>, <span class="ruby-value str">"abaction"</span>.<span class="ruby-identifier">step_4</span> -283: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abction"</span>, <span class="ruby-value str">"abction"</span>.<span class="ruby-identifier">step_4</span> -284: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ablut"</span>, <span class="ruby-value str">"ablution"</span>.<span class="ruby-identifier">step_4</span> -285: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"agreement"</span>, <span class="ruby-value str">"agreement"</span>.<span class="ruby-identifier">step_4</span> -286: -287: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value 
str">"abcal"</span>, <span class="ruby-value str">"abcal"</span>.<span class="ruby-identifier">step_4</span> <span class="ruby-comment cmt"># No removal if suffix isn't in R2</span> -288: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 245</span> +245: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_4</span> +246: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-value str">""</span>) +247: +248: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nation"</span>, <span class="ruby-value str">"nationize"</span>.<span class="ruby-identifier">porter2_step4</span> +249: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationise"</span>, <span class="ruby-value str">"nationise"</span>.<span class="ruby-identifier">porter2_step4</span> +250: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nation"</span>, <span class="ruby-value str">"nationize"</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-keyword kw">true</span>) +251: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nation"</span>, <span class="ruby-value str">"nationise"</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-keyword kw">true</span>) +252: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nation"</span>, <span class="ruby-value str">"nationize"</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-keyword kw">false</span>) +253: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationise"</span>, <span class="ruby-value str">"nationise"</span>.<span 
class="ruby-identifier">porter2_step4</span>(<span class="ruby-keyword kw">false</span>) +254: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nation"</span>, <span class="ruby-value str">"nationize"</span>.<span class="ruby-identifier">porter2_step4</span>() +255: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"nationise"</span>, <span class="ruby-value str">"nationise"</span>.<span class="ruby-identifier">porter2_step4</span>() +256: +257: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacal"</span>.<span class="ruby-identifier">porter2_step4</span> +258: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacance"</span>.<span class="ruby-identifier">porter2_step4</span> +259: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacence"</span>.<span class="ruby-identifier">porter2_step4</span> +260: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacer"</span>.<span class="ruby-identifier">porter2_step4</span> +261: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacic"</span>.<span class="ruby-identifier">porter2_step4</span> +262: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacer"</span>, <span class="ruby-value str">"abacerable"</span>.<span class="ruby-identifier">porter2_step4</span> +263: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacible"</span>.<span class="ruby-identifier">porter2_step4</span> +264: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span 
class="ruby-value str">"abacant"</span>.<span class="ruby-identifier">porter2_step4</span> +265: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacement"</span>.<span class="ruby-identifier">porter2_step4</span> <span class="ruby-comment cmt"># Check we handle overlapping suffixes properly</span> +266: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacac"</span>, <span class="ruby-value str">"abacacement"</span>.<span class="ruby-identifier">porter2_step4</span> +267: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacac"</span>, <span class="ruby-value str">"abacacment"</span>.<span class="ruby-identifier">porter2_step4</span> +268: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacment"</span>.<span class="ruby-identifier">porter2_step4</span> +269: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacent"</span>.<span class="ruby-identifier">porter2_step4</span> +270: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacism"</span>.<span class="ruby-identifier">porter2_step4</span> +271: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacate"</span>.<span class="ruby-identifier">porter2_step4</span> +272: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abaciti"</span>.<span class="ruby-identifier">porter2_step4</span> +273: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacous"</span>.<span class="ruby-identifier">porter2_step4</span> +274: <span 
class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacive"</span>.<span class="ruby-identifier">porter2_step4</span> +275: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abacize"</span>.<span class="ruby-identifier">porter2_step4</span> +276: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacion"</span>, <span class="ruby-value str">"abacion"</span>.<span class="ruby-identifier">porter2_step4</span> +277: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacs"</span>, <span class="ruby-value str">"abacsion"</span>.<span class="ruby-identifier">porter2_step4</span> +278: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abact"</span>, <span class="ruby-value str">"abaction"</span>.<span class="ruby-identifier">porter2_step4</span> +279: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abction"</span>, <span class="ruby-value str">"abction"</span>.<span class="ruby-identifier">porter2_step4</span> +280: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ablut"</span>, <span class="ruby-value str">"ablution"</span>.<span class="ruby-identifier">porter2_step4</span> +281: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"agreement"</span>, <span class="ruby-value str">"agreement"</span>.<span class="ruby-identifier">porter2_step4</span> +282: +283: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abcal"</span>, <span class="ruby-value str">"abcal"</span>.<span class="ruby-identifier">porter2_step4</span> <span class="ruby-comment cmt"># No removal if suffix isn't in R2</span> +284: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -945,19 +943,19 @@ end <div class="method-source-code" 
id="test-step--source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 290</span> -290: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_5</span> -291: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">step_5</span> -292: -293: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacl"</span>, <span class="ruby-value str">"abacll"</span>.<span class="ruby-identifier">step_5</span> -294: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abcll"</span>, <span class="ruby-value str">"abcll"</span>.<span class="ruby-identifier">step_5</span> -295: -296: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abc"</span>, <span class="ruby-value str">"abc"</span>.<span class="ruby-identifier">step_5</span> -297: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abl"</span>, <span class="ruby-value str">"able"</span>.<span class="ruby-identifier">step_5</span> -298: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abe"</span>, <span class="ruby-value str">"abe"</span>.<span class="ruby-identifier">step_5</span> -299: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abace"</span>.<span class="ruby-identifier">step_5</span> -300: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"bawac"</span>, <span class="ruby-value str">"bawace"</span>.<span class="ruby-identifier">step_5</span> -301: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 286</span> +286: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_step_5</span> +287: <span 
class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abac"</span>.<span class="ruby-identifier">porter2_step5</span> +288: +289: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacl"</span>, <span class="ruby-value str">"abacll"</span>.<span class="ruby-identifier">porter2_step5</span> +290: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abcll"</span>, <span class="ruby-value str">"abcll"</span>.<span class="ruby-identifier">porter2_step5</span> +291: +292: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abc"</span>, <span class="ruby-value str">"abc"</span>.<span class="ruby-identifier">porter2_step5</span> +293: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abl"</span>, <span class="ruby-value str">"able"</span>.<span class="ruby-identifier">porter2_step5</span> +294: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abe"</span>, <span class="ruby-value str">"abe"</span>.<span class="ruby-identifier">porter2_step5</span> +295: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abac"</span>, <span class="ruby-value str">"abace"</span>.<span class="ruby-identifier">porter2_step5</span> +296: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"bawac"</span>, <span class="ruby-value str">"bawace"</span>.<span class="ruby-identifier">porter2_step5</span> +297: <span class="ruby-keyword kw">end</span></pre> </div> </div> @@ -988,17 +986,17 @@ end <div class="method-source-code" id="test-tidy-source"> <pre> - <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 18</span> -18: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_tidy</span> -19: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span 
class="ruby-value str">"abacde"</span>.<span class="ruby-identifier">porter2_tidy</span> -20: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">" abacde "</span>.<span class="ruby-identifier">porter2_tidy</span> -21: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">"ABACDE"</span>.<span class="ruby-identifier">porter2_tidy</span> -22: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'cde"</span>, <span class="ruby-value str">"abâcde"</span>.<span class="ruby-identifier">porter2_tidy</span> -23: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'cde"</span>, <span class="ruby-value str">"abâcde"</span>.<span class="ruby-identifier">porter2_tidy</span> -24: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'c'de"</span>, <span class="ruby-value str">"abâcâde"</span>.<span class="ruby-identifier">porter2_tidy</span> -25: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'c'de"</span>, <span class="ruby-value str">"abâcâde"</span>.<span class="ruby-identifier">porter2_tidy</span> -26: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"''abacde"</span>, <span class="ruby-value str">"ââabacde"</span>.<span class="ruby-identifier">porter2_tidy</span> -27: <span class="ruby-keyword kw">end</span></pre> + <span class="ruby-comment cmt"># File test/tc_porter2_parts.rb, line 13</span> +13: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">test_tidy</span> +14: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">"abacde"</span>.<span class="ruby-identifier">porter2_tidy</span> +15: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span 
class="ruby-value str">" abacde "</span>.<span class="ruby-identifier">porter2_tidy</span> +16: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"abacde"</span>, <span class="ruby-value str">"ABACDE"</span>.<span class="ruby-identifier">porter2_tidy</span> +17: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'cde"</span>, <span class="ruby-value str">"abâcde"</span>.<span class="ruby-identifier">porter2_tidy</span> +18: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'cde"</span>, <span class="ruby-value str">"abâcde"</span>.<span class="ruby-identifier">porter2_tidy</span> +19: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'c'de"</span>, <span class="ruby-value str">"abâcâde"</span>.<span class="ruby-identifier">porter2_tidy</span> +20: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"ab'c'de"</span>, <span class="ruby-value str">"abâcâde"</span>.<span class="ruby-identifier">porter2_tidy</span> +21: <span class="ruby-identifier">assert_equal</span> <span class="ruby-value str">"''abacde"</span>, <span class="ruby-value str">"ââabacde"</span>.<span class="ruby-identifier">porter2_tidy</span> +22: <span class="ruby-keyword kw">end</span></pre> </div> </div> diff --git a/doc/created.rid b/doc/created.rid index 3424fc2..fd2cc82 100644 --- a/doc/created.rid +++ b/doc/created.rid @@ -1,5 +1,6 @@ -Tue, 04 Jan 2011 16:27:15 +0000 -./test/tc_porter2_parts.rb Sun, 02 Jan 2011 23:49:23 +0000 +Wed, 05 Jan 2011 11:40:35 +0000 +./test/tc_porter2_parts.rb Wed, 05 Jan 2011 11:38:33 +0000 ./test/ts_porter2.rb Mon, 03 Jan 2011 00:20:11 +0000 -./test/tc_porter2_full.rb Sun, 02 Jan 2011 23:49:55 +0000 -./lib/porter2.rb Tue, 04 Jan 2011 16:27:11 +0000 +./test/tc_porter2_full.rb Wed, 05 Jan 2011 11:35:59 +0000 +./lib/porter2_module.rb Wed, 05 Jan 2011 11:34:03 +0000 +./lib/porter2.rb Wed, 05 Jan 2011 11:33:59 +0000 diff 
--git a/doc/index.html b/doc/index.html index 6eb787f..6c609f9 100644 --- a/doc/index.html +++ b/doc/index.html @@ -31,7 +31,7 @@ <h2 id="classes">Classes/Modules</h2> <ul> - <li class="module"><a href="Stemmable.html">Stemmable</a></li> + <li class="module"><a href="Porter2.html">Porter2</a></li> <li class="class"><a href="String.html">String</a></li> @@ -42,47 +42,47 @@ <h2 id="methods">Methods</h2> <ul> - <li><a href="Stemmable.html#method-i-porter2_ends_with_short_syllable%3F">#porter2_ends_with_short_syllable? — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_ends_with_short_syllable%3F">#porter2_ends_with_short_syllable? — String</a></li> - <li><a href="Stemmable.html#method-i-porter2_is_short_word%3F">#porter2_is_short_word? — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_is_short_word%3F">#porter2_is_short_word? — String</a></li> - <li><a href="Stemmable.html#method-i-porter2_postprocess">#porter2_postprocess — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_postprocess">#porter2_postprocess — String</a></li> - <li><a href="Stemmable.html#method-i-porter2_preprocess">#porter2_preprocess — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_preprocess">#porter2_preprocess — String</a></li> - <li><a href="Stemmable.html#method-i-porter2_r1">#porter2_r1 — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_r1">#porter2_r1 — String</a></li> - <li><a href="Stemmable.html#method-i-porter2_r2">#porter2_r2 — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_r2">#porter2_r2 — String</a></li> - <li><a href="Stemmable.html#method-i-porter2_stem">#porter2_stem — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_stem">#porter2_stem — String</a></li> - <li><a href="Stemmable.html#method-i-porter2_stem_verbose">#porter2_stem_verbose — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_stem_verbose">#porter2_stem_verbose — String</a></li> - <li><a 
href="Stemmable.html#method-i-porter2_tidy">#porter2_tidy — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_step0">#porter2_step0 — String</a></li> - <li><a href="Stemmable.html#method-i-stem">#stem — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_step1a">#porter2_step1a — String</a></li> - <li><a href="Stemmable.html#method-i-step_0">#step_0 — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_step1b">#porter2_step1b — String</a></li> - <li><a href="Stemmable.html#method-i-step_1a">#step_1a — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_step1c">#porter2_step1c — String</a></li> - <li><a href="Stemmable.html#method-i-step_1b">#step_1b — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_step2">#porter2_step2 — String</a></li> - <li><a href="Stemmable.html#method-i-step_1c">#step_1c — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_step3">#porter2_step3 — String</a></li> - <li><a href="Stemmable.html#method-i-step_2">#step_2 — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_step4">#porter2_step4 — String</a></li> - <li><a href="Stemmable.html#method-i-step_3">#step_3 — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_step5">#porter2_step5 — String</a></li> - <li><a href="Stemmable.html#method-i-step_4">#step_4 — Stemmable</a></li> + <li><a href="String.html#method-i-porter2_tidy">#porter2_tidy — String</a></li> - <li><a href="Stemmable.html#method-i-step_5">#step_5 — Stemmable</a></li> + <li><a href="String.html#method-i-stem">#stem — String</a></li> - <li><a href="TestPorter2.html#method-i-test_ends_with_short_syllable%3F">#test_ends_with_short_syllable? 
— TestPorter2</a></li> + <li><a href="TestPorter2.html#method-i-test_R1">#test_R1 — TestPorter2</a></li> - <li><a href="TestPorter2.html#method-i-test_find_R1">#test_find_R1 — TestPorter2</a></li> + <li><a href="TestPorter2.html#method-i-test_R2">#test_R2 — TestPorter2</a></li> - <li><a href="TestPorter2.html#method-i-test_find_R2">#test_find_R2 — TestPorter2</a></li> + <li><a href="TestPorter2.html#method-i-test_ends_with_short_syllable%3F">#test_ends_with_short_syllable? — TestPorter2</a></li> <li><a href="TestPorter2.html#method-i-test_is_short_word%3F">#test_is_short_word? — TestPorter2</a></li> diff --git a/doc/lib/porter2_rb.html b/doc/lib/porter2_rb.html index 869fb01..3b98008 100644 --- a/doc/lib/porter2_rb.html +++ b/doc/lib/porter2_rb.html @@ -24,13 +24,15 @@ <div id="metadata"> <dl> <dt class="modified-date">Last Modified</dt> - <dd class="modified-date">2011-01-04 16:27:11 +0000</dd> + <dd class="modified-date">2011-01-05 11:33:59 +0000</dd> <dt class="requires">Requires</dt> <dd class="requires"> <ul> + <li>porter2_module</li> + </ul> </dd> @@ -44,7 +46,7 @@ <div class="description"> <h2>Description</h2> <p> -coding: utf-8 +coding: utf-8 </p> </div> diff --git a/doc/test/tc_porter2_full_rb.html b/doc/test/tc_porter2_full_rb.html index 7acc34b..a50d33c 100644 --- a/doc/test/tc_porter2_full_rb.html +++ b/doc/test/tc_porter2_full_rb.html @@ -24,7 +24,7 @@ <div id="metadata"> <dl> <dt class="modified-date">Last Modified</dt> - <dd class="modified-date">2011-01-02 23:49:55 +0000</dd> + <dd class="modified-date">2011-01-05 11:35:59 +0000</dd> <dt class="requires">Requires</dt> diff --git a/doc/test/tc_porter2_parts_rb.html b/doc/test/tc_porter2_parts_rb.html index e65824e..25593f3 100644 --- a/doc/test/tc_porter2_parts_rb.html +++ b/doc/test/tc_porter2_parts_rb.html @@ -24,7 +24,7 @@ <div id="metadata"> <dl> <dt class="modified-date">Last Modified</dt> - <dd class="modified-date">2011-01-02 23:49:23 +0000</dd> + <dd class="modified-date">2011-01-05 11:38:33 
+0000</dd> <dt class="requires">Requires</dt> diff --git a/lib/porter2.rb b/lib/porter2.rb index 2236b12..3f43303 100644 --- a/lib/porter2.rb +++ b/lib/porter2.rb @@ -1,377 +1,354 @@ -# coding: utf-8 - -# Porter 2 stemmer in Ruby. -# -# This is the Porter 2 stemming algorithm, as described at -# http://snowball.tartarus.org/algorithms/english/stemmer.html -# The original paper is: -# -# Porter, 1980, "An algorithm for suffix stripping", _Program_, Vol. 14, -# no. 3, pp 130-137 - -module Stemmable - # A non-vowel - C = "[^aeiouy]" - - # A vowel - V = "[aeiouy]" - - # A non-vowel other than w, x, or Y - CW = "[^aeiouywxY]" - - # Doubles created when added a suffix: these are undoubled when stemmed - Double = "(bb|dd|ff|gg|mm|nn|pp|rr|tt)" - - # A valid letter that can come before 'li' - Valid_LI = "[cdeghkmnrt]" - - # A specification for a short syllable - SHORT_SYLLABLE = "((#{C}#{V}#{CW})|(^#{V}#{C}))" - - # Suffix transformations used in Step 2. - # (ogi, li endings dealt with in procedure) - STEP_2_MAPS = {"tional" => "tion", - "enci" => "ence", - "anci" => "ance", - "abli" => "able", - "entli" => "ent", - "ization" => "ize", - "izer" => "ize", - "ational" => "ate", - "ation" => "ate", - "ator" => "ate", - "alism" => "al", - "aliti" => "al", - "alli" => "al", - "fulness" => "ful", - "ousli" => "ous", - "ousness" => "ous", - "iveness" => "ive", - "iviti" => "ive", - "biliti" => "ble", - "bli" => "ble", - "fulli" => "ful", - "lessli" => "less" } - - # Suffix transformations used in Step 3. - # (ative ending dealt with in procedure) - STEP_3_MAPS = {"tional" => "tion", - "ational" => "ate", - "alize" => "al", - "icate" => "ic", - "iciti" => "ic", - "ical" => "ic", - "ful" => "", - "ness" => "" } - - # Suffix transformations used in Step 4. 
- STEP_4_MAPS = {"al" => "", - "ance" => "", - "ence" => "", - "er" => "", - "ic" => "", - "able" => "", - "ible" => "", - "ant" => "", - "ement" => "", - "ment" => "", - "ent" => "", - "ism" => "", - "ate" => "", - "iti" => "", - "ous" => "", - "ive" => "", - "ize" => "" } - - # Special-case stemmings - SPECIAL_CASES = {"skis" => "ski", - "skies" => "sky", - - "dying" => "die", - "lying" => "lie", - "tying" => "tie", - "idly" => "idl", - "gently" => "gentl", - "ugly" => "ugli", - "early" => "earli", - "only" => "onli", - "singly" =>"singl", - - "sky" => "sky", - "news" => "news", - "howe" => "howe", - "atlas" => "atlas", - "cosmos" => "cosmos", - "bias" => "bias", - "andes" => "andes" } - - # Special case words to ignore after step 1a. - STEP_1A_SPECIAL_CASES = %w[ inning outing canning herring earring proceed exceed succeed ] - - # Tidy up the word before we get down to the algorithm - def porter2_tidy - preword = self.to_s.strip.downcase - - # map apostrophe-like characters to apostrophes - preword.gsub!(/â/, "'") - preword.gsub!(/â/, "'") - - preword - end - - def porter2_preprocess - w = self.dup - - # remove any initial apostrophe - w.gsub!(/^'*(.)/, '\1') - - # set initial y, or y after a vowel, to Y - w.gsub!(/^y/, "Y") - w.gsub!(/(#{V})y/, '\1Y') - - w - end - - # The word after the first non-vowel after the first vowel - def porter2_r1 - if self =~ /^(gener|commun|arsen)(?<r1>.*)/ - Regexp.last_match(:r1) - else - self =~ /#{V}#{C}(?<r1>.*)$/ - Regexp.last_match(:r1) || "" - end - end - - # R1 after the first non-vowel after the first vowel - def porter2_r2 - self.porter2_r1 =~ /#{V}#{C}(?<r2>.*)$/ - Regexp.last_match(:r2) || "" - end - - # A short syllable in a word is either - # 1. a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel, or - # 2. a vowel at the beginning of the word followed by a non-vowel. - def porter2_ends_with_short_syllable? - self =~ /#{SHORT_SYLLABLE}$/ ? 
true : false - end - - # A word is short if it ends in a short syllable, and if R1 is null - def porter2_is_short_word? - self.porter2_ends_with_short_syllable? and self.porter2_r1.empty? - end - - # Search for the longest among the suffixes, - # * ' - # * 's - # * 's' - # and remove if found. - def step_0 - self.sub!(/(.)('s'|'s|')$/, '\1') || self - end - - # Remove plural suffixes - def step_1a - if self =~ /sses$/ - self.sub(/sses$/, 'ss') - elsif self =~ /..(ied|ies)$/ - self.sub(/(ied|ies)$/, 'i') - elsif self =~ /(ied|ies)$/ - self.sub(/(ied|ies)$/, 'ie') - elsif self =~ /(us|ss)$/ - self - elsif self =~ /s$/ - if self =~ /(#{V}.+)s$/ - self.sub(/s$/, '') - else - self - end - else - self - end - end - - def step_1b(gb_english = false) - if self =~ /(eed|eedly)$/ - if self.porter2_r1 =~ /(eed|eedly)$/ - self.sub(/(eed|eedly)$/, 'ee') - else - self - end - else - w = self.dup - if w =~ /#{V}.*(ed|edly|ing|ingly)$/ - w.sub!(/(ed|edly|ing|ingly)$/, '') - if w =~ /(at|lb|iz)$/ - w += 'e' - elsif w =~ /is$/ and gb_english - w += 'e' - elsif w =~ /#{Double}$/ - w.chop! - elsif w.porter2_is_short_word? 
- w += 'e' - end - end - w - end - end - - - def step_1c - if self =~ /.+#{C}(y|Y)$/ - self.sub(/(y|Y)$/, 'i') - else - self - end - end - - - def step_2(gb_english = false) - r1 = self.porter2_r1 - s2m = STEP_2_MAPS.dup - if gb_english - s2m["iser"] = "ise" - s2m["isation"] = "ise" - end - step_2_re = Regexp.union(s2m.keys.map {|r| Regexp.new(r + "$")}) - if self =~ step_2_re - if r1 =~ /#{$&}$/ - self.sub(/#{$&}$/, s2m[$&]) - else - self - end - elsif r1 =~ /li$/ and self =~ /(#{Valid_LI})li$/ - self.sub(/li$/, '') - elsif r1 =~ /ogi$/ and self =~ /logi$/ - self.sub(/ogi$/, 'og') - else - self - end - end - - - def step_3(gb_english = false) - if self =~ /ative$/ and self.porter2_r2 =~ /ative$/ - self.sub(/ative$/, '') - else - s3m = STEP_3_MAPS.dup - if gb_english - s3m["alise"] = "al" - end - step_3_re = Regexp.union(s3m.keys.map {|r| Regexp.new(r + "$")}) - r1 = self.porter2_r1 - if self =~ step_3_re and r1 =~ /#{$&}$/ - self.sub(/#{$&}$/, s3m[$&]) - else - self - end - end - end - - - def step_4(gb_english = false) - if self.porter2_r2 =~ /ion$/ and self =~ /(s|t)ion$/ - self.sub(/ion$/, '') - else - s4m = STEP_4_MAPS.dup - if gb_english - s4m["ise"] = "" - end - step_4_re = Regexp.union(s4m.keys.map {|r| Regexp.new(r + "$")}) - r2 = self.porter2_r2 - if self =~ step_4_re - if r2 =~ /#{$&}/ - self.sub(/#{$&}$/, s4m[$&]) - else - self - end - else - self - end - end - end - - - def step_5 - if self =~ /ll$/ and self.porter2_r2 =~ /l$/ - self.sub(/ll$/, 'l') - elsif self =~ /e$/ and self.porter2_r2 =~ /e$/ - self.sub(/e$/, '') - else - r1 = self.porter2_r1 - if self =~ /e$/ and r1 =~ /e$/ and not self =~ /#{SHORT_SYLLABLE}e$/ - self.sub(/e$/, '') - else - self - end - end - end - - - def porter2_postprocess - self.gsub(/Y/, 'y') - end - - - def porter2_stem(gb_english = false) - preword = self.porter2_tidy - return preword if preword.length <= 2 - - word = preword.porter2_preprocess - - if SPECIAL_CASES.has_key? 
word - SPECIAL_CASES[word] - else - w1a = word.step_0.step_1a - if STEP_1A_SPECIAL_CASES.include? w1a - w1a - else - w1a.step_1b(gb_english).step_1c.step_2(gb_english).step_3(gb_english).step_4(gb_english).step_5.porter2_postprocess - end - end - end - - def porter2_stem_verbose(gb_english = false) - preword = self.porter2_tidy - puts "Preword: #{preword}" - return preword if preword.length <= 2 - - word = preword.porter2_preprocess - puts "Preprocessed: #{word}" - - if SPECIAL_CASES.has_key? word - puts "Returning #{word} as special case #{SPECIAL_CASES[word]}" - SPECIAL_CASES[word] - else - r1 = word.porter2_r1 - r2 = word.porter2_r2 - puts "R1 = #{r1}, R2 = #{r2}" - - w0 = word.step_0 ; puts "After step 0: #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})" - w1a = w0.step_1a ; puts "After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})" - - if STEP_1A_SPECIAL_CASES.include? w1a - puts "Returning #{w1a} as 1a special case" - w1a - else - w1b = w1a.step_1b(gb_english) ; puts "After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})" - w1c = w1b.step_1c ; puts "After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})" - w2 = w1c.step_2(gb_english) ; puts "After step 2: #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})" - w3 = w2.step_3(gb_english) ; puts "After step 3: #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})" - w4 = w3.step_4(gb_english) ; puts "After step 4: #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})" - w5 = w4.step_5 ; puts "After step 5: #{w5}" - wpost = w5.porter2_postprocess ; puts "After postprocess: #{wpost}" - wpost - end - end - end - - alias stem porter2_stem - -end - -# Add stem method to all Strings -class String - include Stemmable - - # private :porter2_preprocess, :porter2_r1, :porter2_r2 -end +# coding: utf-8 + +require 'porter2_module' + +# ==The Porter 2 stemmer +# +# This is the Porter 2 stemming algorithm, as described at +# 
http://snowball.tartarus.org/algorithms/english/stemmer.html +# The original paper is: +# +# Porter, 1980, "An algorithm for suffix stripping", _Program_, Vol. 14, +# no. 3, pp 130-137 +# +# Constants for the stemmer are in the Porter2 module. +# +# Procedures that implement the stemmer are added to the String class. +# +# The stemmer algorithm is implemented in the porter2_stem procedure. +# +# ==Internationalisation +# There isn't much, as this is a stemmer that only works for English. +# +# The +gb_english+ flag to the various procedures allows the stemmer to treat the British +# English '-ise' the same as the American English '-ize'. +# +# ==Longest suffixes +# Several places in the algorithm require matching the longest suffix of a word. The +# regexp engine in Ruby 1.9 seems to handle alternatives in regexps by finding the +# alternative that matches at the first position in the string. As we're only talking +# about suffixes, that first match is also the longest suffix. If the regexp engine changes, +# this behaviour may change and break the stemmer. + +class String + # Tidy up the word before we get down to the algorithm + def porter2_tidy + preword = self.to_s.strip.downcase + + # map apostrophe-like characters to apostrophes + preword.gsub!(/‘/, "'") + preword.gsub!(/’/, "'") + + preword + end + + + # Preprocess the word. + # Remove any initial ', if present. Then, set initial y, or y after a vowel, to Y + # + # (The comment to 'establish the regions R1 and R2' in the original description + # is an implementation optimisation that identifies where the regions start. As + # no modifications are made to the word that affect those positions, you may want + # to cache them now. This implementation doesn't do that.)
+ def porter2_preprocess + w = self.dup + + # remove any initial apostrophe + w.gsub!(/^'*(.)/, '\1') + + # set initial y, or y after a vowel, to Y + w.gsub!(/^y/, "Y") + w.gsub!(/(#{Porter2::V})y/, '\1Y') + + w + end + + + # R1 is the portion of the word after the first non-vowel after the first vowel + # (with words beginning 'gener-', 'commun-', and 'arsen-' treated as special cases) + def porter2_r1 + if self =~ /^(gener|commun|arsen)(?<r1>.*)/ + Regexp.last_match(:r1) + else + self =~ /#{Porter2::V}#{Porter2::C}(?<r1>.*)$/ + Regexp.last_match(:r1) || "" + end + end + + + # R2 is the portion of R1 (porter2_r1) after the first non-vowel after the first vowel + def porter2_r2 + self.porter2_r1 =~ /#{Porter2::V}#{Porter2::C}(?<r2>.*)$/ + Regexp.last_match(:r2) || "" + end + + + # Returns true if the word ends with a short syllable + def porter2_ends_with_short_syllable? + self =~ /#{Porter2::SHORT_SYLLABLE}$/ ? true : false + end + + + # A word is short if it ends in a short syllable, and R1 is null + def porter2_is_short_word? + self.porter2_ends_with_short_syllable? and self.porter2_r1.empty? + end + + + # Search for the longest among the suffixes, + # * ' + # * 's + # * 's' + # and remove if found. + def porter2_step0 + self.sub!(/(.)('s'|'s|')$/, '\1') || self + end + + + # Search for the longest among the following suffixes, and perform the action indicated.
+ # sses:: replace by ss + # ied, ies:: replace by i if preceded by more than one letter, otherwise by ie + # s:: delete if the preceding word part contains a vowel not immediately before the s + # us, ss:: do nothing + def porter2_step1a + if self =~ /sses$/ + self.sub(/sses$/, 'ss') + elsif self =~ /..(ied|ies)$/ + self.sub(/(ied|ies)$/, 'i') + elsif self =~ /(ied|ies)$/ + self.sub(/(ied|ies)$/, 'ie') + elsif self =~ /(us|ss)$/ + self + elsif self =~ /s$/ + if self =~ /(#{Porter2::V}.+)s$/ + self.sub(/s$/, '') + else + self + end + else + self + end + end + + + # Search for the longest among the following suffixes, and perform the action indicated. + # eed, eedly:: replace by ee if the suffix is also in R1 + # ed, edly, ing, ingly:: delete if the preceding word part contains a vowel and, + # after the deletion: + # * if the word ends at, bl or iz: add e, or + # * if the word ends with a double: remove the last letter, or + # * if the word is short: add e + # + # (If gb_english is +true+, treat the 'is' suffix as 'iz' above.) + def porter2_step1b(gb_english = false) + if self =~ /(eed|eedly)$/ + if self.porter2_r1 =~ /(eed|eedly)$/ + self.sub(/(eed|eedly)$/, 'ee') + else + self + end + else + w = self.dup + if w =~ /#{Porter2::V}.*(ed|edly|ing|ingly)$/ + w.sub!(/(ed|edly|ing|ingly)$/, '') + if w =~ /(at|lb|iz)$/ + w += 'e' + elsif w =~ /is$/ and gb_english + w += 'e' + elsif w =~ /#{Porter2::Double}$/ + w.chop! + elsif w.porter2_is_short_word? + w += 'e' + end + end + w + end + end + + + # Replace a suffix of y or Y by i if it is preceded by a non-vowel which is + # not the first letter of the word. + def porter2_step1c + if self =~ /.+#{Porter2::C}(y|Y)$/ + self.sub(/(y|Y)$/, 'i') + else + self + end + end + + + # Search for the longest among the suffixes listed in the keys of Porter2::STEP_2_MAPS. + # If one is found and that suffix occurs in R1, replace it with the value + # found in STEP_2_MAPS. 
+ # + # (Suffixes 'ogi' and 'li' are treated as special cases in the procedure.) + # + # (If gb_english is +true+, replace the 'iser' and 'isation' suffixes with + # 'ise', similarly to how 'izer' and 'ization' are treated.) + def porter2_step2(gb_english = false) + r1 = self.porter2_r1 + s2m = Porter2::STEP_2_MAPS.dup + if gb_english + s2m["iser"] = "ise" + s2m["isation"] = "ise" + end + step_2_re = Regexp.union(s2m.keys.map {|r| Regexp.new(r + "$")}) + if self =~ step_2_re + if r1 =~ /#{$&}$/ + self.sub(/#{$&}$/, s2m[$&]) + else + self + end + elsif r1 =~ /li$/ and self =~ /(#{Porter2::Valid_LI})li$/ + self.sub(/li$/, '') + elsif r1 =~ /ogi$/ and self =~ /logi$/ + self.sub(/ogi$/, 'og') + else + self + end + end + + + # Search for the longest among the suffixes listed in the keys of Porter2::STEP_3_MAPS. + # If one is found and that suffix occurs in R1, replace it with the value + # found in STEP_3_MAPS. + # + # (Suffix 'ative' is treated as a special case in the procedure.) + # + # (If gb_english is +true+, replace the 'alise' suffix with + # 'al', similarly to how 'alize' is treated.) + def porter2_step3(gb_english = false) + if self =~ /ative$/ and self.porter2_r2 =~ /ative$/ + self.sub(/ative$/, '') + else + s3m = Porter2::STEP_3_MAPS.dup + if gb_english + s3m["alise"] = "al" + end + step_3_re = Regexp.union(s3m.keys.map {|r| Regexp.new(r + "$")}) + r1 = self.porter2_r1 + if self =~ step_3_re and r1 =~ /#{$&}$/ + self.sub(/#{$&}$/, s3m[$&]) + else + self + end + end + end + + + # Search for the longest among the suffixes listed in the keys of Porter2::STEP_4_MAPS. + # If one is found and that suffix occurs in R2, replace it with the value + # found in STEP_4_MAPS. + # + # (Suffix 'ion' is treated as a special case in the procedure.) + # + # (If gb_english is +true+, delete the 'ise' suffix if found.) 
+ def porter2_step4(gb_english = false) + if self.porter2_r2 =~ /ion$/ and self =~ /(s|t)ion$/ + self.sub(/ion$/, '') + else + s4m = Porter2::STEP_4_MAPS.dup + if gb_english + s4m["ise"] = "" + end + step_4_re = Regexp.union(s4m.keys.map {|r| Regexp.new(r + "$")}) + r2 = self.porter2_r2 + if self =~ step_4_re + if r2 =~ /#{$&}/ + self.sub(/#{$&}$/, s4m[$&]) + else + self + end + else + self + end + end + end + + + # Search for the following suffixes, and, if found, perform the action indicated. + # e:: delete if in R2, or in R1 and not preceded by a short syllable + # l:: delete if in R2 and preceded by l + def porter2_step5 + if self =~ /ll$/ and self.porter2_r2 =~ /l$/ + self.sub(/ll$/, 'l') + elsif self =~ /e$/ and self.porter2_r2 =~ /e$/ + self.sub(/e$/, '') + else + r1 = self.porter2_r1 + if self =~ /e$/ and r1 =~ /e$/ and not self =~ /#{Porter2::SHORT_SYLLABLE}e$/ + self.sub(/e$/, '') + else + self + end + end + end + + + # Turn all Y letters into y + def porter2_postprocess + self.gsub(/Y/, 'y') + end + + public + + # Perform the stemming procedure. If +gb_english+ is true, treat '-ise' and similar suffixes + # as '-ize' in American English. + def porter2_stem(gb_english = false) + preword = self.porter2_tidy + return preword if preword.length <= 2 + + word = preword.porter2_preprocess + + if Porter2::SPECIAL_CASES.has_key? word + Porter2::SPECIAL_CASES[word] + else + w1a = word.porter2_step0.porter2_step1a + if Porter2::STEP_1A_SPECIAL_CASES.include?
w1a + w1a + else + w1a.porter2_step1b(gb_english).porter2_step1c.porter2_step2(gb_english).porter2_step3(gb_english).porter2_step4(gb_english).porter2_step5.porter2_postprocess + end + end + end + + # A verbose version of porter2_stem that prints the output of each stage to STDOUT + def porter2_stem_verbose(gb_english = false) + preword = self.porter2_tidy + puts "Preword: #{preword}" + return preword if preword.length <= 2 + + word = preword.porter2_preprocess + puts "Preprocessed: #{word}" + + if Porter2::SPECIAL_CASES.has_key? word + puts "Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}" + Porter2::SPECIAL_CASES[word] + else + r1 = word.porter2_r1 + r2 = word.porter2_r2 + puts "R1 = #{r1}, R2 = #{r2}" + + w0 = word.porter2_step0 ; puts "After step 0: #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})" + w1a = w0.porter2_step1a ; puts "After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})" + + if Porter2::STEP_1A_SPECIAL_CASES.include? w1a + puts "Returning #{w1a} as 1a special case" + w1a + else + w1b = w1a.porter2_step1b(gb_english) ; puts "After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})" + w1c = w1b.porter2_step1c ; puts "After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})" + w2 = w1c.porter2_step2(gb_english) ; puts "After step 2: #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})" + w3 = w2.porter2_step3(gb_english) ; puts "After step 3: #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})" + w4 = w3.porter2_step4(gb_english) ; puts "After step 4: #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})" + w5 = w4.porter2_step5 ; puts "After step 5: #{w5}" + wpost = w5.porter2_postprocess ; puts "After postprocess: #{wpost}" + wpost + end + end + end + + alias stem porter2_stem + +end + diff --git a/test/tc_porter2_full.rb b/test/tc_porter2_full.rb index 2520dae..982fab2 100644 --- a/test/tc_porter2_full.rb +++ b/test/tc_porter2_full.rb @@ -8,12 +8,9 @@ $:.unshift 
File.join(File.dirname(__FILE__), "..", "lib") require 'test/unit' require 'porter2' -class String - public :porter2_preprocess, :porter2_r1, :porter2_r2 -end - class TestPorter2 < Test::Unit::TestCase - + +# The full set of test words from http://snowball.tartarus.org/algorithms/english/stemmer.html TEST_WORDS = {"'" => "'", "''" => "''", "'a" => "'a", @@ -29431,6 +29428,7 @@ TEST_WORDS = {"'" => "'", "zossimov" => "zossimov", "zu" => "zu" } +# Test words with -ise suffixes (and similar), to test how British English is stemmed TEST_WORDS_ENGB = { "aggrandisement" => "aggrandis", "agonising" => "agon", "anathematising" => "anathemat", diff --git a/test/tc_porter2_parts.rb b/test/tc_porter2_parts.rb index b38016a..824e2dd 100644 --- a/test/tc_porter2_parts.rb +++ b/test/tc_porter2_parts.rb @@ -3,16 +3,11 @@ # # This file tests each stage of the stemmer individually. - $:.unshift File.join(File.dirname(__FILE__), "..", "lib") require 'test/unit' require 'porter2' -#class String -# public :porter2_preprocess, :porter2_r1, :porter2_r2 -#end - class TestPorter2 < Test::Unit::TestCase def test_tidy @@ -36,10 +31,11 @@ class TestPorter2 < Test::Unit::TestCase assert_equal "abacdeY", "abacdey".porter2_preprocess assert_equal "abaYde", "abayde".porter2_preprocess assert_equal "kabaYde", "kabayde".porter2_preprocess + assert_equal "kabyaYde", "kabyayde".porter2_preprocess assert_equal "'", "'''".porter2_preprocess end - def test_find_R1 + def test_R1 assert_equal "iful", "beautiful".porter2_r1 assert_equal "y", "beauty".porter2_r1 assert_equal "", "beau".porter2_r1 @@ -66,6 +62,15 @@ class TestPorter2 < Test::Unit::TestCase assert_equal "al", "arsenal".porter2_r1 end + def test_R2 + assert_equal "ul", "beautiful".porter2_r2 + assert_equal "", "beauty".porter2_r2 + assert_equal "", "beau".porter2_r2 + assert_equal "adversion", "animadversion".porter2_r2 + assert_equal "", "sprinkled".porter2_r2 + assert_equal "ist", "eucharist".porter2_r2 + end + def 
test_ends_with_short_syllable? assert_equal true, "rap".porter2_ends_with_short_syllable? assert_equal true, "trap".porter2_ends_with_short_syllable? @@ -93,54 +98,45 @@ class TestPorter2 < Test::Unit::TestCase end end - def test_find_R2 - assert_equal "ul", "beautiful".porter2_r2 - assert_equal "", "beauty".porter2_r2 - assert_equal "", "beau".porter2_r2 - assert_equal "adversion", "animadversion".porter2_r2 - assert_equal "", "sprinkled".porter2_r2 - assert_equal "ist", "eucharist".porter2_r2 - end - def test_step_0 - assert_equal "abac", "abac".step_0 - assert_equal "abac", "abac'".step_0 - assert_equal "abac", "abac's".step_0 - assert_equal "abac", "abac's'".step_0 - assert_equal "ab'c", "ab'c".step_0 - assert_equal "ab'sc", "ab'sc".step_0 - assert_equal "ab's'c", "ab's'c".step_0 - assert_equal "ab'sc", "ab'sc's".step_0 - assert_equal "'", "'".step_0 - assert_equal "'s", "'s".step_0 - assert_equal "'s", "'s'".step_0 + assert_equal "abac", "abac".porter2_step0 + assert_equal "abac", "abac'".porter2_step0 + assert_equal "abac", "abac's".porter2_step0 + assert_equal "abac", "abac's'".porter2_step0 + assert_equal "ab'c", "ab'c".porter2_step0 + assert_equal "ab'sc", "ab'sc".porter2_step0 + assert_equal "ab's'c", "ab's'c".porter2_step0 + assert_equal "ab'sc", "ab'sc's".porter2_step0 + assert_equal "'", "'".porter2_step0 + assert_equal "'s", "'s".porter2_step0 + assert_equal "'s", "'s'".porter2_step0 end def test_step_1a - assert_equal "abacde", "abacde".step_1a - assert_equal "abacess", "abacesses".step_1a - assert_equal "tie", "ties".step_1a - assert_equal "tie", "tied".step_1a - assert_equal "cri", "cries".step_1a - assert_equal "cri", "cried".step_1a - assert_equal "gas", "gas".step_1a - assert_equal "this", "this".step_1a - assert_equal "gap", "gaps".step_1a - assert_equal "kiwi", "kiwis".step_1a - assert_equal "abacus", "abacus".step_1a - assert_equal "abacess", "abacess".step_1a + assert_equal "abacde", "abacde".porter2_step1a + assert_equal "abacess", 
"abacesses".porter2_step1a + assert_equal "tie", "ties".porter2_step1a + assert_equal "tie", "tied".porter2_step1a + assert_equal "cri", "cries".porter2_step1a + assert_equal "cri", "cried".porter2_step1a + assert_equal "gas", "gas".porter2_step1a + assert_equal "this", "this".porter2_step1a + assert_equal "gap", "gaps".porter2_step1a + assert_equal "kiwi", "kiwis".porter2_step1a + assert_equal "abacus", "abacus".porter2_step1a + assert_equal "abacess", "abacess".porter2_step1a end def test_step_1b - assert_equal "abacde", "abacde".step_1b + assert_equal "abacde", "abacde".porter2_step1b words_non_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate", "hopping" => "hop", "hopped" => "hop", "hoped" => "hope", "hoping" => "hope", "atomized" => "atomize", "atomised" => "atomis", "addicted" => "addict", "bleed" => "bleed" } words_non_gb.each do |original, stemmed| - assert_equal stemmed, original.step_1b, - "#{original} should have stemmed to #{stemmed} but got #{original.step_1b(original.porter2_r1)} instead" + assert_equal stemmed, original.porter2_step1b, + "#{original} should have stemmed to #{stemmed} but got #{original.porter2_step1b(original.porter2_r1)} instead" end words_gb = {"luxuriated" => "luxuriate", "luxuriating" => "luxuriate", "hopping" => "hop", "hopped" => "hop", @@ -148,156 +144,156 @@ class TestPorter2 < Test::Unit::TestCase "atomized" => "atomize", "atomised" => "atomise", "addicted" => "addict", "bleed" => "bleed" } words_gb.each do |original, stemmed| - assert_equal stemmed, original.step_1b(true), - "#{original} should have stemmed to #{stemmed} but got #{original.step_1b(original.porter2_r1)} instead" + assert_equal stemmed, original.porter2_step1b(true), + "#{original} should have stemmed to #{stemmed} but got #{original.porter2_step1b(original.porter2_r1)} instead" end end def test_step_1c - assert_equal "cri", "cry".step_1c - assert_equal "by", "by".step_1c - assert_equal "saY", "saY".step_1c - assert_equal "abbeY", 
"abbeY".step_1c + assert_equal "cri", "cry".porter2_step1c + assert_equal "by", "by".porter2_step1c + assert_equal "saY", "saY".porter2_step1c + assert_equal "abbeY", "abbeY".porter2_step1c end def test_step_2 - assert_equal "abac", "abac".step_2 + assert_equal "abac", "abac".porter2_step2 - assert_equal "nationalize", "nationalization".step_2 - assert_equal "nationalisate", "nationalisation".step_2 - assert_equal "nationalize", "nationalization".step_2(true) - assert_equal "nationalise", "nationalisation".step_2(true) + assert_equal "nationalize", "nationalization".porter2_step2 + assert_equal "nationalisate", "nationalisation".porter2_step2 + assert_equal "nationalize", "nationalization".porter2_step2(true) + assert_equal "nationalise", "nationalisation".porter2_step2(true) # Repeat the steps to ensure that the english-gb behaviour isn't sticky - assert_equal "nationalize", "nationalization".step_2(false) - assert_equal "nationalisate", "nationalisation".step_2(false) - assert_equal "nationalize", "nationalization".step_2 - assert_equal "nationalisate", "nationalisation".step_2 + assert_equal "nationalize", "nationalization".porter2_step2(false) + assert_equal "nationalisate", "nationalisation".porter2_step2(false) + assert_equal "nationalize", "nationalization".porter2_step2 + assert_equal "nationalisate", "nationalisation".porter2_step2 - assert_equal "nationalize", "nationalizer".step_2 - assert_equal "nationaliser", "nationaliser".step_2 - assert_equal "nationalize", "nationalizer".step_2(true) - assert_equal "nationalise", "nationaliser".step_2(true) + assert_equal "nationalize", "nationalizer".porter2_step2 + assert_equal "nationaliser", "nationaliser".porter2_step2 + assert_equal "nationalize", "nationalizer".porter2_step2(true) + assert_equal "nationalise", "nationaliser".porter2_step2(true) - assert_equal "abaction", "abactional".step_2 - assert_equal "abacence", "abacenci".step_2 - assert_equal "abacance", "abacanci".step_2 - assert_equal "abacable", 
"abacabli".step_2 - assert_equal "abacent", "abacentli".step_2 - assert_equal "abacize", "abacizer".step_2 - assert_equal "abacize", "abacization".step_2 - assert_equal "abacate", "abacational".step_2 - assert_equal "abacate", "abacation".step_2 - assert_equal "abacate", "abacator".step_2 - assert_equal "abacal", "abacalism".step_2 - assert_equal "abacal", "abacaliti".step_2 - assert_equal "abacal", "abacalli".step_2 - assert_equal "abacful", "abacfulness".step_2 - assert_equal "abacous", "abacousli".step_2 - assert_equal "abacous", "abacousness".step_2 - assert_equal "abacive", "abaciveness".step_2 - assert_equal "abacive", "abaciviti".step_2 - assert_equal "abiliti", "abiliti".step_2 - assert_equal "abacble", "abacbiliti".step_2 - assert_equal "abacble", "abacbli".step_2 - assert_equal "abacful", "abacfulli".step_2 - assert_equal "abacless", "abaclessli".step_2 - assert_equal "abaclog", "abaclogi".step_2 + assert_equal "abaction", "abactional".porter2_step2 + assert_equal "abacence", "abacenci".porter2_step2 + assert_equal "abacance", "abacanci".porter2_step2 + assert_equal "abacable", "abacabli".porter2_step2 + assert_equal "abacent", "abacentli".porter2_step2 + assert_equal "abacize", "abacizer".porter2_step2 + assert_equal "abacize", "abacization".porter2_step2 + assert_equal "abacate", "abacational".porter2_step2 + assert_equal "abacate", "abacation".porter2_step2 + assert_equal "abacate", "abacator".porter2_step2 + assert_equal "abacal", "abacalism".porter2_step2 + assert_equal "abacal", "abacaliti".porter2_step2 + assert_equal "abacal", "abacalli".porter2_step2 + assert_equal "abacful", "abacfulness".porter2_step2 + assert_equal "abacous", "abacousli".porter2_step2 + assert_equal "abacous", "abacousness".porter2_step2 + assert_equal "abacive", "abaciveness".porter2_step2 + assert_equal "abacive", "abaciviti".porter2_step2 + assert_equal "abiliti", "abiliti".porter2_step2 + assert_equal "abacble", "abacbiliti".porter2_step2 + assert_equal "abacble", 
"abacbli".porter2_step2 + assert_equal "abacful", "abacfulli".porter2_step2 + assert_equal "abacless", "abaclessli".porter2_step2 + assert_equal "abaclog", "abaclogi".porter2_step2 - assert_equal "abac", "abacli".step_2 - assert_equal "abd", "abdli".step_2 - assert_equal "abe", "abeli".step_2 - assert_equal "abg", "abgli".step_2 - assert_equal "abh", "abhli".step_2 - assert_equal "abk", "abkli".step_2 - assert_equal "abm", "abmli".step_2 - assert_equal "abn", "abnli".step_2 - assert_equal "abr", "abrli".step_2 - assert_equal "abt", "abtli".step_2 - assert_equal "abali", "abali".step_2 + assert_equal "abac", "abacli".porter2_step2 + assert_equal "abd", "abdli".porter2_step2 + assert_equal "abe", "abeli".porter2_step2 + assert_equal "abg", "abgli".porter2_step2 + assert_equal "abh", "abhli".porter2_step2 + assert_equal "abk", "abkli".porter2_step2 + assert_equal "abm", "abmli".porter2_step2 + assert_equal "abn", "abnli".porter2_step2 + assert_equal "abr", "abrli".porter2_step2 + assert_equal "abt", "abtli".porter2_step2 + assert_equal "abali", "abali".porter2_step2 - assert_equal "bad", "badli".step_2 - assert_equal "fluentli", "fluentli".step_2 - assert_equal "geolog", "geologi".step_2 + assert_equal "bad", "badli".porter2_step2 + assert_equal "fluentli", "fluentli".porter2_step2 + assert_equal "geolog", "geologi".porter2_step2 end def test_step_3 - assert_equal "abac", "abac".step_3("") + assert_equal "abac", "abac".porter2_step3("") - assert_equal "national", "nationalize".step_3 - assert_equal "nationalise", "nationalise".step_3 - assert_equal "national", "nationalise".step_3(true) + assert_equal "national", "nationalize".porter2_step3 + assert_equal "nationalise", "nationalise".porter2_step3 + assert_equal "national", "nationalise".porter2_step3(true) # Repeat the steps to ensure that the english-gb behaviour isn't sticky - assert_equal "national", "nationalize".step_3(false) - assert_equal "nationalise", "nationalise".step_3(false) - assert_equal "national", 
"nationalize".step_3 - assert_equal "nationalise", "nationalise".step_3 + assert_equal "national", "nationalize".porter2_step3(false) + assert_equal "nationalise", "nationalise".porter2_step3(false) + assert_equal "national", "nationalize".porter2_step3 + assert_equal "nationalise", "nationalise".porter2_step3 - assert_equal "abaction", "abactional".step_3 - assert_equal "abacate", "abacational".step_3 - assert_equal "abacic", "abacicate".step_3 - assert_equal "abacic", "abaciciti".step_3 - assert_equal "abacic", "abacical".step_3 - assert_equal "abac", "abacful".step_3 - assert_equal "abac", "abacness".step_3 + assert_equal "abaction", "abactional".porter2_step3 + assert_equal "abacate", "abacational".porter2_step3 + assert_equal "abacic", "abacicate".porter2_step3 + assert_equal "abacic", "abaciciti".porter2_step3 + assert_equal "abacic", "abacical".porter2_step3 + assert_equal "abac", "abacful".porter2_step3 + assert_equal "abac", "abacness".porter2_step3 - assert_equal "abacabac", "abacabacative".step_3 - assert_equal "abacabac", "abacabacative".step_3 + assert_equal "abacabac", "abacabacative".porter2_step3 + assert_equal "abacabac", "abacabacative".porter2_step3 - assert_equal "dryness", "dryness".step_3 + assert_equal "dryness", "dryness".porter2_step3 end def test_step_4 - assert_equal "abac", "abac".step_4("") + assert_equal "abac", "abac".porter2_step4("") - assert_equal "nation", "nationize".step_4 - assert_equal "nationise", "nationise".step_4 - assert_equal "nation", "nationize".step_4(true) - assert_equal "nation", "nationise".step_4(true) - assert_equal "nation", "nationize".step_4(false) - assert_equal "nationise", "nationise".step_4(false) - assert_equal "nation", "nationize".step_4() - assert_equal "nationise", "nationise".step_4() + assert_equal "nation", "nationize".porter2_step4 + assert_equal "nationise", "nationise".porter2_step4 + assert_equal "nation", "nationize".porter2_step4(true) + assert_equal "nation", "nationise".porter2_step4(true) 
+ assert_equal "nation", "nationize".porter2_step4(false) + assert_equal "nationise", "nationise".porter2_step4(false) + assert_equal "nation", "nationize".porter2_step4() + assert_equal "nationise", "nationise".porter2_step4() - assert_equal "abac", "abacal".step_4 - assert_equal "abac", "abacance".step_4 - assert_equal "abac", "abacence".step_4 - assert_equal "abac", "abacer".step_4 - assert_equal "abac", "abacic".step_4 - assert_equal "abacer", "abacerable".step_4 - assert_equal "abac", "abacible".step_4 - assert_equal "abac", "abacant".step_4 - assert_equal "abac", "abacement".step_4 # Check we handle overlapping suffixes properly - assert_equal "abacac", "abacacement".step_4 - assert_equal "abacac", "abacacment".step_4 - assert_equal "abac", "abacment".step_4 - assert_equal "abac", "abacent".step_4 - assert_equal "abac", "abacism".step_4 - assert_equal "abac", "abacate".step_4 - assert_equal "abac", "abaciti".step_4 - assert_equal "abac", "abacous".step_4 - assert_equal "abac", "abacive".step_4 - assert_equal "abac", "abacize".step_4 - assert_equal "abacion", "abacion".step_4 - assert_equal "abacs", "abacsion".step_4 - assert_equal "abact", "abaction".step_4 - assert_equal "abction", "abction".step_4 - assert_equal "ablut", "ablution".step_4 - assert_equal "agreement", "agreement".step_4 + assert_equal "abac", "abacal".porter2_step4 + assert_equal "abac", "abacance".porter2_step4 + assert_equal "abac", "abacence".porter2_step4 + assert_equal "abac", "abacer".porter2_step4 + assert_equal "abac", "abacic".porter2_step4 + assert_equal "abacer", "abacerable".porter2_step4 + assert_equal "abac", "abacible".porter2_step4 + assert_equal "abac", "abacant".porter2_step4 + assert_equal "abac", "abacement".porter2_step4 # Check we handle overlapping suffixes properly + assert_equal "abacac", "abacacement".porter2_step4 + assert_equal "abacac", "abacacment".porter2_step4 + assert_equal "abac", "abacment".porter2_step4 + assert_equal "abac", "abacent".porter2_step4 + 
assert_equal "abac", "abacism".porter2_step4 + assert_equal "abac", "abacate".porter2_step4 + assert_equal "abac", "abaciti".porter2_step4 + assert_equal "abac", "abacous".porter2_step4 + assert_equal "abac", "abacive".porter2_step4 + assert_equal "abac", "abacize".porter2_step4 + assert_equal "abacion", "abacion".porter2_step4 + assert_equal "abacs", "abacsion".porter2_step4 + assert_equal "abact", "abaction".porter2_step4 + assert_equal "abction", "abction".porter2_step4 + assert_equal "ablut", "ablution".porter2_step4 + assert_equal "agreement", "agreement".porter2_step4 - assert_equal "abcal", "abcal".step_4 # No removal if suffix isn't in R2 + assert_equal "abcal", "abcal".porter2_step4 # No removal if suffix isn't in R2 end def test_step_5 - assert_equal "abac", "abac".step_5 + assert_equal "abac", "abac".porter2_step5 - assert_equal "abacl", "abacll".step_5 - assert_equal "abcll", "abcll".step_5 + assert_equal "abacl", "abacll".porter2_step5 + assert_equal "abcll", "abcll".porter2_step5 - assert_equal "abc", "abc".step_5 - assert_equal "abl", "able".step_5 - assert_equal "abe", "abe".step_5 - assert_equal "abac", "abace".step_5 - assert_equal "bawac", "bawace".step_5 + assert_equal "abc", "abc".porter2_step5 + assert_equal "abl", "able".porter2_step5 + assert_equal "abe", "abe".porter2_step5 + assert_equal "abac", "abace".porter2_step5 + assert_equal "bawac", "bawace".porter2_step5 end def test_porter2_postprocess