Tidied up the gem requirements and fixed the use of Bundler
[porter2stemmer.git] / rdoc / String.html
1 <?xml version="1.0" encoding="utf-8"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5 <head>
6 <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
7
8 <title>Class: String</title>
9
10 <link rel="stylesheet" href="./rdoc.css" type="text/css" media="screen" />
11
12 <script src="./js/jquery.js" type="text/javascript"
13 charset="utf-8"></script>
14 <script src="./js/thickbox-compressed.js" type="text/javascript"
15 charset="utf-8"></script>
16 <script src="./js/quicksearch.js" type="text/javascript"
17 charset="utf-8"></script>
18 <script src="./js/darkfish.js" type="text/javascript"
19 charset="utf-8"></script>
20
21 </head>
22 <body class="class">
23
24 <div id="metadata">
25 <div id="home-metadata">
26 <div id="home-section" class="section">
27 <h3 class="section-header">
28 <a href="./index.html">Home</a>
29 <a href="./index.html#classes">Classes</a>
30 <a href="./index.html#methods">Methods</a>
31 </h3>
32 </div>
33 </div>
34
35 <div id="file-metadata">
36 <div id="file-list-section" class="section">
37 <h3 class="section-header">In Files</h3>
38 <div class="section-body">
39 <ul>
40
41 <li><a href="./lib/porter2stemmer/implementation_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
42 class="thickbox" title="lib/porter2stemmer/implementation.rb">lib/porter2stemmer/implementation.rb</a></li>
43
44 </ul>
45 </div>
46 </div>
47
48
49 </div>
50
51 <div id="class-metadata">
52
53 <!-- Parent Class -->
54
55 <div id="parent-class-section" class="section">
56 <h3 class="section-header">Parent</h3>
57
58 <p class="link">Object</p>
59
60 </div>
61
62
63 <!-- Namespace Contents -->
64
65
66 <!-- Method Quickref -->
67
68 <div id="method-list-section" class="section">
69 <h3 class="section-header">Methods</h3>
70 <ul class="link-list">
71
72 <li><a href="#method-i-porter2_ends_with_short_syllable%3F">#porter2_ends_with_short_syllable?</a></li>
73
74 <li><a href="#method-i-porter2_is_short_word%3F">#porter2_is_short_word?</a></li>
75
76 <li><a href="#method-i-porter2_postprocess">#porter2_postprocess</a></li>
77
78 <li><a href="#method-i-porter2_preprocess">#porter2_preprocess</a></li>
79
80 <li><a href="#method-i-porter2_r1">#porter2_r1</a></li>
81
82 <li><a href="#method-i-porter2_r2">#porter2_r2</a></li>
83
84 <li><a href="#method-i-porter2_stem">#porter2_stem</a></li>
85
86 <li><a href="#method-i-porter2_stem_verbose">#porter2_stem_verbose</a></li>
87
88 <li><a href="#method-i-porter2_step0">#porter2_step0</a></li>
89
90 <li><a href="#method-i-porter2_step1a">#porter2_step1a</a></li>
91
92 <li><a href="#method-i-porter2_step1b">#porter2_step1b</a></li>
93
94 <li><a href="#method-i-porter2_step1c">#porter2_step1c</a></li>
95
96 <li><a href="#method-i-porter2_step2">#porter2_step2</a></li>
97
98 <li><a href="#method-i-porter2_step3">#porter2_step3</a></li>
99
100 <li><a href="#method-i-porter2_step4">#porter2_step4</a></li>
101
102 <li><a href="#method-i-porter2_step5">#porter2_step5</a></li>
103
104 <li><a href="#method-i-porter2_tidy">#porter2_tidy</a></li>
105
106 <li><a href="#method-i-stem">#stem</a></li>
107
108 </ul>
109 </div>
110
111
112 <!-- Included Modules -->
113
114 </div>
115
116 <div id="project-metadata">
117
118
119 <div id="fileindex-section" class="section project-section">
120 <h3 class="section-header">Files</h3>
121 <ul>
122
123 <li class="file"><a href="./README_rdoc.html">README.rdoc</a></li>
124
125 </ul>
126 </div>
127
128
129 <div id="classindex-section" class="section project-section">
130 <h3 class="section-header">Class Index
131 <span class="search-toggle"><img src="./images/find.png"
132 height="16" width="16" alt="[+]"
133 title="show/hide quicksearch" /></span></h3>
134 <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
135 <fieldset>
136 <legend>Quicksearch</legend>
137 <input type="text" name="quicksearch" value=""
138 class="quicksearch-field" />
139 </fieldset>
140 </form>
141
142 <ul class="link-list">
143
144 <li><a href="./Porter2.html">Porter2</a></li>
145
146 <li><a href="./String.html">String</a></li>
147
148 </ul>
149 <div id="no-class-search-results" style="display: none;">No matching classes.</div>
150 </div>
151
152
153 </div>
154 </div>
155
156 <div id="documentation">
157 <h1 class="class">String</h1>
158
159 <div id="description">
160 <p>
161 Implementation of the Porter 2 stemmer. <a
162 href="String.html#method-i-porter2_stem">String#porter2_stem</a> is the
163 main stemming procedure.
164 </p>
165
166 </div>
167
168 <!-- Constants -->
169
170
171 <!-- Attributes -->
172
173
174 <!-- Methods -->
175
176 <div id="public-instance-method-details" class="method-section section">
177 <h3 class="section-header">Public Instance Methods</h3>
178
179
180 <div id="porter-ends-with-short-syllable--method" class="method-detail ">
181 <a name="method-i-porter2_ends_with_short_syllable%3F"></a>
182
183 <div class="method-heading">
184
185 <span class="method-name">porter2_ends_with_short_syllable?</span><span
186 class="method-args">()</span>
187 <span class="method-click-advice">click to toggle source</span>
188
189 </div>
190
191 <div class="method-description">
192
193 <p>
194 Returns true if the word ends with a short syllable
195 </p>
196
197
198
199 <div class="method-source-code"
200 id="porter-ends-with-short-syllable--source">
201 <pre>
202 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 59</span>
203 59: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_ends_with_short_syllable?</span>
204 60: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}$/</span> <span class="ruby-operator">?</span> <span class="ruby-keyword kw">true</span> <span class="ruby-operator">:</span> <span class="ruby-keyword kw">false</span>
205 61: <span class="ruby-keyword kw">end</span></pre>
206 </div>
207
208 </div>
209
210
211
212
213 </div>
214
215
216 <div id="porter-is-short-word--method" class="method-detail ">
217 <a name="method-i-porter2_is_short_word%3F"></a>
218
219 <div class="method-heading">
220
221 <span class="method-name">porter2_is_short_word?</span><span
222 class="method-args">()</span>
223 <span class="method-click-advice">click to toggle source</span>
224
225 </div>
226
227 <div class="method-description">
228
229 <p>
230 A word is short if it ends in a short syllable, and R1 is null
231 </p>
232
233
234
235 <div class="method-source-code"
236 id="porter-is-short-word--source">
237 <pre>
238 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 65</span>
239 65: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_is_short_word?</span>
240 66: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>.<span class="ruby-identifier">empty?</span>
241 67: <span class="ruby-keyword kw">end</span></pre>
242 </div>
243
244 </div>
245
246
247
248
249 </div>
250
251
252 <div id="porter-postprocess-method" class="method-detail ">
253 <a name="method-i-porter2_postprocess"></a>
254
255 <div class="method-heading">
256
257 <span class="method-name">porter2_postprocess</span><span
258 class="method-args">()</span>
259 <span class="method-click-advice">click to toggle source</span>
260
261 </div>
262
263 <div class="method-description">
264
265 <p>
266 Turn all Y letters into y
267 </p>
268
269
270
271 <div class="method-source-code"
272 id="porter-postprocess-source">
273 <pre>
274 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 261</span>
275 261: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_postprocess</span>
276 262: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/Y/</span>, <span class="ruby-value str">'y'</span>)
277 263: <span class="ruby-keyword kw">end</span></pre>
278 </div>
279
280 </div>
281
282
283
284
285 </div>
286
287
288 <div id="porter-preprocess-method" class="method-detail ">
289 <a name="method-i-porter2_preprocess"></a>
290
291 <div class="method-heading">
292
293 <span class="method-name">porter2_preprocess</span><span
294 class="method-args">()</span>
295 <span class="method-click-advice">click to toggle source</span>
296
297 </div>
298
299 <div class="method-description">
300
301 <p>
302 Preprocess the word. Remove any initial apostrophe, if present. Then, set
303 initial y, or y after a vowel, to Y
304 </p>
305 <p>
306 (The comment to &#8216;establish the regions R1 and R2&#8217; in the
307 original description is an implementation optimisation that identifies
308 where the regions start. As no modifications are made to the word that
309 affect those positions, you may want to cache them now. This implementation
310 doesn&#8217;t do that.)
311 </p>
312
313
314
315 <div class="method-source-code"
316 id="porter-preprocess-source">
317 <pre>
318 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 25</span>
319 25: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_preprocess</span>
320 26: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
321 27:
322 28: <span class="ruby-comment cmt"># remove any initial apostrophe</span>
323 29: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^'*(.)/</span>, <span class="ruby-value str">'\1'</span>)
324 30:
325 31: <span class="ruby-comment cmt"># set initial y, or y after a vowel, to Y</span>
326 32: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^y/</span>, <span class="ruby-value str">&quot;Y&quot;</span>)
327 33: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-node">/(#{Porter2::V})y/</span>, <span class="ruby-value str">'\1Y'</span>)
328 34:
329 35: <span class="ruby-identifier">w</span>
330 36: <span class="ruby-keyword kw">end</span></pre>
331 </div>
332
333 </div>
334
335
336
337
338 </div>
339
340
341 <div id="porter-r--method" class="method-detail ">
342 <a name="method-i-porter2_r1"></a>
343
344 <div class="method-heading">
345
346 <span class="method-name">porter2_r1</span><span
347 class="method-args">()</span>
348 <span class="method-click-advice">click to toggle source</span>
349
350 </div>
351
352 <div class="method-description">
353
354 <p>
355 R1 is the portion of the word after the first non-vowel after the first
356 vowel (with words beginning &#8216;gener-&#8217;, &#8216;commun-&#8217;,
357 and &#8216;arsen-&#8217; treated as special cases)
358 </p>
359
360
361
362 <div class="method-source-code"
363 id="porter-r--source">
364 <pre>
365 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 41</span>
366 41: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r1</span>
367 42: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^(gener|commun|arsen)(?&lt;r1&gt;.*)/</span>
368 43: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>)
369 44: <span class="ruby-keyword kw">else</span>
370 45: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?&lt;r1&gt;.*)$/</span>
371 46: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">&quot;&quot;</span>
372 47: <span class="ruby-keyword kw">end</span>
373 48: <span class="ruby-keyword kw">end</span></pre>
374 </div>
375
376 </div>
377
378
379
380
381 </div>
382
383
384 <div id="porter-r2-method" class="method-detail ">
385 <a name="method-i-porter2_r2"></a>
386
387 <div class="method-heading">
388
389 <span class="method-name">porter2_r2</span><span
390 class="method-args">()</span>
391 <span class="method-click-advice">click to toggle source</span>
392
393 </div>
394
395 <div class="method-description">
396
397 <p>
398 R2 is the portion of R1 (<a
399 href="String.html#method-i-porter2_r1">porter2_r1</a>) after the first
400 non-vowel after the first vowel
401 </p>
402
403
404
405 <div class="method-source-code"
406 id="porter-r2-source">
407 <pre>
408 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 52</span>
409 52: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r2</span>
410 53: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?&lt;r2&gt;.*)$/</span>
411 54: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r2</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">&quot;&quot;</span>
412 55: <span class="ruby-keyword kw">end</span></pre>
413 </div>
414
415 </div>
416
417
418
419
420 </div>
421
422
423 <div id="porter-stem-method" class="method-detail ">
424 <a name="method-i-porter2_stem"></a>
425
426 <div class="method-heading">
427
428 <span class="method-name">porter2_stem</span><span
429 class="method-args">(gb_english = false)</span>
430 <span class="method-click-advice">click to toggle source</span>
431
432 </div>
433
434 <div class="method-description">
435
436 <p>
437 Perform the stemming procedure. If <tt>gb_english</tt> is true, treat
438 &#8216;-ise&#8217; and similar suffixes as &#8216;-ize&#8217; in American
439 English.
440 </p>
441
442
443
444 <div class="method-source-code"
445 id="porter-stem-source">
446 <pre>
447 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 269</span>
448 269: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
449 270: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
450 271: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;=</span> <span class="ruby-value">2</span>
451 272:
452 273: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
453 274:
454 275: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
455 276: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
456 277: <span class="ruby-keyword kw">else</span>
457 278: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span>.<span class="ruby-identifier">porter2_step1a</span>
458 279: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
459 280: <span class="ruby-identifier">w1a</span>
460 281: <span class="ruby-keyword kw">else</span>
461 282: <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step5</span>.<span class="ruby-identifier">porter2_postprocess</span>
462 283: <span class="ruby-keyword kw">end</span>
463 284: <span class="ruby-keyword kw">end</span>
464 285: <span class="ruby-keyword kw">end</span></pre>
465 </div>
466
467 </div>
468
469
470 <div class="aliases">
471 Also aliased as: <a href="String.html#method-i-stem">stem</a>
472 </div>
473
474
475
476 </div>
477
478
479 <div id="porter-stem-verbose-method" class="method-detail ">
480 <a name="method-i-porter2_stem_verbose"></a>
481
482 <div class="method-heading">
483
484 <span class="method-name">porter2_stem_verbose</span><span
485 class="method-args">(gb_english = false)</span>
486 <span class="method-click-advice">click to toggle source</span>
487
488 </div>
489
490 <div class="method-description">
491
492 <p>
493 A verbose version of <a
494 href="String.html#method-i-porter2_stem">porter2_stem</a> that prints the
495 output of each stage to STDOUT
496 </p>
497
498
499
500 <div class="method-source-code"
501 id="porter-stem-verbose-source">
502 <pre>
503 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 288</span>
504 288: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem_verbose</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
505 289: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
506 290: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Preword: #{preword}&quot;</span>
507 291: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;=</span> <span class="ruby-value">2</span>
508 292:
509 293: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
510 294: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Preprocessed: #{word}&quot;</span>
511 295:
512 296: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
513 297: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}&quot;</span>
514 298: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
515 299: <span class="ruby-keyword kw">else</span>
516 300: <span class="ruby-identifier">r1</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r1</span>
517 301: <span class="ruby-identifier">r2</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r2</span>
518 302: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;R1 = #{r1}, R2 = #{r2}&quot;</span>
519 303:
520 304: <span class="ruby-identifier">w0</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 0: #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})&quot;</span>
521 305: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">w0</span>.<span class="ruby-identifier">porter2_step1a</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})&quot;</span>
522 306:
523 307: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
524 308: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Returning #{w1a} as 1a special case&quot;</span>
525 309: <span class="ruby-identifier">w1a</span>
526 310: <span class="ruby-keyword kw">else</span>
527 311: <span class="ruby-identifier">w1b</span> = <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})&quot;</span>
528 312: <span class="ruby-identifier">w1c</span> = <span class="ruby-identifier">w1b</span>.<span class="ruby-identifier">porter2_step1c</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})&quot;</span>
529 313: <span class="ruby-identifier">w2</span> = <span class="ruby-identifier">w1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 2: #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})&quot;</span>
530 314: <span class="ruby-identifier">w3</span> = <span class="ruby-identifier">w2</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 3: #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})&quot;</span>
531 315: <span class="ruby-identifier">w4</span> = <span class="ruby-identifier">w3</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 4: #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})&quot;</span>
532 316: <span class="ruby-identifier">w5</span> = <span class="ruby-identifier">w4</span>.<span class="ruby-identifier">porter2_step5</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 5: #{w5}&quot;</span>
533 317: <span class="ruby-identifier">wpost</span> = <span class="ruby-identifier">w5</span>.<span class="ruby-identifier">porter2_postprocess</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After postprocess: #{wpost}&quot;</span>
534 318: <span class="ruby-identifier">wpost</span>
535 319: <span class="ruby-keyword kw">end</span>
536 320: <span class="ruby-keyword kw">end</span>
537 321: <span class="ruby-keyword kw">end</span></pre>
538 </div>
539
540 </div>
541
542
543
544
545 </div>
546
547
548 <div id="porter-step--method" class="method-detail ">
549 <a name="method-i-porter2_step0"></a>
550
551 <div class="method-heading">
552
553 <span class="method-name">porter2_step0</span><span
554 class="method-args">()</span>
555 <span class="method-click-advice">click to toggle source</span>
556
557 </div>
558
559 <div class="method-description">
560
561 <p>
562 Search for the longest among the suffixes,
563 </p>
564 <ul>
565 <li><p>
566 &#8217;
567 </p>
568 </li>
569 <li><p>
570 &#8217;s
571 </p>
572 </li>
573 <li><p>
574 &#8217;s&#8217;
575 </p>
576 </li>
577 </ul>
578 <p>
579 and remove if found.
580 </p>
581
582
583
584 <div class="method-source-code"
585 id="porter-step--source">
586 <pre>
587 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 75</span>
588 75: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step0</span>
589 76: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(.)('s'|'s|')$/</span>, <span class="ruby-value str">'\1'</span>) <span class="ruby-operator">||</span> <span class="ruby-keyword kw">self</span>
590 77: <span class="ruby-keyword kw">end</span></pre>
591 </div>
592
593 </div>
594
595
596
597
598 </div>
599
600
601 <div id="porter-step-a-method" class="method-detail ">
602 <a name="method-i-porter2_step1a"></a>
603
604 <div class="method-heading">
605
606 <span class="method-name">porter2_step1a</span><span
607 class="method-args">()</span>
608 <span class="method-click-advice">click to toggle source</span>
609
610 </div>
611
612 <div class="method-description">
613
614 <p>
615 Search for the longest among the following suffixes, and perform the action
616 indicated.
617 </p>
618 <table>
619 <tr><td valign="top">sses</td><td><p>
620 replace by ss
621 </p>
622 </td></tr>
623 <tr><td valign="top">ied, ies</td><td><p>
624 replace by i if preceded by more than one letter, otherwise by ie
625 </p>
626 </td></tr>
627 <tr><td valign="top">s</td><td><p>
628 delete if the preceding word part contains a vowel not immediately before
629 the s
630 </p>
631 </td></tr>
632 <tr><td valign="top">us, ss</td><td><p>
633 do nothing
634 </p>
635 </td></tr>
636 </table>
637
638
639
640 <div class="method-source-code"
641 id="porter-step-a-source">
642 <pre>
643 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 85</span>
644 85: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1a</span>
645 86: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/sses$/</span>
646 87: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/sses$/</span>, <span class="ruby-value str">'ss'</span>)
647 88: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/..(ied|ies)$/</span>
648 89: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'i'</span>)
649 90: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ied|ies)$/</span>
650 91: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'ie'</span>)
651 92: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(us|ss)$/</span>
652 93: <span class="ruby-keyword kw">self</span>
653 94: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/s$/</span>
654 95: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::V}.+)s$/</span>
655 96: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/s$/</span>, <span class="ruby-value str">''</span>)
656 97: <span class="ruby-keyword kw">else</span>
657 98: <span class="ruby-keyword kw">self</span>
658 99: <span class="ruby-keyword kw">end</span>
659 100: <span class="ruby-keyword kw">else</span>
660 101: <span class="ruby-keyword kw">self</span>
661 102: <span class="ruby-keyword kw">end</span>
662 103: <span class="ruby-keyword kw">end</span></pre>
663 </div>
664
665 </div>
666
667
668
669
670 </div>
671
672
673 <div id="porter-step-b-method" class="method-detail ">
674 <a name="method-i-porter2_step1b"></a>
675
676 <div class="method-heading">
677
678 <span class="method-name">porter2_step1b</span><span
679 class="method-args">(gb_english = false)</span>
680 <span class="method-click-advice">click to toggle source</span>
681
682 </div>
683
684 <div class="method-description">
685
686 <p>
687 Search for the longest among the following suffixes, and perform the action
688 indicated.
689 </p>
690 <table>
691 <tr><td valign="top">eed, eedly</td><td><p>
692 replace by ee if the suffix is also in R1
693 </p>
694 </td></tr>
695 <tr><td valign="top">ed, edly, ing, ingly</td><td><p>
696 delete if the preceding word part contains a vowel and, after the
697 deletion:
698 </p>
699 <ul>
700 <li><p>
701 if the word ends in at, bl or iz: add e, or
702 </p>
703 </li>
704 </ul>
705 <ul>
706 <li><p>
707 if the word ends with a double: remove the last letter, or
708 </p>
709 </li>
710 </ul>
711 <ul>
712 <li><p>
713 if the word is short: add e
714 </p>
715 </li>
716 </ul>
717 </td></tr>
718 </table>
719 <p>
720 (If gb_english is <tt>true</tt>, treat the &#8216;is&#8217; suffix as
721 &#8216;iz&#8217; above.)
722 </p>
723
724
725
726 <div class="method-source-code"
727 id="porter-step-b-source">
728 <pre>
729 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 115</span>
730 115: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
731 116: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
732 117: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
733 118: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(eed|eedly)$/</span>, <span class="ruby-value str">'ee'</span>)
734 119: <span class="ruby-keyword kw">else</span>
735 120: <span class="ruby-keyword kw">self</span>
736 121: <span class="ruby-keyword kw">end</span>
737 122: <span class="ruby-keyword kw">else</span>
738 123: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
739 124: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}.*(ed|edly|ing|ingly)$/</span>
740 125: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(ed|edly|ing|ingly)$/</span>, <span class="ruby-value str">''</span>)
741 126: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(at|lb|iz)$/</span>
742 127: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
743 128: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/is$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">gb_english</span>
744 129: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
745 130: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::Double}$/</span>
746 131: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
747 132: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span>
748 133: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
749 134: <span class="ruby-keyword kw">end</span>
750 135: <span class="ruby-keyword kw">end</span>
751 136: <span class="ruby-identifier">w</span>
752 137: <span class="ruby-keyword kw">end</span>
753 138: <span class="ruby-keyword kw">end</span></pre>
754 </div>
755
756 </div>
757
758
759
760
761 </div>
762
763
764 <div id="porter-step-c-method" class="method-detail ">
765 <a name="method-i-porter2_step1c"></a>
766
767 <div class="method-heading">
768
769 <span class="method-name">porter2_step1c</span><span
770 class="method-args">()</span>
771 <span class="method-click-advice">click to toggle source</span>
772
773 </div>
774
775 <div class="method-description">
776
777 <p>
778 Replace a suffix of y or Y by i if it is preceded by a non-vowel which is
779 not the first letter of the word.
780 </p>
781
782
783
784 <div class="method-source-code"
785 id="porter-step-c-source">
786 <pre>
787 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 143</span>
788 143: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1c</span>
789 144: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/.+#{Porter2::C}(y|Y)$/</span>
790 145: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(y|Y)$/</span>, <span class="ruby-value str">'i'</span>)
791 146: <span class="ruby-keyword kw">else</span>
792 147: <span class="ruby-keyword kw">self</span>
793 148: <span class="ruby-keyword kw">end</span>
794 149: <span class="ruby-keyword kw">end</span></pre>
795 </div>
796
797 </div>
798
799
800
801
802 </div>
803
804
805 <div id="porter2-step2-method" class="method-detail ">
806 <a name="method-i-porter2_step2"></a>
807
808 <div class="method-heading">
809
810 <span class="method-name">porter2_step2</span><span
811 class="method-args">(gb_english = false)</span>
812 <span class="method-click-advice">click to toggle source</span>
813
814 </div>
815
816 <div class="method-description">
817
818 <p>
819 Search for the longest among the suffixes listed in the keys of
820 Porter2::STEP_2_MAPS. If one is found and that suffix occurs in R1,
821 replace it with the value found in STEP_2_MAPS.
822 </p>
823 <p>
824 (Suffixes &#8216;ogi&#8217; and &#8216;li&#8217; are treated as special
825 cases in the procedure.)
826 </p>
827 <p>
828 (If gb_english is <tt>true</tt>, replace the &#8216;iser&#8217; and
829 &#8216;isation&#8217; suffixes with &#8216;ise&#8217;, similarly to how
830 &#8216;izer&#8217; and &#8216;ization&#8217; are treated.)
831 </p>
832
833
834
835 <div class="method-source-code"
836 id="porter2-step2-source">
837 <pre>
838 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 160</span>
839 160: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
840 161: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
841 162: <span class="ruby-identifier">s2m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_2_MAPS</span>.<span class="ruby-identifier">dup</span>
842 163: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
843 164: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">&quot;iser&quot;</span>] = <span class="ruby-value str">&quot;ise&quot;</span>
844 165: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">&quot;isation&quot;</span>] = <span class="ruby-value str">&quot;ise&quot;</span>
845 166: <span class="ruby-keyword kw">end</span>
846 167: <span class="ruby-identifier">step_2_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s2m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
847 168: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_2_re</span>
848 169: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}$/</span>
849 170: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s2m</span>[<span class="ruby-node">$&amp;</span>])
850 171: <span class="ruby-keyword kw">else</span>
851 172: <span class="ruby-keyword kw">self</span>
852 173: <span class="ruby-keyword kw">end</span>
853 174: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/li$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::Valid_LI})li$/</span>
854 175: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/li$/</span>, <span class="ruby-value str">''</span>)
855 176: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ogi$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/logi$/</span>
856 177: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ogi$/</span>, <span class="ruby-value str">'og'</span>)
857 178: <span class="ruby-keyword kw">else</span>
858 179: <span class="ruby-keyword kw">self</span>
859 180: <span class="ruby-keyword kw">end</span>
860 181: <span class="ruby-keyword kw">end</span></pre>
861 </div>
862
863 </div>
864
865
866
867
868 </div>
869
870
871 <div id="porter2-step3-method" class="method-detail ">
872 <a name="method-i-porter2_step3"></a>
873
874 <div class="method-heading">
875
876 <span class="method-name">porter2_step3</span><span
877 class="method-args">(gb_english = false)</span>
878 <span class="method-click-advice">click to toggle source</span>
879
880 </div>
881
882 <div class="method-description">
883
884 <p>
885 Search for the longest among the suffixes listed in the keys of
886 Porter2::STEP_3_MAPS. If one is found and that suffix occurs in R1,
887 replace it with the value found in STEP_3_MAPS.
888 </p>
889 <p>
890 (Suffix &#8216;ative&#8217; is treated as a special case in the procedure.)
891 </p>
892 <p>
893 (If gb_english is <tt>true</tt>, replace the &#8216;alise&#8217; suffix
894 with &#8216;al&#8217;, similarly to how &#8216;alize&#8217; is treated.)
895 </p>
896
897
898
899 <div class="method-source-code"
900 id="porter2-step3-source">
901 <pre>
902 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 192</span>
903 192: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
904 193: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span>
905 194: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ative$/</span>, <span class="ruby-value str">''</span>)
906 195: <span class="ruby-keyword kw">else</span>
907 196: <span class="ruby-identifier">s3m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_3_MAPS</span>.<span class="ruby-identifier">dup</span>
908 197: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
909 198: <span class="ruby-identifier">s3m</span>[<span class="ruby-value str">&quot;alise&quot;</span>] = <span class="ruby-value str">&quot;al&quot;</span>
910 199: <span class="ruby-keyword kw">end</span>
911 200: <span class="ruby-identifier">step_3_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s3m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
912 201: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
913 202: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_3_re</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}$/</span>
914 203: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s3m</span>[<span class="ruby-node">$&amp;</span>])
915 204: <span class="ruby-keyword kw">else</span>
916 205: <span class="ruby-keyword kw">self</span>
917 206: <span class="ruby-keyword kw">end</span>
918 207: <span class="ruby-keyword kw">end</span>
919 208: <span class="ruby-keyword kw">end</span></pre>
920 </div>
921
922 </div>
923
924
925
926
927 </div>
928
929
930 <div id="porter2-step4-method" class="method-detail ">
931 <a name="method-i-porter2_step4"></a>
932
933 <div class="method-heading">
934
935 <span class="method-name">porter2_step4</span><span
936 class="method-args">(gb_english = false)</span>
937 <span class="method-click-advice">click to toggle source</span>
938
939 </div>
940
941 <div class="method-description">
942
943 <p>
944 Search for the longest among the suffixes listed in the keys of
945 Porter2::STEP_4_MAPS. If one is found and that suffix occurs in R2,
946 replace it with the value found in STEP_4_MAPS.
947 </p>
948 <p>
949 (Suffix &#8216;ion&#8217; is treated as a special case in the procedure.)
950 </p>
951 <p>
952 (If gb_english is <tt>true</tt>, delete the &#8216;ise&#8217; suffix if
953 found.)
954 </p>
955
956
957
958 <div class="method-source-code"
959 id="porter2-step4-source">
960 <pre>
961 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 218</span>
962 218: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
963 219: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ion$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)ion$/</span>
964 220: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ion$/</span>, <span class="ruby-value str">''</span>)
965 221: <span class="ruby-keyword kw">else</span>
966 222: <span class="ruby-identifier">s4m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_4_MAPS</span>.<span class="ruby-identifier">dup</span>
967 223: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
968 224: <span class="ruby-identifier">s4m</span>[<span class="ruby-value str">&quot;ise&quot;</span>] = <span class="ruby-value str">&quot;&quot;</span>
969 225: <span class="ruby-keyword kw">end</span>
970 226: <span class="ruby-identifier">step_4_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s4m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
971 227: <span class="ruby-identifier">r2</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span>
972 228: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_4_re</span>
973 229: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r2</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}/</span>
974 230: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s4m</span>[<span class="ruby-node">$&amp;</span>])
975 231: <span class="ruby-keyword kw">else</span>
976 232: <span class="ruby-keyword kw">self</span>
977 233: <span class="ruby-keyword kw">end</span>
978 234: <span class="ruby-keyword kw">else</span>
979 235: <span class="ruby-keyword kw">self</span>
980 236: <span class="ruby-keyword kw">end</span>
981 237: <span class="ruby-keyword kw">end</span>
982 238: <span class="ruby-keyword kw">end</span></pre>
983 </div>
984
985 </div>
986
987
988
989
990 </div>
991
992
993 <div id="porter2-step5-method" class="method-detail ">
994 <a name="method-i-porter2_step5"></a>
995
996 <div class="method-heading">
997
998 <span class="method-name">porter2_step5</span><span
999 class="method-args">()</span>
1000 <span class="method-click-advice">click to toggle source</span>
1001
1002 </div>
1003
1004 <div class="method-description">
1005
1006 <p>
1007 Search for the following suffixes, and, if found, perform the action
1008 indicated.
1009 </p>
1010 <table>
1011 <tr><td valign="top">e</td><td><p>
1012 delete if in R2, or in R1 and not preceded by a short syllable
1013 </p>
1014 </td></tr>
1015 <tr><td valign="top">l</td><td><p>
1016 delete if in R2 and preceded by l
1017 </p>
1018 </td></tr>
1019 </table>
1020
1021
1022
1023 <div class="method-source-code"
1024 id="porter2-step5-source">
1025 <pre>
1026 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 244</span>
1027 244: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step5</span>
1028 245: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/l$/</span>
1029 246: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ll$/</span>, <span class="ruby-value str">'l'</span>)
1030 247: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span>
1031 248: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
1032 249: <span class="ruby-keyword kw">else</span>
1033 250: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
1034 251: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">not</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}e$/</span>
1035 252: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
1036 253: <span class="ruby-keyword kw">else</span>
1037 254: <span class="ruby-keyword kw">self</span>
1038 255: <span class="ruby-keyword kw">end</span>
1039 256: <span class="ruby-keyword kw">end</span>
1040 257: <span class="ruby-keyword kw">end</span></pre>
1041 </div>
1042
1043 </div>
1044
1045
1046
1047
1048 </div>
1049
1050
1051 <div id="porter-tidy-method" class="method-detail ">
1052 <a name="method-i-porter2_tidy"></a>
1053
1054 <div class="method-heading">
1055
1056 <span class="method-name">porter2_tidy</span><span
1057 class="method-args">()</span>
1058 <span class="method-click-advice">click to toggle source</span>
1059
1060 </div>
1061
1062 <div class="method-description">
1063
1064 <p>
1065 Tidy up the word before we get down to the algorithm
1066 </p>
1067
1068
1069
1070 <div class="method-source-code"
1071 id="porter-tidy-source">
1072 <pre>
1073 <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 7</span>
1074 7: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_tidy</span>
1075 8: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">downcase</span>
1076 9:
1077 10: <span class="ruby-comment cmt"># map apostrophe-like characters to apostrophes</span>
1078 11: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/‘/</span>, <span class="ruby-value str">&quot;'&quot;</span>)
1079 12: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/’/</span>, <span class="ruby-value str">&quot;'&quot;</span>)
1080 13:
1081 14: <span class="ruby-identifier">preword</span>
1082 15: <span class="ruby-keyword kw">end</span></pre>
1083 </div>
1084
1085 </div>
1086
1087
1088
1089
1090 </div>
1091
1092
1093 <div id="stem-method" class="method-detail method-alias">
1094 <a name="method-i-stem"></a>
1095
1096 <div class="method-heading">
1097
1098 <span class="method-name">stem</span><span
1099 class="method-args">(gb_english = false)</span>
1100 <span class="method-click-advice">click to toggle source</span>
1101
1102 </div>
1103
1104 <div class="method-description">
1105
1106
1107
1108
1109
1110 </div>
1111
1112
1113
1114
1115 <div class="aliases">
1116 Alias for: <a href="String.html#method-i-porter2_stem">porter2_stem</a>
1117 </div>
1118
1119 </div>
1120
1121
1122 </div>
1123
1124
1125 </div>
1126
1127
1128 <div id="rdoc-debugging-section-dump" class="debugging-section">
1129
1130 <p>Disabled; run with --debug to generate this.</p>
1131
1132 </div>
1133
1134 <div id="validator-badges">
1135 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
1136 <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
1137 Rdoc Generator</a> 1.1.6</small>.</p>
1138 </div>
1139
1140 </body>
1141 </html>
1142