<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html
     PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
6 <html xmlns=
"http://www.w3.org/1999/xhtml" xml:
lang=
"en" lang=
"en">
8 <title>Class: String
</title>
9 <meta http-equiv=
"Content-Type" content=
"text/html; charset=iso-8859-1" />
10 <meta http-equiv=
"Content-Script-Type" content=
"text/javascript" />
11 <link rel=
"stylesheet" href=
".././rdoc-style.css" type=
"text/css" media=
"screen" />
12 <script type=
"text/javascript">
// Open `url` in a small popup window named "Code".
// The feature string asks for a resizable, scrollable 400x150 window
// without a toolbar or status bar.
function popupCode( url ) {
  window.open(
    url,
    "Code",
    "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400"
  );
}
// Show or hide the element whose id is `id` by flipping its inline
// `display` style between "block" and "none".
//
// Returns true when an element was found and toggled, false when the
// browser offers no way to look the element up or no element has that id.
function toggleCode( id ) {
  var elem;

  if ( document.getElementById ) {
    elem = document.getElementById( id );
  } else if ( document.all ) {
    // Legacy lookup path; bracket access avoids eval() on the id string.
    elem = document.all[ id ];
  } else {
    return false;
  }

  // Unknown id: nothing to toggle.
  if ( !elem ) {
    return false;
  }

  var elemStyle = elem.style;

  // Anything other than an explicit "block" counts as hidden, so the
  // first click on a block hidden by the stylesheet reveals it.
  if ( elemStyle.display != "block" ) {
    elemStyle.display = "block";
  } else {
    elemStyle.display = "none";
  }

  return true;
}
// Make codeblocks hidden by default.
// The rule is written from script, so it only takes effect when
// JavaScript runs; without scripting the source blocks are not hidden
// by this rule.
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" );
49 <div id=
"classHeader">
50 <table class=
"header-table">
51 <tr class=
"top-aligned-row">
52 <td><strong>Class
</strong></td>
53 <td class=
"class-name-in-header">String
</td>
55 <tr class=
"top-aligned-row">
56 <td><strong>In:
</strong></td>
58 <a href=
"../files/lib/porter2stemmer/implementation_rb.html">
59 lib/porter2stemmer/implementation.rb
65 <tr class=
"top-aligned-row">
66 <td><strong>Parent:
</strong></td>
73 <!-- banner header -->
75 <div id=
"bodyContent">
79 <div id=
"contextContent">
81 <div id=
"description">
83 Implementation of the Porter
2 stemmer.
<a
84 href=
"String.html#M000016">String#porter2_stem
</a> is the main stemming
93 <div id=
"method-list">
94 <h3 class=
"section-bar">Methods
</h3>
96 <div class=
"name-list">
97 <a href=
"#M000005">porter2_ends_with_short_syllable?
</a>
98 <a href=
"#M000006">porter2_is_short_word?
</a>
99 <a href=
"#M000015">porter2_postprocess
</a>
100 <a href=
"#M000002">porter2_preprocess
</a>
101 <a href=
"#M000003">porter2_r1
</a>
102 <a href=
"#M000004">porter2_r2
</a>
103 <a href=
"#M000016">porter2_stem
</a>
104 <a href=
"#M000017">porter2_stem_verbose
</a>
105 <a href=
"#M000007">porter2_step0
</a>
106 <a href=
"#M000008">porter2_step1a
</a>
107 <a href=
"#M000009">porter2_step1b
</a>
108 <a href=
"#M000010">porter2_step1c
</a>
109 <a href=
"#M000011">porter2_step2
</a>
110 <a href=
"#M000012">porter2_step3
</a>
111 <a href=
"#M000013">porter2_step4
</a>
112 <a href=
"#M000014">porter2_step5
</a>
113 <a href=
"#M000001">porter2_tidy
</a>
114 <a href=
"#M000018">stem
</a>
132 <!-- if method_list -->
134 <h3 class=
"section-bar">Public Instance methods
</h3>
136 <div id=
"method-M000005" class=
"method-detail">
137 <a name=
"M000005"></a>
139 <div class=
"method-heading">
140 <a href=
"#M000005" class=
"method-signature">
141 <span class=
"method-name">porter2_ends_with_short_syllable?
</span><span class=
"method-args">()
</span>
145 <div class=
"method-description">
147 Returns true if the word ends with a short syllable
149 <p><a class=
"source-toggle" href=
"#"
150 onclick=
"toggleCode('M000005-source');return false;">[Source]
</a></p>
151 <div class=
"method-source-code" id=
"M000005-source">
153 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
59</span>
154 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_ends_with_short_syllable?
</span>
155 <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/#{Porter2::SHORT_SYLLABLE}$/
</span> <span class=
"ruby-operator">?
</span> <span class=
"ruby-keyword kw">true
</span> <span class=
"ruby-operator">:
</span> <span class=
"ruby-keyword kw">false
</span>
156 <span class=
"ruby-keyword kw">end
</span>
162 <div id=
"method-M000006" class=
"method-detail">
163 <a name=
"M000006"></a>
165 <div class=
"method-heading">
166 <a href=
"#M000006" class=
"method-signature">
167 <span class=
"method-name">porter2_is_short_word?
</span><span class=
"method-args">()
</span>
171 <div class=
"method-description">
173 A word is short if it ends in a short syllable, and R1 is null
175 <p><a class=
"source-toggle" href=
"#"
176 onclick=
"toggleCode('M000006-source');return false;">[Source]
</a></p>
177 <div class=
"method-source-code" id=
"M000006-source">
179 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
65</span>
180 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_is_short_word?
</span>
181 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_ends_with_short_syllable?
</span> <span class=
"ruby-keyword kw">and
</span> <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_r1
</span>.
<span class=
"ruby-identifier">empty?
</span>
182 <span class=
"ruby-keyword kw">end
</span>
188 <div id=
"method-M000015" class=
"method-detail">
189 <a name=
"M000015"></a>
191 <div class=
"method-heading">
192 <a href=
"#M000015" class=
"method-signature">
193 <span class=
"method-name">porter2_postprocess
</span><span class=
"method-args">()
</span>
197 <div class=
"method-description">
199 Turn all Y letters into y
201 <p><a class=
"source-toggle" href=
"#"
202 onclick=
"toggleCode('M000015-source');return false;">[Source]
</a></p>
203 <div class=
"method-source-code" id=
"M000015-source">
205 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
261</span>
206 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_postprocess
</span>
207 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">gsub
</span>(
<span class=
"ruby-regexp re">/Y/
</span>,
<span class=
"ruby-value str">'y'
</span>)
208 <span class=
"ruby-keyword kw">end
</span>
214 <div id=
"method-M000002" class=
"method-detail">
215 <a name=
"M000002"></a>
217 <div class=
"method-heading">
218 <a href=
"#M000002" class=
"method-signature">
219 <span class=
"method-name">porter2_preprocess
</span><span class=
"method-args">()
</span>
223 <div class=
"method-description">
Preprocess the word. Remove any initial apostrophe ('), if present. Then, set
initial y, or y after a vowel, to Y
(The comment to ‘establish the regions R1 and R2’ in the
original description is an implementation optimisation that identifies
where the regions start. As no modifications are made to the word that
affect those positions, you may want to cache them now. This implementation
doesn't do that.)
235 <p><a class=
"source-toggle" href=
"#"
236 onclick=
"toggleCode('M000002-source');return false;">[Source]
</a></p>
237 <div class=
"method-source-code" id=
"M000002-source">
239 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
25</span>
240 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_preprocess
</span>
241 <span class=
"ruby-identifier">w
</span> =
<span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">dup
</span>
243 <span class=
"ruby-comment cmt"># remove any initial apostrophe
</span>
244 <span class=
"ruby-identifier">w
</span>.
<span class=
"ruby-identifier">gsub!
</span>(
<span class=
"ruby-regexp re">/^'*(.)/
</span>,
<span class=
"ruby-value str">'\
1'
</span>)
246 <span class=
"ruby-comment cmt"># set initial y, or y after a vowel, to Y
</span>
247 <span class=
"ruby-identifier">w
</span>.
<span class=
"ruby-identifier">gsub!
</span>(
<span class=
"ruby-regexp re">/^y/
</span>,
<span class=
"ruby-value str">"Y
"</span>)
248 <span class=
"ruby-identifier">w
</span>.
<span class=
"ruby-identifier">gsub!
</span>(
<span class=
"ruby-node">/(#{Porter2::V})y/
</span>,
<span class=
"ruby-value str">'\
1Y'
</span>)
250 <span class=
"ruby-identifier">w
</span>
251 <span class=
"ruby-keyword kw">end
</span>
257 <div id=
"method-M000003" class=
"method-detail">
258 <a name=
"M000003"></a>
260 <div class=
"method-heading">
261 <a href=
"#M000003" class=
"method-signature">
262 <span class=
"method-name">porter2_r1
</span><span class=
"method-args">()
</span>
266 <div class=
"method-description">
R1 is the portion of the word after the first non-vowel after the first
vowel (with words beginning ‘gener-’, ‘commun-’,
and ‘arsen-’ treated as special cases).
272 <p><a class=
"source-toggle" href=
"#"
273 onclick=
"toggleCode('M000003-source');return false;">[Source]
</a></p>
274 <div class=
"method-source-code" id=
"M000003-source">
276 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
41</span>
277 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_r1
</span>
278 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/^(gener|commun|arsen)(?
<r1
>.*)/
</span>
279 <span class=
"ruby-constant">Regexp
</span>.
<span class=
"ruby-identifier">last_match
</span>(
<span class=
"ruby-identifier">:r1
</span>)
280 <span class=
"ruby-keyword kw">else
</span>
281 <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/#{Porter2::V}#{Porter2::C}(?
<r1
>.*)$/
</span>
282 <span class=
"ruby-constant">Regexp
</span>.
<span class=
"ruby-identifier">last_match
</span>(
<span class=
"ruby-identifier">:r1
</span>)
<span class=
"ruby-operator">||
</span> <span class=
"ruby-value str">""</span>
283 <span class=
"ruby-keyword kw">end
</span>
284 <span class=
"ruby-keyword kw">end
</span>
290 <div id=
"method-M000004" class=
"method-detail">
291 <a name=
"M000004"></a>
293 <div class=
"method-heading">
294 <a href=
"#M000004" class=
"method-signature">
295 <span class=
"method-name">porter2_r2
</span><span class=
"method-args">()
</span>
299 <div class=
"method-description">
301 R2 is the portion of R1 (
<a href=
"String.html#M000003">porter2_r1
</a>)
302 after the first non-vowel after the first vowel
304 <p><a class=
"source-toggle" href=
"#"
305 onclick=
"toggleCode('M000004-source');return false;">[Source]
</a></p>
306 <div class=
"method-source-code" id=
"M000004-source">
308 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
52</span>
309 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_r2
</span>
310 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_r1
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/#{Porter2::V}#{Porter2::C}(?
<r2
>.*)$/
</span>
311 <span class=
"ruby-constant">Regexp
</span>.
<span class=
"ruby-identifier">last_match
</span>(
<span class=
"ruby-identifier">:r2
</span>)
<span class=
"ruby-operator">||
</span> <span class=
"ruby-value str">""</span>
312 <span class=
"ruby-keyword kw">end
</span>
318 <div id=
"method-M000016" class=
"method-detail">
319 <a name=
"M000016"></a>
321 <div class=
"method-heading">
322 <a href=
"#M000016" class=
"method-signature">
323 <span class=
"method-name">porter2_stem
</span><span class=
"method-args">(gb_english = false)
</span>
327 <div class=
"method-description">
Perform the stemming procedure. If <tt>gb_english</tt> is true, treat
‘-ise’ and similar suffixes as ‘-ize’ in American English.
333 <p><a class=
"source-toggle" href=
"#"
334 onclick=
"toggleCode('M000016-source');return false;">[Source]
</a></p>
335 <div class=
"method-source-code" id=
"M000016-source">
337 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
269</span>
338 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_stem
</span>(
<span class=
"ruby-identifier">gb_english
</span> =
<span class=
"ruby-keyword kw">false
</span>)
339 <span class=
"ruby-identifier">preword
</span> =
<span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_tidy
</span>
340 <span class=
"ruby-keyword kw">return
</span> <span class=
"ruby-identifier">preword
</span> <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-identifier">preword
</span>.
<span class=
"ruby-identifier">length
</span> <span class=
"ruby-operator"><=
</span> <span class=
"ruby-value">2</span>
342 <span class=
"ruby-identifier">word
</span> =
<span class=
"ruby-identifier">preword
</span>.
<span class=
"ruby-identifier">porter2_preprocess
</span>
344 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-constant">Porter2
</span><span class=
"ruby-operator">::
</span><span class=
"ruby-constant">SPECIAL_CASES
</span>.
<span class=
"ruby-identifier">has_key?
</span> <span class=
"ruby-identifier">word
</span>
345 <span class=
"ruby-constant">Porter2
</span><span class=
"ruby-operator">::
</span><span class=
"ruby-constant">SPECIAL_CASES
</span>[
<span class=
"ruby-identifier">word
</span>]
346 <span class=
"ruby-keyword kw">else
</span>
347 <span class=
"ruby-identifier">w1a
</span> =
<span class=
"ruby-identifier">word
</span>.
<span class=
"ruby-identifier">porter2_step0
</span>.
<span class=
"ruby-identifier">porter2_step1a
</span>
348 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-constant">Porter2
</span><span class=
"ruby-operator">::
</span><span class=
"ruby-constant">STEP_1A_SPECIAL_CASES
</span>.
<span class=
"ruby-identifier">include?
</span> <span class=
"ruby-identifier">w1a
</span>
349 <span class=
"ruby-identifier">w1a
</span>
350 <span class=
"ruby-keyword kw">else
</span>
351 <span class=
"ruby-identifier">w1a
</span>.
<span class=
"ruby-identifier">porter2_step1b
</span>(
<span class=
"ruby-identifier">gb_english
</span>).
<span class=
"ruby-identifier">porter2_step1c
</span>.
<span class=
"ruby-identifier">porter2_step2
</span>(
<span class=
"ruby-identifier">gb_english
</span>).
<span class=
"ruby-identifier">porter2_step3
</span>(
<span class=
"ruby-identifier">gb_english
</span>).
<span class=
"ruby-identifier">porter2_step4
</span>(
<span class=
"ruby-identifier">gb_english
</span>).
<span class=
"ruby-identifier">porter2_step5
</span>.
<span class=
"ruby-identifier">porter2_postprocess
</span>
352 <span class=
"ruby-keyword kw">end
</span>
353 <span class=
"ruby-keyword kw">end
</span>
354 <span class=
"ruby-keyword kw">end
</span>
360 <div id=
"method-M000017" class=
"method-detail">
361 <a name=
"M000017"></a>
363 <div class=
"method-heading">
364 <a href=
"#M000017" class=
"method-signature">
365 <span class=
"method-name">porter2_stem_verbose
</span><span class=
"method-args">(gb_english = false)
</span>
369 <div class=
"method-description">
371 A verbose version of
<a href=
"String.html#M000016">porter2_stem
</a> that
372 prints the output of each stage to STDOUT
374 <p><a class=
"source-toggle" href=
"#"
375 onclick=
"toggleCode('M000017-source');return false;">[Source]
</a></p>
376 <div class=
"method-source-code" id=
"M000017-source">
378 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
288</span>
379 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_stem_verbose
</span>(
<span class=
"ruby-identifier">gb_english
</span> =
<span class=
"ruby-keyword kw">false
</span>)
380 <span class=
"ruby-identifier">preword
</span> =
<span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_tidy
</span>
381 <span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"Preword: #{preword}
"</span>
382 <span class=
"ruby-keyword kw">return
</span> <span class=
"ruby-identifier">preword
</span> <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-identifier">preword
</span>.
<span class=
"ruby-identifier">length
</span> <span class=
"ruby-operator"><=
</span> <span class=
"ruby-value">2</span>
384 <span class=
"ruby-identifier">word
</span> =
<span class=
"ruby-identifier">preword
</span>.
<span class=
"ruby-identifier">porter2_preprocess
</span>
385 <span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"Preprocessed: #{word}
"</span>
387 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-constant">Porter2
</span><span class=
"ruby-operator">::
</span><span class=
"ruby-constant">SPECIAL_CASES
</span>.
<span class=
"ruby-identifier">has_key?
</span> <span class=
"ruby-identifier">word
</span>
388 <span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}
"</span>
389 <span class=
"ruby-constant">Porter2
</span><span class=
"ruby-operator">::
</span><span class=
"ruby-constant">SPECIAL_CASES
</span>[
<span class=
"ruby-identifier">word
</span>]
390 <span class=
"ruby-keyword kw">else
</span>
391 <span class=
"ruby-identifier">r1
</span> =
<span class=
"ruby-identifier">word
</span>.
<span class=
"ruby-identifier">porter2_r1
</span>
392 <span class=
"ruby-identifier">r2
</span> =
<span class=
"ruby-identifier">word
</span>.
<span class=
"ruby-identifier">porter2_r2
</span>
393 <span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"R1 = #{r1}, R2 = #{r2}
"</span>
395 <span class=
"ruby-identifier">w0
</span> =
<span class=
"ruby-identifier">word
</span>.
<span class=
"ruby-identifier">porter2_step0
</span> ;
<span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"After step
0: #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})
"</span>
396 <span class=
"ruby-identifier">w1a
</span> =
<span class=
"ruby-identifier">w0
</span>.
<span class=
"ruby-identifier">porter2_step1a
</span> ;
<span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"After step
1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})
"</span>
398 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-constant">Porter2
</span><span class=
"ruby-operator">::
</span><span class=
"ruby-constant">STEP_1A_SPECIAL_CASES
</span>.
<span class=
"ruby-identifier">include?
</span> <span class=
"ruby-identifier">w1a
</span>
399 <span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"Returning #{w1a} as
1a special case
"</span>
400 <span class=
"ruby-identifier">w1a
</span>
401 <span class=
"ruby-keyword kw">else
</span>
402 <span class=
"ruby-identifier">w1b
</span> =
<span class=
"ruby-identifier">w1a
</span>.
<span class=
"ruby-identifier">porter2_step1b
</span>(
<span class=
"ruby-identifier">gb_english
</span>) ;
<span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"After step
1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})
"</span>
403 <span class=
"ruby-identifier">w1c
</span> =
<span class=
"ruby-identifier">w1b
</span>.
<span class=
"ruby-identifier">porter2_step1c
</span> ;
<span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"After step
1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})
"</span>
404 <span class=
"ruby-identifier">w2
</span> =
<span class=
"ruby-identifier">w1c
</span>.
<span class=
"ruby-identifier">porter2_step2
</span>(
<span class=
"ruby-identifier">gb_english
</span>) ;
<span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"After step
2: #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})
"</span>
405 <span class=
"ruby-identifier">w3
</span> =
<span class=
"ruby-identifier">w2
</span>.
<span class=
"ruby-identifier">porter2_step3
</span>(
<span class=
"ruby-identifier">gb_english
</span>) ;
<span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"After step
3: #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})
"</span>
406 <span class=
"ruby-identifier">w4
</span> =
<span class=
"ruby-identifier">w3
</span>.
<span class=
"ruby-identifier">porter2_step4
</span>(
<span class=
"ruby-identifier">gb_english
</span>) ;
<span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"After step
4: #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})
"</span>
407 <span class=
"ruby-identifier">w5
</span> =
<span class=
"ruby-identifier">w4
</span>.
<span class=
"ruby-identifier">porter2_step5
</span> ;
<span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"After step
5: #{w5}
"</span>
408 <span class=
"ruby-identifier">wpost
</span> =
<span class=
"ruby-identifier">w5
</span>.
<span class=
"ruby-identifier">porter2_postprocess
</span> ;
<span class=
"ruby-identifier">puts
</span> <span class=
"ruby-node">"After postprocess: #{wpost}
"</span>
409 <span class=
"ruby-identifier">wpost
</span>
410 <span class=
"ruby-keyword kw">end
</span>
411 <span class=
"ruby-keyword kw">end
</span>
412 <span class=
"ruby-keyword kw">end
</span>
418 <div id=
"method-M000007" class=
"method-detail">
419 <a name=
"M000007"></a>
421 <div class=
"method-heading">
422 <a href=
"#M000007" class=
"method-signature">
423 <span class=
"method-name">porter2_step0
</span><span class=
"method-args">()
</span>
427 <div class=
"method-description">
429 Search for the longest among the suffixes,
445 <p><a class=
"source-toggle" href=
"#"
446 onclick=
"toggleCode('M000007-source');return false;">[Source]
</a></p>
447 <div class=
"method-source-code" id=
"M000007-source">
449 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
75</span>
450 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_step0
</span>
451 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub!
</span>(
<span class=
"ruby-regexp re">/(.)('s'|'s|')$/
</span>,
<span class=
"ruby-value str">'\
1'
</span>)
<span class=
"ruby-operator">||
</span> <span class=
"ruby-keyword kw">self
</span>
452 <span class=
"ruby-keyword kw">end
</span>
458 <div id=
"method-M000008" class=
"method-detail">
459 <a name=
"M000008"></a>
461 <div class=
"method-heading">
462 <a href=
"#M000008" class=
"method-signature">
463 <span class=
"method-name">porter2_step1a
</span><span class=
"method-args">()
</span>
467 <div class=
"method-description">
469 Search for the longest among the following suffixes, and perform the action
473 <tr><td valign=
"top">sses:
</td><td>replace by ss
476 <tr><td valign=
"top">ied, ies:
</td><td>replace by i if preceded by more than one letter, otherwise by ie
479 <tr><td valign=
"top">s:
</td><td>delete if the preceding word part contains a vowel not immediately before
483 <tr><td valign=
"top">us, ss:
</td><td>do nothing
487 <p><a class=
"source-toggle" href=
"#"
488 onclick=
"toggleCode('M000008-source');return false;">[Source]
</a></p>
489 <div class=
"method-source-code" id=
"M000008-source">
491 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
85</span>
492 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_step1a
</span>
493 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/sses$/
</span>
494 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/sses$/
</span>,
<span class=
"ruby-value str">'ss'
</span>)
495 <span class=
"ruby-keyword kw">elsif
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/..(ied|ies)$/
</span>
496 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/(ied|ies)$/
</span>,
<span class=
"ruby-value str">'i'
</span>)
497 <span class=
"ruby-keyword kw">elsif
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/(ied|ies)$/
</span>
498 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/(ied|ies)$/
</span>,
<span class=
"ruby-value str">'ie'
</span>)
499 <span class=
"ruby-keyword kw">elsif
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/(us|ss)$/
</span>
500 <span class=
"ruby-keyword kw">self
</span>
501 <span class=
"ruby-keyword kw">elsif
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/s$/
</span>
502 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/(#{Porter2::V}.+)s$/
</span>
503 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/s$/
</span>,
<span class=
"ruby-value str">''
</span>)
504 <span class=
"ruby-keyword kw">else
</span>
505 <span class=
"ruby-keyword kw">self
</span>
506 <span class=
"ruby-keyword kw">end
</span>
507 <span class=
"ruby-keyword kw">else
</span>
508 <span class=
"ruby-keyword kw">self
</span>
509 <span class=
"ruby-keyword kw">end
</span>
510 <span class=
"ruby-keyword kw">end
</span>
516 <div id=
"method-M000009" class=
"method-detail">
517 <a name=
"M000009"></a>
519 <div class=
"method-heading">
520 <a href=
"#M000009" class=
"method-signature">
521 <span class=
"method-name">porter2_step1b
</span><span class=
"method-args">(gb_english = false)
</span>
525 <div class=
"method-description">
527 Search for the longest among the following suffixes, and perform the action
531 <tr><td valign=
"top">eed, eedly:
</td><td>replace by ee if the suffix is also in R1
534 <tr><td valign=
"top">ed, edly, ing, ingly:
</td><td>delete if the preceding word part contains a vowel and, after the deletion:
537 <li>if the word ends at, bl or iz: add e, or
540 <li>if the word ends with a double: remove the last letter, or
543 <li>if the word is short: add e
550 (If gb_english is
<tt>true
</tt>, treat the
‘is
’ suffix as
551 ‘iz
’ above.)
553 <p><a class=
"source-toggle" href=
"#"
554 onclick=
"toggleCode('M000009-source');return false;">[Source]
</a></p>
555 <div class=
"method-source-code" id=
"M000009-source">
557 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
115</span>
558 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_step1b
</span>(
<span class=
"ruby-identifier">gb_english
</span> =
<span class=
"ruby-keyword kw">false
</span>)
559 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/(eed|eedly)$/
</span>
560 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_r1
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/(eed|eedly)$/
</span>
561 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/(eed|eedly)$/
</span>,
<span class=
"ruby-value str">'ee'
</span>)
562 <span class=
"ruby-keyword kw">else
</span>
563 <span class=
"ruby-keyword kw">self
</span>
564 <span class=
"ruby-keyword kw">end
</span>
565 <span class=
"ruby-keyword kw">else
</span>
566 <span class=
"ruby-identifier">w
</span> =
<span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">dup
</span>
567 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-identifier">w
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/#{Porter2::V}.*(ed|edly|ing|ingly)$/
</span>
568 <span class=
"ruby-identifier">w
</span>.
<span class=
"ruby-identifier">sub!
</span>(
<span class=
"ruby-regexp re">/(ed|edly|ing|ingly)$/
</span>,
<span class=
"ruby-value str">''
</span>)
569 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-identifier">w
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/(at|lb|iz)$/
</span>
570 <span class=
"ruby-identifier">w
</span> <span class=
"ruby-operator">+=
</span> <span class=
"ruby-value str">'e'
</span>
571 <span class=
"ruby-keyword kw">elsif
</span> <span class=
"ruby-identifier">w
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/is$/
</span> <span class=
"ruby-keyword kw">and
</span> <span class=
"ruby-identifier">gb_english
</span>
572 <span class=
"ruby-identifier">w
</span> <span class=
"ruby-operator">+=
</span> <span class=
"ruby-value str">'e'
</span>
573 <span class=
"ruby-keyword kw">elsif
</span> <span class=
"ruby-identifier">w
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/#{Porter2::Double}$/
</span>
574 <span class=
"ruby-identifier">w
</span>.
<span class=
"ruby-identifier">chop!
</span>
575 <span class=
"ruby-keyword kw">elsif
</span> <span class=
"ruby-identifier">w
</span>.
<span class=
"ruby-identifier">porter2_is_short_word?
</span>
576 <span class=
"ruby-identifier">w
</span> <span class=
"ruby-operator">+=
</span> <span class=
"ruby-value str">'e'
</span>
577 <span class=
"ruby-keyword kw">end
</span>
578 <span class=
"ruby-keyword kw">end
</span>
579 <span class=
"ruby-identifier">w
</span>
580 <span class=
"ruby-keyword kw">end
</span>
581 <span class=
"ruby-keyword kw">end
</span>
587 <div id=
"method-M000010" class=
"method-detail">
588 <a name=
"M000010"></a>
590 <div class=
"method-heading">
591 <a href=
"#M000010" class=
"method-signature">
592 <span class=
"method-name">porter2_step1c
</span><span class=
"method-args">()
</span>
596 <div class=
"method-description">
598 Replace a suffix of y or Y by i if it is preceded by a non-vowel which is
599 not the first letter of the word.
601 <p><a class=
"source-toggle" href=
"#"
602 onclick=
"toggleCode('M000010-source');return false;">[Source]
</a></p>
603 <div class=
"method-source-code" id=
"M000010-source">
605 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
143</span>
606 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_step1c
</span>
607 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/.+#{Porter2::C}(y|Y)$/
</span>
608 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/(y|Y)$/
</span>,
<span class=
"ruby-value str">'i'
</span>)
609 <span class=
"ruby-keyword kw">else
</span>
610 <span class=
"ruby-keyword kw">self
</span>
611 <span class=
"ruby-keyword kw">end
</span>
612 <span class=
"ruby-keyword kw">end
</span>
618 <div id=
"method-M000011" class=
"method-detail">
619 <a name=
"M000011"></a>
621 <div class=
"method-heading">
622 <a href=
"#M000011" class=
"method-signature">
623 <span class=
"method-name">porter2_step2
</span><span class=
"method-args">(gb_english = false)
</span>
627 <div class=
"method-description">
629 Search for the longest among the suffixes listed in the keys of
630 Porter2::STEP_2_MAPS. If one is found and that suffix occurs in R1, replace
631 it with the value found in STEP_2_MAPS.
634 (Suffixes
‘ogi
’ and
‘li
’ are treated as special
635 cases in the procedure.)
638 (If gb_english is
<tt>true
</tt>, replace the
‘iser
’ and
639 ‘isation
’ suffixes with
‘ise
’, similarly to how
640 ‘izer
’ and
‘ization
’ are treated.)
642 <p><a class=
"source-toggle" href=
"#"
643 onclick=
"toggleCode('M000011-source');return false;">[Source]
</a></p>
644 <div class=
"method-source-code" id=
"M000011-source">
646 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
160</span>
647 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_step2
</span>(
<span class=
"ruby-identifier">gb_english
</span> =
<span class=
"ruby-keyword kw">false
</span>)
648 <span class=
"ruby-identifier">r1
</span> =
<span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_r1
</span>
649 <span class=
"ruby-identifier">s2m
</span> =
<span class=
"ruby-constant">Porter2
</span><span class=
"ruby-operator">::
</span><span class=
"ruby-constant">STEP_2_MAPS
</span>.
<span class=
"ruby-identifier">dup
</span>
650 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-identifier">gb_english
</span>
651 <span class=
"ruby-identifier">s2m
</span>[
<span class=
"ruby-value str">"iser
"</span>] =
<span class=
"ruby-value str">"ise
"</span>
652 <span class=
"ruby-identifier">s2m
</span>[
<span class=
"ruby-value str">"isation
"</span>] =
<span class=
"ruby-value str">"ise
"</span>
653 <span class=
"ruby-keyword kw">end
</span>
654 <span class=
"ruby-identifier">step_2_re
</span> =
<span class=
"ruby-constant">Regexp
</span>.
<span class=
"ruby-identifier">union
</span>(
<span class=
"ruby-identifier">s2m
</span>.
<span class=
"ruby-identifier">keys
</span>.
<span class=
"ruby-identifier">map
</span> {
<span class=
"ruby-operator">|
</span><span class=
"ruby-identifier">r
</span><span class=
"ruby-operator">|
</span> <span class=
"ruby-constant">Regexp
</span>.
<span class=
"ruby-identifier">new
</span>(
<span class=
"ruby-identifier">r
</span> <span class=
"ruby-operator">+
</span> <span class=
"ruby-value str">"$
"</span>)})
655 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-identifier">step_2_re
</span>
656 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-identifier">r1
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/#{$
&}$/
</span>
657 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-node">/#{$
&}$/
</span>,
<span class=
"ruby-identifier">s2m
</span>[
<span class=
"ruby-identifier">$
&</span>])
658 <span class=
"ruby-keyword kw">else
</span>
659 <span class=
"ruby-keyword kw">self
</span>
660 <span class=
"ruby-keyword kw">end
</span>
661 <span class=
"ruby-keyword kw">elsif
</span> <span class=
"ruby-identifier">r1
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/li$/
</span> <span class=
"ruby-keyword kw">and
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/(#{Porter2::Valid_LI})li$/
</span>
662 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/li$/
</span>,
<span class=
"ruby-value str">''
</span>)
663 <span class=
"ruby-keyword kw">elsif
</span> <span class=
"ruby-identifier">r1
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/ogi$/
</span> <span class=
"ruby-keyword kw">and
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/logi$/
</span>
664 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/ogi$/
</span>,
<span class=
"ruby-value str">'og'
</span>)
665 <span class=
"ruby-keyword kw">else
</span>
666 <span class=
"ruby-keyword kw">self
</span>
667 <span class=
"ruby-keyword kw">end
</span>
668 <span class=
"ruby-keyword kw">end
</span>
674 <div id=
"method-M000012" class=
"method-detail">
675 <a name=
"M000012"></a>
677 <div class=
"method-heading">
678 <a href=
"#M000012" class=
"method-signature">
679 <span class=
"method-name">porter2_step3
</span><span class=
"method-args">(gb_english = false)
</span>
683 <div class=
"method-description">
685 Search for the longest among the suffixes listed in the keys of
686 Porter2::STEP_3_MAPS. If one is found and that suffix occurs in R1, replace
687 it with the value found in STEP_3_MAPS.
690 (Suffix
‘ative
’ is treated as a special case in the procedure.)
693 (If gb_english is
<tt>true
</tt>, replace the
‘alise
’ suffix
694 with
‘al
’, similarly to how
‘alize
’ is treated.)
696 <p><a class=
"source-toggle" href=
"#"
697 onclick=
"toggleCode('M000012-source');return false;">[Source]
</a></p>
698 <div class=
"method-source-code" id=
"M000012-source">
700 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
192</span>
701 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_step3
</span>(
<span class=
"ruby-identifier">gb_english
</span> =
<span class=
"ruby-keyword kw">false
</span>)
702 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/ative$/
</span> <span class=
"ruby-keyword kw">and
</span> <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_r2
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/ative$/
</span>
703 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/ative$/
</span>,
<span class=
"ruby-value str">''
</span>)
704 <span class=
"ruby-keyword kw">else
</span>
705 <span class=
"ruby-identifier">s3m
</span> =
<span class=
"ruby-constant">Porter2
</span><span class=
"ruby-operator">::
</span><span class=
"ruby-constant">STEP_3_MAPS
</span>.
<span class=
"ruby-identifier">dup
</span>
706 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-identifier">gb_english
</span>
707 <span class=
"ruby-identifier">s3m
</span>[
<span class=
"ruby-value str">"alise
"</span>] =
<span class=
"ruby-value str">"al
"</span>
708 <span class=
"ruby-keyword kw">end
</span>
709 <span class=
"ruby-identifier">step_3_re
</span> =
<span class=
"ruby-constant">Regexp
</span>.
<span class=
"ruby-identifier">union
</span>(
<span class=
"ruby-identifier">s3m
</span>.
<span class=
"ruby-identifier">keys
</span>.
<span class=
"ruby-identifier">map
</span> {
<span class=
"ruby-operator">|
</span><span class=
"ruby-identifier">r
</span><span class=
"ruby-operator">|
</span> <span class=
"ruby-constant">Regexp
</span>.
<span class=
"ruby-identifier">new
</span>(
<span class=
"ruby-identifier">r
</span> <span class=
"ruby-operator">+
</span> <span class=
"ruby-value str">"$
"</span>)})
710 <span class=
"ruby-identifier">r1
</span> =
<span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_r1
</span>
711 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-identifier">step_3_re
</span> <span class=
"ruby-keyword kw">and
</span> <span class=
"ruby-identifier">r1
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/#{$
&}$/
</span>
712 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-node">/#{$
&}$/
</span>,
<span class=
"ruby-identifier">s3m
</span>[
<span class=
"ruby-identifier">$
&</span>])
713 <span class=
"ruby-keyword kw">else
</span>
714 <span class=
"ruby-keyword kw">self
</span>
715 <span class=
"ruby-keyword kw">end
</span>
716 <span class=
"ruby-keyword kw">end
</span>
717 <span class=
"ruby-keyword kw">end
</span>
723 <div id=
"method-M000013" class=
"method-detail">
724 <a name=
"M000013"></a>
726 <div class=
"method-heading">
727 <a href=
"#M000013" class=
"method-signature">
728 <span class=
"method-name">porter2_step4
</span><span class=
"method-args">(gb_english = false)
</span>
732 <div class=
"method-description">
734 Search for the longest among the suffixes listed in the keys of
735 Porter2::STEP_4_MAPS. If one is found and that suffix occurs in R2, replace
736 it with the value found in STEP_4_MAPS.
739 (Suffix
‘ion
’ is treated as a special case in the procedure.)
742 (If gb_english is
<tt>true
</tt>, delete the
‘ise
’ suffix if
745 <p><a class=
"source-toggle" href=
"#"
746 onclick=
"toggleCode('M000013-source');return false;">[Source]
</a></p>
747 <div class=
"method-source-code" id=
"M000013-source">
749 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
218</span>
750 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_step4
</span>(
<span class=
"ruby-identifier">gb_english
</span> =
<span class=
"ruby-keyword kw">false
</span>)
751 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_r2
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/ion$/
</span> <span class=
"ruby-keyword kw">and
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/(s|t)ion$/
</span>
752 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/ion$/
</span>,
<span class=
"ruby-value str">''
</span>)
753 <span class=
"ruby-keyword kw">else
</span>
754 <span class=
"ruby-identifier">s4m
</span> =
<span class=
"ruby-constant">Porter2
</span><span class=
"ruby-operator">::
</span><span class=
"ruby-constant">STEP_4_MAPS
</span>.
<span class=
"ruby-identifier">dup
</span>
755 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-identifier">gb_english
</span>
756 <span class=
"ruby-identifier">s4m
</span>[
<span class=
"ruby-value str">"ise
"</span>] =
<span class=
"ruby-value str">""</span>
757 <span class=
"ruby-keyword kw">end
</span>
758 <span class=
"ruby-identifier">step_4_re
</span> =
<span class=
"ruby-constant">Regexp
</span>.
<span class=
"ruby-identifier">union
</span>(
<span class=
"ruby-identifier">s4m
</span>.
<span class=
"ruby-identifier">keys
</span>.
<span class=
"ruby-identifier">map
</span> {
<span class=
"ruby-operator">|
</span><span class=
"ruby-identifier">r
</span><span class=
"ruby-operator">|
</span> <span class=
"ruby-constant">Regexp
</span>.
<span class=
"ruby-identifier">new
</span>(
<span class=
"ruby-identifier">r
</span> <span class=
"ruby-operator">+
</span> <span class=
"ruby-value str">"$
"</span>)})
759 <span class=
"ruby-identifier">r2
</span> =
<span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_r2
</span>
760 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-identifier">step_4_re
</span>
761 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-identifier">r2
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/#{$
&}/
</span>
762 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-node">/#{$
&}$/
</span>,
<span class=
"ruby-identifier">s4m
</span>[
<span class=
"ruby-identifier">$
&</span>])
763 <span class=
"ruby-keyword kw">else
</span>
764 <span class=
"ruby-keyword kw">self
</span>
765 <span class=
"ruby-keyword kw">end
</span>
766 <span class=
"ruby-keyword kw">else
</span>
767 <span class=
"ruby-keyword kw">self
</span>
768 <span class=
"ruby-keyword kw">end
</span>
769 <span class=
"ruby-keyword kw">end
</span>
770 <span class=
"ruby-keyword kw">end
</span>
776 <div id=
"method-M000014" class=
"method-detail">
777 <a name=
"M000014"></a>
779 <div class=
"method-heading">
780 <a href=
"#M000014" class=
"method-signature">
781 <span class=
"method-name">porter2_step5
</span><span class=
"method-args">()
</span>
785 <div class=
"method-description">
787 Search for the following suffixes, and, if found, perform the action
791 <tr><td valign=
"top">e:
</td><td>delete if in R2, or in R1 and not preceded by a short syllable
794 <tr><td valign=
"top">l:
</td><td>delete if in R2 and preceded by l
798 <p><a class=
"source-toggle" href=
"#"
799 onclick=
"toggleCode('M000014-source');return false;">[Source]
</a></p>
800 <div class=
"method-source-code" id=
"M000014-source">
802 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
244</span>
803 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_step5
</span>
804 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/ll$/
</span> <span class=
"ruby-keyword kw">and
</span> <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_r2
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/l$/
</span>
805 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/ll$/
</span>,
<span class=
"ruby-value str">'l'
</span>)
806 <span class=
"ruby-keyword kw">elsif
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/e$/
</span> <span class=
"ruby-keyword kw">and
</span> <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_r2
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/e$/
</span>
807 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/e$/
</span>,
<span class=
"ruby-value str">''
</span>)
808 <span class=
"ruby-keyword kw">else
</span>
809 <span class=
"ruby-identifier">r1
</span> =
<span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">porter2_r1
</span>
810 <span class=
"ruby-keyword kw">if
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/e$/
</span> <span class=
"ruby-keyword kw">and
</span> <span class=
"ruby-identifier">r1
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-regexp re">/e$/
</span> <span class=
"ruby-keyword kw">and
</span> <span class=
"ruby-keyword kw">not
</span> <span class=
"ruby-keyword kw">self
</span> <span class=
"ruby-operator">=~
</span> <span class=
"ruby-node">/#{Porter2::SHORT_SYLLABLE}e$/
</span>
811 <span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">sub
</span>(
<span class=
"ruby-regexp re">/e$/
</span>,
<span class=
"ruby-value str">''
</span>)
812 <span class=
"ruby-keyword kw">else
</span>
813 <span class=
"ruby-keyword kw">self
</span>
814 <span class=
"ruby-keyword kw">end
</span>
815 <span class=
"ruby-keyword kw">end
</span>
816 <span class=
"ruby-keyword kw">end
</span>
822 <div id=
"method-M000001" class=
"method-detail">
823 <a name=
"M000001"></a>
825 <div class=
"method-heading">
826 <a href=
"#M000001" class=
"method-signature">
827 <span class=
"method-name">porter2_tidy
</span><span class=
"method-args">()
</span>
831 <div class=
"method-description">
833 Tidy up the word before we get down to the algorithm
835 <p><a class=
"source-toggle" href=
"#"
836 onclick=
"toggleCode('M000001-source');return false;">[Source]
</a></p>
837 <div class=
"method-source-code" id=
"M000001-source">
839 <span class=
"ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line
7</span>
840 <span class=
"ruby-keyword kw">def
</span> <span class=
"ruby-identifier">porter2_tidy
</span>
841 <span class=
"ruby-identifier">preword
</span> =
<span class=
"ruby-keyword kw">self
</span>.
<span class=
"ruby-identifier">to_s
</span>.
<span class=
"ruby-identifier">strip
</span>.
<span class=
"ruby-identifier">downcase
</span>
843 <span class=
"ruby-comment cmt"># map apostrophe-like characters to apostrophes
</span>
844 <span class=
"ruby-identifier">preword
</span>.
<span class=
"ruby-identifier">gsub!
</span>(
<span class=
"ruby-regexp re">/‘/
</span>,
<span class=
"ruby-value str">"'
"</span>)
845 <span class=
"ruby-identifier">preword
</span>.
<span class=
"ruby-identifier">gsub!
</span>(
<span class=
"ruby-regexp re">/’/
</span>,
<span class=
"ruby-value str">"'
"</span>)
847 <span class=
"ruby-identifier">preword
</span>
848 <span class=
"ruby-keyword kw">end
</span>
854 <div id=
"method-M000018" class=
"method-detail">
855 <a name=
"M000018"></a>
857 <div class=
"method-heading">
858 <span class=
"method-name">stem
</span><span class=
"method-args">(gb_english = false)
</span>
861 <div class=
"method-description">
863 Alias for
<a href=
"String.html#M000016">porter2_stem
</a>
875 <div id=
"validator-badges">
876 <p><small><a href=
"http://validator.w3.org/check/referer">[Validate]
</a></small></p>