1 <?xml version=
"1.0" encoding=
"iso-8859-1"?>
3 PUBLIC
"-//W3C//DTD XHTML 1.0 Transitional//EN"
4 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
6 <html xmlns=
"http://www.w3.org/1999/xhtml" xml:
lang=
"en" lang=
"en">
8 <title>Module: Porter2
</title>
9 <meta http-equiv=
"Content-Type" content=
"text/html; charset=iso-8859-1" />
10 <meta http-equiv=
"Content-Script-Type" content=
"text/javascript" />
11 <link rel=
"stylesheet" href=
".././rdoc-style.css" type=
"text/css" media=
"screen" />
12 <script type=
"text/javascript">
/**
 * Open the given URL in a small, resizable popup window named "Code".
 *
 * The window is created with scrollbars and without a toolbar or status
 * bar, at 400x150 pixels.
 *
 * @param {string} url - Address of the page to display in the popup.
 */
function popupCode( url ) {
  window.open(
    url,
    "Code",
    "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400"
  );
}
/**
 * Show or hide the element with the given id by switching its inline
 * `display` style between "block" and "none".
 *
 * Any display value other than "block" (including an empty one) is treated
 * as hidden, so the first call on an unstyled element makes it visible.
 *
 * @param {string} id - The id of the element to toggle.
 * @returns {boolean} true when the element was found and toggled, false
 *   when no element lookup is available or no element matches the id.
 */
function toggleCode( id ) {
  var elem = null;

  if ( document.getElementById ) {
    elem = document.getElementById( id );
  } else if ( document.all ) {
    // Legacy fallback. A property lookup is used instead of eval() so that
    // ids that are not valid identifiers (e.g. containing "-") still work.
    elem = document.all[ id ];
  }

  // Nothing to toggle: avoid dereferencing a missing element.
  if ( !elem ) {
    return false;
  }

  var elemStyle = elem.style;

  if ( elemStyle.display != "block" ) {
    elemStyle.display = "block";
  } else {
    elemStyle.display = "none";
  }

  return true;
}
// Make codeblocks hidden by default. The rule is written from script, so
// it is only added when JavaScript actually runs in the page.
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" );
49 <div id=
"classHeader">
50 <table class=
"header-table">
51 <tr class=
"top-aligned-row">
52 <td><strong>Module
</strong></td>
53 <td class=
"class-name-in-header">Porter2
</td>
55 <tr class=
"top-aligned-row">
56 <td><strong>In:
</strong></td>
58 <a href=
"../files/lib/porter2stemmer/constants_rb.html">
59 lib/porter2stemmer/constants.rb
67 <!-- banner header -->
69 <div id=
"bodyContent">
73 <div id=
"contextContent">
75 <div id=
"description">
77 Constants for the Porter 2 stemmer
94 <div id=
"constants-list">
95 <h3 class=
"section-bar">Constants
</h3>
97 <div class=
"name-list">
98 <table summary=
"Constants">
99 <tr class=
"top-aligned-row context-row">
100 <td class=
"context-item-name">C
</td>
102 <td class="context-item-value">"[^aeiouy]"</td>
103 <td width=
"3em"> </td>
104 <td class=
"context-item-desc">
109 <tr class=
"top-aligned-row context-row">
110 <td class=
"context-item-name">V
</td>
112 <td class="context-item-value">"[aeiouy]"</td>
113 <td width=
"3em"> </td>
114 <td class=
"context-item-desc">
119 <tr class=
"top-aligned-row context-row">
120 <td class=
"context-item-name">CW
</td>
122 <td class="context-item-value">"[^aeiouywxY]"</td>
123 <td width=
"3em"> </td>
124 <td class=
"context-item-desc">
125 A non-vowel other than w, x, or Y
129 <tr class=
"top-aligned-row context-row">
130 <td class=
"context-item-name">Double
</td>
132 <td class="context-item-value">"(bb|dd|ff|gg|mm|nn|pp|rr|tt)"</td>
133 <td width=
"3em"> </td>
134 <td class=
"context-item-desc">
135 Doubles created when adding a suffix: these are undoubled when stemmed
139 <tr class=
"top-aligned-row context-row">
140 <td class=
"context-item-name">Valid_LI
</td>
142 <td class="context-item-value">"[cdeghkmnrt]"</td>
143 <td width=
"3em"> </td>
144 <td class=
"context-item-desc">
145 A valid letter that can come before ‘li’ (or ‘ly’)
149 <tr class=
"top-aligned-row context-row">
150 <td class=
"context-item-name">SHORT_SYLLABLE
</td>
152 <td class="context-item-value">"((#{C}#{V}#{CW})|(^#{V}#{C}))"</td>
153 <td width=
"3em"> </td>
154 <td class=
"context-item-desc">
155 A specification for a short syllable.
158 A short syllable in a word is either:
161 <li>a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel, or
165 <li>a vowel at the beginning of the word followed by a non-vowel.
170 (The original document is silent on whether sequences of two or more
171 non-vowels make a syllable long. But as this specification is only used to
172 find sequences of non-vowel - vowel - non-vowel - end-of-word, this
173 ambiguity does not have an effect.)
177 <tr class=
"top-aligned-row context-row">
178 <td class=
"context-item-name">STEP_2_MAPS
</td>
180 <td class="context-item-value">{ "tional" =&gt; "tion", "enci" =&gt; "ence", "anci" =&gt; "ance", "abli" =&gt; "able", "entli" =&gt; "ent", "ization" =&gt; "ize", "izer" =&gt; "ize", "ational" =&gt; "ate", "ation" =&gt; "ate", "ator" =&gt; "ate", "alism" =&gt; "al", "aliti" =&gt; "al", "alli" =&gt; "al", "fulness" =&gt; "ful", "ousli" =&gt; "ous", "ousness" =&gt; "ous", "iveness" =&gt; "ive", "iviti" =&gt; "ive", "biliti" =&gt; "ble", "bli" =&gt; "ble", "fulli" =&gt; "ful", "lessli" =&gt; "less" }</td>
181 <td width=
"3em"> </td>
182 <td class=
"context-item-desc">
183 Suffix transformations used in porter2_step2. (ogi, li endings dealt with in procedure)
188 <tr class=
"top-aligned-row context-row">
189 <td class=
"context-item-name">STEP_3_MAPS
</td>
191 <td class="context-item-value">{ "tional" =&gt; "tion", "ational" =&gt; "ate", "alize" =&gt; "al", "icate" =&gt; "ic", "iciti" =&gt; "ic", "ical" =&gt; "ic", "ful" =&gt; "", "ness" =&gt; "" }</td>
192 <td width=
"3em"> </td>
193 <td class=
"context-item-desc">
194 Suffix transformations used in porter2_step3. (ative ending dealt with in procedure)
199 <tr class=
"top-aligned-row context-row">
200 <td class=
"context-item-name">STEP_4_MAPS
</td>
202 <td class="context-item-value">{ "al" =&gt; "", "ance" =&gt; "", "ence" =&gt; "", "er" =&gt; "", "ic" =&gt; "", "able" =&gt; "", "ible" =&gt; "", "ant" =&gt; "", "ement" =&gt; "", "ment" =&gt; "", "ent" =&gt; "", "ism" =&gt; "", "ate" =&gt; "", "iti" =&gt; "", "ous" =&gt; "", "ive" =&gt; "", "ize" =&gt; "" }</td>
203 <td width=
"3em"> </td>
204 <td class=
"context-item-desc">
205 Suffix transformations used in porter2_step4. (ion ending dealt with in procedure)
210 <tr class=
"top-aligned-row context-row">
211 <td class=
"context-item-name">SPECIAL_CASES
</td>
213 <td class="context-item-value">{ "skis" =&gt; "ski", "skies" =&gt; "sky", "dying" =&gt; "die", "lying" =&gt; "lie", "tying" =&gt; "tie", "idly" =&gt; "idl", "gently" =&gt; "gentl", "ugly" =&gt; "ugli", "early" =&gt; "earli", "only" =&gt; "onli", "singly" =&gt;"singl", "sky" =&gt; "sky", "news" =&gt; "news", "howe" =&gt; "howe", "atlas" =&gt; "atlas", "cosmos" =&gt; "cosmos", "bias" =&gt; "bias", "andes" =&gt; "andes" }</td>
214 <td width=
"3em"> </td>
215 <td class=
"context-item-desc">
216 Special-case stemmings
220 <tr class=
"top-aligned-row context-row">
221 <td class=
"context-item-name">STEP_1A_SPECIAL_CASES
</td>
223 <td class=
"context-item-value">%w[ inning outing canning herring earring proceed exceed succeed ]
</td>
224 <td width=
"3em"> </td>
225 <td class=
"context-item-desc">
226 Special case words to stop processing after step 1a.
239 <!-- if method_list -->
245 <div id=
"validator-badges">
246 <p><small><a href=
"http://validator.w3.org/check/referer">[Validate]
</a></small></p>