3 attr_reader
:original, :processed
5 def initialize(original
)
7 @processed = [original
]
11 if opts
.class == Regexp
13 split_camel_case
= true
15 regexp
= opts
[:regexp] || /[\t\n _,.-]+/
16 if opts
.has_key
? :camel_case
17 split_camel_case
= opts
[:camel_case]
19 split_camel_case
= true
21 if opts
.has_key
? :numbers
22 split_numbers
= opts
[:numbers]
27 @processed = @processed.map
do |segment
|
32 @processed = @processed.map
do |segment
|
33 segment
.split(/(?<=[a-z])(?=[A-Z])/)
38 @processed = @processed.map
do |segment
|
39 segment
.split(/(?:(?<!\d)(?=\d))|(?:(?<=\d)(?!\d))/)
46 @processed = @processed.map
do |segment
| segment
.downcase
end
50 def stem(gb_english
= false)
51 @processed = @processed.map
do |segment
| segment
.stem(gb_english
) end
56 self.split
.downcase
.stem