3 attr_reader
:original, :processed
5 def initialize(original
, auto_tidy
= false)
7 @processed = [original
]
12 regexp
= /[\t\n _,.-]+/
13 split_camel_case
= true
15 if opts
.class == Regexp
18 regexp
= opts
[:rexexp] if opts
.has_key
? :regexp
19 split_camel_case
= opts
[:camel_case] if opts
.has_key
? :camel_case
20 split_numbers
= opts
[:numbers] if opts
.has_key
? :numbers
22 @processed = @processed.map
do |segment
|
27 @processed = @processed.map
do |segment
|
28 segment
.split(/(?<=[a-z])(?=[A-Z])/)
33 @processed = @processed.map
do |segment
|
34 segment
.split(/(?:(?<!\d)(?=\d))|(?:(?<=\d)(?!\d))/)
41 @processed = @processed.map
do |segment
| segment
.downcase
end
45 def stem(gb_english
= false)
46 @processed = @processed.map
do |segment
| segment
.stem(gb_english
) end
51 self.split
.downcase
.stem
55 @processed.join('').length
58 def levenshtein(other_object
)
59 if other_object
.class.ancestors
.include? Label
60 other
= other_object
.processed
.join('')
64 s
= @processed.join('')
70 d
= Array
.new(m
+1) {Array
.new(n
+1, 0)} # one row for each characer in other, one column for each charater in self
72 (0..n
).each
{|i
| d
[0][i
] = i
}
73 (0..m
).each
{|j
| d
[j
][0] = j
}
76 d
[i
][j
] = [d
[i-1
][j-1
] + ((s
[j-1
] == other
[i-1
]) ? 0 : 1), # substitution
77 d
[i-1
][j
] + 1, # insertion
78 d
[i
][j-1
] + 1 # deletion
85 alias :edit_distance :levenshtein