Added simplecov
[erd-marker.git] / lib / erd_handler / label.rb
# Wraps a text label and keeps a list of processed segments derived from it.
#
# The transforming methods (+split+, +downcase+, +stem+, +tidy+) replace
# +processed+ in place and return +self+, so they can be chained.
class Label
  # Separators used by #split when no regexp is supplied.
  DEFAULT_SEPARATORS = /[\t\n _,.-]+/
  # Zero-width boundary between a lower-case letter and an upper-case letter.
  CAMEL_CASE_BOUNDARY = /(?<=[a-z])(?=[A-Z])/
  # Zero-width boundary where a run of digits starts or ends.
  NUMBER_BOUNDARY = /(?:(?<!\d)(?=\d))|(?:(?<=\d)(?!\d))/

  # original:: the value passed to the constructor, untouched.
  # processed:: array of segments; starts out as <tt>[original]</tt>.
  attr_reader :original, :processed

  # original::  the label text.
  # auto_tidy:: when truthy, #tidy is applied immediately.
  def initialize(original, auto_tidy = false)
    @original = original
    @processed = [original]
    tidy if auto_tidy
  end

  # Splits every processed segment into smaller segments.
  #
  # +opts+ is either a Regexp (used as the separator) or a Hash with the
  # optional keys:
  # :regexp::     separator pattern (default DEFAULT_SEPARATORS)
  # :camel_case:: also split at lower-to-upper case boundaries (default true)
  # :numbers::    also split where digit runs begin or end (default true)
  #
  # Returns self.
  def split(opts = {})
    regexp = DEFAULT_SEPARATORS
    split_camel_case = true
    split_numbers = true

    if opts.is_a?(Regexp)
      regexp = opts
    else
      # The value must be read with the same key that was tested; reading a
      # misspelt key would yield nil and discard the caller's pattern.
      regexp = opts[:regexp] if opts.key?(:regexp)
      split_camel_case = opts[:camel_case] if opts.key?(:camel_case)
      split_numbers = opts[:numbers] if opts.key?(:numbers)
    end

    @processed = @processed.flat_map { |segment| segment.split(regexp) }

    if split_camel_case
      @processed = @processed.flat_map { |segment| segment.split(CAMEL_CASE_BOUNDARY) }
    end

    if split_numbers
      @processed = @processed.flat_map { |segment| segment.split(NUMBER_BOUNDARY) }
    end

    self
  end

  # Lower-cases every processed segment. Returns self.
  def downcase
    @processed = @processed.map(&:downcase)
    self
  end

  # Replaces every processed segment with <tt>segment.stem(gb_english)</tt>.
  # NOTE(review): String#stem is not defined in this file; presumably it is
  # supplied by a stemmer library loaded elsewhere -- confirm.
  # Returns self.
  def stem(gb_english = false)
    @processed = @processed.map { |segment| segment.stem(gb_english) }
    self
  end

  # Convenience pipeline: split, then downcase, then stem. Returns self.
  def tidy
    split.downcase.stem
  end

  # Total number of characters across all processed segments.
  def length
    @processed.join('').length
  end

  # Levenshtein (edit) distance between this label's joined processed
  # segments and +other_object+.
  #
  # other_object:: another Label (its joined processed segments are used), or
  #                any object that responds to +length+ and +[]+ like a String.
  #
  # Returns the distance as an Integer.
  def levenshtein(other_object)
    other = other_object.is_a?(Label) ? other_object.processed.join('') : other_object
    s = @processed.join('')
    n = s.length
    m = other.length
    return m if n.zero?
    return n if m.zero?

    # One row for each character in other, one column for each character in
    # self (plus the leading row/column for the empty prefix).
    d = Array.new(m + 1) { Array.new(n + 1, 0) }

    (0..n).each { |col| d[0][col] = col }
    (0..m).each { |row| d[row][0] = row }

    (1..m).each do |i|
      (1..n).each do |j|
        substitution_cost = s[j - 1] == other[i - 1] ? 0 : 1
        d[i][j] = [
          d[i - 1][j - 1] + substitution_cost, # substitution
          d[i - 1][j] + 1,                     # insertion
          d[i][j - 1] + 1                      # deletion
        ].min
      end
    end

    d[-1][-1]
  end

  alias edit_distance levenshtein
end