a1ea5a480513b52980798f8a02149bee368ff9c9
[erd-marker.git] / spec / erd_handler / label_spec.rb
1 require 'spec_helper'
2
module ErdHandler
  # Examples for Label. Each example constructs a Label, optionally runs one
  # or more processing steps on it, and then inspects #processed and
  # #original (or the result of #levenshtein).
  describe Label do
    describe '#original' do
      it "reports the string it was initialised with" do
        text = "Test label"
        named = Label.new(text)
        named.original.should == text

        # A Label built without an argument is expected to report "".
        blank = Label.new
        blank.original.should == ""
      end
    end # original

    describe '#processed' do
      it "reports the original if no processing has been done" do
        text = "Test label"
        untouched = Label.new(text)
        untouched.processed.should == [text]
        untouched.original.should == text
      end
    end # processed

    describe '#split' do
      it "splits the original on the specified regexp" do
        spaced = Label.new("Test label")
        spaced.split(/ /)
        spaced.processed.should == ["Test", "label"]
        spaced.original.should == "Test label"

        underscored = Label.new("Test_label")
        underscored.split(/_/)
        underscored.processed.should == ["Test", "label"]

        mixed = Label.new("Test label_string")
        mixed.split(/[ _]/)
        mixed.processed.should == ["Test", "label", "string"]
      end

      it "splits the original on camel case" do
        upper_camel = Label.new("TestLabel")
        upper_camel.split(:camel_case => true)
        upper_camel.processed.should == ["Test", "Label"]
        upper_camel.original.should == "TestLabel"

        lower_camel = Label.new("testLabel")
        lower_camel.split(:camel_case => true)
        lower_camel.processed.should == ["test", "Label"]
        lower_camel.original.should == "testLabel"
      end

      it "doesn't split the original on camel case if asked not to" do
        # Both false and nil are exercised as "off" values for the option.
        with_false = Label.new("TestLabel")
        with_false.split(:camel_case => false)
        with_false.processed.should == ["TestLabel"]
        with_false.original.should == "TestLabel"

        with_nil = Label.new("TestLabel")
        with_nil.split(:camel_case => nil)
        with_nil.processed.should == ["TestLabel"]
        with_nil.original.should == "TestLabel"
      end

      it "splits the original on numbers" do
        digit_run = Label.new("Test123Label")
        digit_run.split(:numbers => true)
        digit_run.processed.should == ["Test", "123", "Label"]
        digit_run.original.should == "Test123Label"

        single_digit = Label.new("test1label")
        single_digit.split(:numbers => true)
        single_digit.processed.should == ["test", "1", "label"]
        single_digit.original.should == "test1label"
      end

      it "doesn't split the original on numbers if asked not to" do
        # Both false and nil are exercised as "off" values for the option.
        with_false = Label.new("Test123Label")
        with_false.split(:numbers => false)
        with_false.processed.should == ["Test123Label"]
        with_false.original.should == "Test123Label"

        with_nil = Label.new("Test123Label")
        with_nil.split(:numbers => nil)
        with_nil.processed.should == ["Test123Label"]
        with_nil.original.should == "Test123Label"
      end

      it "splits the original using a default regexp" do
        # Input mixes a space, an underscore and a tab as separators.
        label = Label.new("Test label_string\tfred")
        label.split
        label.processed.should == ["Test", "label", "string", "fred"]
      end

      it "splits the original on camel case by default" do
        label = Label.new("TestLabel")
        label.split
        label.processed.should == ["Test", "Label"]
        label.original.should == "TestLabel"
      end

      it "splits the original on numbers by default" do
        label = Label.new("Test123Label")
        label.split
        label.processed.should == ["Test", "123", "Label"]
        label.original.should == "Test123Label"
      end

      it "splits the original on punctuation, whitespace, camel case, and numbers by default" do
        label = Label.new("TestLabel is_split, 123 he,said456Fred")
        label.split
        label.processed.should == ["Test", "Label", "is", "split", "123", "he", "said", "456", "Fred"]
        label.original.should == "TestLabel is_split, 123 he,said456Fred"
      end

      it "is idempotent" do
        label = Label.new("TestLabel is_split, 123 he,said456Fred")
        # Keep a copy of the result of the first split so it can be compared
        # with the result of splitting the same label a second time.
        first_pass = label.split.dup
        second_pass = label.split
        first_pass.processed.should == second_pass.processed
        label.original.should == "TestLabel is_split, 123 he,said456Fred"
      end
    end # split

    describe "#downcase" do
      it "downcases all parts of the processed label" do
        label = Label.new("Test label_string")
        label.split.downcase
        label.processed.should == ["test", "label", "string"]
      end
    end # downcase

    describe "#stem" do
      it "stems all parts of the processed label" do
        label = Label.new("testing labeller string pontificated")
        label.split.stem
        label.processed.should == ["test", "label", "string", "pontif"]
      end
    end # stem

    describe "#tidy" do
      it "tidies a label" do
        # tidy is compared against the explicit split/downcase/stem chain
        # applied to a second Label built from the same original string.
        tidied = Label.new("testingLabeller string, he_pontificated")
        stepwise = Label.new(tidied.original)
        tidied.tidy
        stepwise.split.downcase.stem
        tidied.processed.should == stepwise.processed
      end
    end # tidy

    describe "#levenshtein" do
      it "calculates the Levenshtein distance of the processed string" do
        fred = Label.new("Fred")
        fred.levenshtein("Fred").should == 0
        fred.levenshtein("Free").should == 1
        fred.levenshtein("").should == 4

        blank = Label.new
        blank.levenshtein("Free").should == 4
        blank.levenshtein("").should == 0

        milestone = Label.new("meilenstein")
        milestone.levenshtein("levenshtein").should == 4

        # After tidy, the distance is measured against the concatenated,
        # downcased and stemmed parts.
        phrase = Label.new("testingLabeller string, he_pontificated")
        phrase.tidy.levenshtein("testlabelstringhepontif").should == 0
        phrase.tidy.levenshtein("testlabelXstringhepontif").should == 1
      end

      it "calculates the Levenshtein distance between Labels" do
        source = Label.new("meilenstein")
        target = Label.new("levenshtein")
        source.levenshtein(target).should == 4
      end
    end # levenshtein

  end
end