other.contains?(self)
end
+ def similarity(other)
+ 1.0 - self.name.edit_distance(other.name) / self.name.length.to_f
+ end
+
end
end
end
self
end
+
+ # The minimal meaningful units of an ERD are:
+ # Each box in isolation
+ # Each link, with the associated boxes at its ends
+ def mmus
+ mmus = []
+ self.vertices.each do |b|
+ mmu = Erd.new
+ mmu << b
+ mmus << mmu
+ end
+ self.edges.each do |l|
+ mmu = Erd.new
+ l.vertices.each do |b|
+ mmu << b
+ end
+ mmu << l
+ mmus << mmu
+ end
+ mmus
+ end
+
end
end
attr_reader :original, :processed
- def initialize(original = "")
+ # Build a label from its original text.
+ #
+ # original  - the text of the label; required (there is no default).
+ # auto_tidy - when truthy, tidy is called on the new label straight away.
+ #
+ # @processed starts as a one-element array holding the very object passed
+ # in as original (it is not copied here).
+ def initialize(original, auto_tidy = false)
@original = original
@processed = [original]
+ self.tidy if auto_tidy
end
def split(opts = {})
+ regexp = /[\t\n _,.-]+/
+ split_camel_case = true
+ split_numbers = true
if opts.class == Regexp
regexp = opts
- split_camel_case = true
else
- regexp = opts[:regexp] || /[\t\n _,.-]+/
- if opts.has_key? :camel_case
- split_camel_case = opts[:camel_case]
- else
- split_camel_case = true
- end
- if opts.has_key? :numbers
- split_numbers = opts[:numbers]
- else
- split_numbers = true
- end
+ regexp = opts[:rexexp] if opts.has_key? :regexp
+ split_camel_case = opts[:camel_case] if opts.has_key? :camel_case
+ split_numbers = opts[:numbers] if opts.has_key? :numbers
end
@processed = @processed.map do |segment|
segment.split(regexp)
self.split.downcase.stem
end
+ # Number of characters in the processed label: the segments held in
+ # @processed are joined with no separator and the length of the result
+ # is returned.
+ def length
+ @processed.join('').length
+ end
+
def levenshtein(other_object)
- if other_object.class == Label
+ if other_object.class.ancestors.include? Label
other = other_object.processed.join('')
else
other = other_object
(1..m).each do |i|
(1..n).each do |j|
d[i][j] = [d[i-1][j-1] + ((s[j-1] == other[i-1]) ? 0 : 1), # substitution
- d[i-1][j] + 1, # deletion
- d[i][j-1] + 1 # addition
+ d[i-1][j] + 1, # insertion
+ d[i][j-1] + 1 # deletion
].min
end
end
b1.within?(b3).should == false
b3.within?(b1).should == false
end
- end # contains?
+ end # within?
+
+ describe "#similarity" do
+ # Both labels are built with auto-tidy switched on (second argument true).
+ # Presumably they tidy to "box1" and "box2" (four characters, one edit
+ # apart), which gives the expected 1 - 1/4 = 0.75. TODO confirm against
+ # Label#tidy.
+ it "find the similarity of two boxes" do
+ b1 = Box.new
+ b1.name = Label.new "box 1", true
+ b2 = Box.new
+ b2.name = Label.new "box 2", true
+ b1.similarity(b2).should be_within(0.005).of(0.75)
+ end
+ end
end
end
l5.connections.find {|c| c.crowsfoot == :no}.end.should be b2
end
end # #read
+
+ describe "#mmus" do
+ it "finds three MMUs in a simple ERD" do
+ erd = Erd.new
+ erd.read(File.new("spec/fixtures/two_boxes_one_link_erd.xml"))
+ mmus = erd.mmus
+
+ mmus.should have(3).items
+ single_box_mmus = mmus.select {|m| m.vertices.length == 1}
+ single_link_mmus = mmus.select {|m| m.edges.length == 1}
+
+ single_box_mmus.should have(2).items
+ single_box_mmus.each do |m|
+ m.should have(1).vertices
+ m.should have(0).edges
+ end
+
+ single_link_mmus.should have(1).items
+ single_link_mmus.each do |m|
+ m.should have(2).vertices
+ m.should have(1).edges
+ m.edges.first.should have(2).connections
+ m.vertices.each do |v|
+ m.vertices.should include(v)
+ end
+ end
+ end
+
+ it "finds many MMUs in a complex ERD" do
+ erd = Erd.new
+ erd.read(File.new("spec/fixtures/complex_erd.xml"))
+ mmus = erd.mmus
+
+ mmus.should have(11).items
+ single_box_mmus = mmus.select {|m| m.vertices.length == 1}
+ single_link_mmus = mmus.select {|m| m.edges.length == 1}
+
+ single_box_mmus.should have(5).items
+ single_box_mmus.each do |m|
+ m.should have(1).vertices
+ m.should have(0).edges
+ end
+ single_box_mmus.map {|m| m.vertices.first.name.original}.uniq.should have(5).items
+
+ single_link_mmus.should have(6).items
+ single_link_mmus.each do |m|
+ m.should have(2).vertices
+ m.should have(1).edges
+ m.edges.first.should have(2).connections
+ m.vertices.each do |v|
+ m.vertices.should include(v)
+ end
+ end
+ single_link_mmus.map {|m| m.edges.first.name.original}.uniq.should have(6).items
+ end
+
+ end # #mmus
end
end
module ErdHandler
describe Label do
+ # Construction rules for Label: the original text is mandatory, the
+ # processed form starts out as the original, and tidying is opt-in.
+ describe '#initialize' do
+   it "should give an error if not given an original string" do
+     # The constructor call sits inside the expect block so the
+     # ArgumentError is raised under the matcher rather than aborting the
+     # example before the expectation is evaluated.
+     expect {Label.new}.to raise_error(ArgumentError)
+   end
+
+   it "should create a copy of the original as the processed string" do
+     test_label = "Test label"
+     l1 = Label.new test_label
+     l1.original.should == test_label
+     # The processed form starts as a single segment equal to the original.
+     l1.processed.should == [test_label]
+   end
+
+   it "should tidy the processed string if asked" do
+     test_label = "testingLabeller string, he_pontificated"
+     # Each label gets its own dup of the text, presumably so that
+     # processing one cannot affect the other's input.
+     l1 = Label.new test_label.dup, true
+     l2 = Label.new test_label.dup
+     l2.split.downcase.stem
+     l1.processed.should == l2.processed
+   end
+ end # initialize
+
describe '#original' do
it "reports the string it was initialised with" do
test_label = "Test label"
l1 = Label.new test_label
l1.original.should == test_label
- l1 = Label.new
- l1.original.should == ""
+ # Label.new no longer defaults the original to ""; the missing-argument
+ # case is covered by the ArgumentError example under '#initialize'.
end
end # original
end
end # tidy
+ describe "#length" do
+ # NOTE(review): the expected value is computed with the same expression
+ # that Label#length uses, so this only checks that the method agrees with
+ # its own implementation; consider pinning a literal length instead.
+ it "returns the length of the processed label" do
+ l1 = Label.new "testingLabeller string, he_pontificated"
+ l1.tidy
+ l1.length.should == l1.processed.join('').length
+ end
+ end # length
+
describe "#levenshtein" do
it "calculates the Levenshtein distance of the processed string" do
l1 = Label.new "Fred"
l1.levenshtein("Fred").should == 0
l1.levenshtein("Free").should == 1
l1.levenshtein("").should == 4
- l2 = Label.new
+ l2 = Label.new ""
l2.levenshtein("Free").should == 4
l2.levenshtein("").should == 0
l3 = Label.new "meilenstein"