From fdc3bc03b2d7759927ae34d5382d2168be6a2bae Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Fri, 2 Mar 2012 17:56:35 +0000 Subject: [PATCH 1/1] Fiddling around with labels and boxes --- lib/erd_handler/box.rb | 4 +++ lib/erd_handler/erd.rb | 22 +++++++++++++ lib/erd_handler/label.rb | 31 +++++++++--------- spec/erd_handler/box_spec.rb | 12 ++++++- spec/erd_handler/erd_spec.rb | 57 ++++++++++++++++++++++++++++++++++ spec/erd_handler/label_spec.rb | 35 +++++++++++++++++++-- 6 files changed, 141 insertions(+), 20 deletions(-) diff --git a/lib/erd_handler/box.rb b/lib/erd_handler/box.rb index 75543eb..244b411 100644 --- a/lib/erd_handler/box.rb +++ b/lib/erd_handler/box.rb @@ -28,5 +28,9 @@ module ErdHandler other.contains?(self) end + def similarity(other) + 1.0 - self.name.edit_distance(other.name) / self.name.length.to_f + end + end end diff --git a/lib/erd_handler/erd.rb b/lib/erd_handler/erd.rb index bf74007..2c10364 100644 --- a/lib/erd_handler/erd.rb +++ b/lib/erd_handler/erd.rb @@ -22,5 +22,27 @@ module ErdHandler end self end + + # The minimal meaningful units of an ERD are: + # Each box in isolation + # Each link, with the associated boxes at its ends + def mmus + mmus = [] + self.vertices.each do |b| + mmu = Erd.new + mmu << b + mmus << mmu + end + self.edges.each do |l| + mmu = Erd.new + l.vertices.each do |b| + mmu << b + end + mmu << l + mmus << mmu + end + mmus + end + end end diff --git a/lib/erd_handler/label.rb b/lib/erd_handler/label.rb index a995432..9369190 100644 --- a/lib/erd_handler/label.rb +++ b/lib/erd_handler/label.rb @@ -2,27 +2,22 @@ class Label attr_reader :original, :processed - def initialize(original = "") + def initialize(original, auto_tidy = false) @original = original @processed = [original] + self.tidy if auto_tidy end def split(opts = {}) + regexp = /[\t\n _,.-]+/ + split_camel_case = true + split_numbers = true if opts.class == Regexp regexp = opts - split_camel_case = true else - regexp = opts[:regexp] || /[\t\n _,.-]+/ - if 
opts.has_key? :camel_case - split_camel_case = opts[:camel_case] - else - split_camel_case = true - end - if opts.has_key? :numbers - split_numbers = opts[:numbers] - else - split_numbers = true - end + regexp = opts[:regexp] if opts.has_key? :regexp + split_camel_case = opts[:camel_case] if opts.has_key? :camel_case + split_numbers = opts[:numbers] if opts.has_key? :numbers end @processed = @processed.map do |segment| segment.split(regexp) @@ -56,8 +51,12 @@ class Label self.split.downcase.stem end + def length + @processed.join('').length + end + def levenshtein(other_object) - if other_object.class == Label + if other_object.class.ancestors.include? Label other = other_object.processed.join('') else other = other_object @@ -75,8 +74,8 @@ class Label (1..m).each do |i| (1..n).each do |j| d[i][j] = [d[i-1][j-1] + ((s[j-1] == other[i-1]) ? 0 : 1), # substitution - d[i-1][j] + 1, # deletion - d[i][j-1] + 1 # addition + d[i-1][j] + 1, # insertion + d[i][j-1] + 1 # deletion ].min end end diff --git a/spec/erd_handler/box_spec.rb b/spec/erd_handler/box_spec.rb index 82005d6..1d47ed7 100644 --- a/spec/erd_handler/box_spec.rb +++ b/spec/erd_handler/box_spec.rb @@ -36,7 +36,17 @@ module ErdHandler b1.within?(b3).should == false b3.within?(b1).should == false end - end # contains? + end # within? 
+ + describe "#similarity" do + it "find the similarity of two boxes" do + b1 = Box.new + b1.name = Label.new "box 1", true + b2 = Box.new + b2.name = Label.new "box 2", true + b1.similarity(b2).should be_within(0.005).of(0.75) + end + end end end diff --git a/spec/erd_handler/erd_spec.rb b/spec/erd_handler/erd_spec.rb index d6d5561..6b33dd8 100644 --- a/spec/erd_handler/erd_spec.rb +++ b/spec/erd_handler/erd_spec.rb @@ -141,5 +141,62 @@ module ErdHandler l5.connections.find {|c| c.crowsfoot == :no}.end.should be b2 end end # #read + + describe "#mmus" do + it "finds three MMUs in a simple ERD" do + erd = Erd.new + erd.read(File.new("spec/fixtures/two_boxes_one_link_erd.xml")) + mmus = erd.mmus + + mmus.should have(3).items + single_box_mmus = mmus.select {|m| m.vertices.length == 1} + single_link_mmus = mmus.select {|m| m.edges.length == 1} + + single_box_mmus.should have(2).items + single_box_mmus.each do |m| + m.should have(1).vertices + m.should have(0).edges + end + + single_link_mmus.should have(1).items + single_link_mmus.each do |m| + m.should have(2).vertices + m.should have(1).edges + m.edges.first.should have(2).connections + m.vertices.each do |v| + m.vertices.should include(v) + end + end + end + + it "finds many MMUs in a complex ERD" do + erd = Erd.new + erd.read(File.new("spec/fixtures/complex_erd.xml")) + mmus = erd.mmus + + mmus.should have(11).items + single_box_mmus = mmus.select {|m| m.vertices.length == 1} + single_link_mmus = mmus.select {|m| m.edges.length == 1} + + single_box_mmus.should have(5).items + single_box_mmus.each do |m| + m.should have(1).vertices + m.should have(0).edges + end + single_box_mmus.map {|m| m.vertices.first.name.original}.uniq.should have(5).items + + single_link_mmus.should have(6).items + single_link_mmus.each do |m| + m.should have(2).vertices + m.should have(1).edges + m.edges.first.should have(2).connections + m.vertices.each do |v| + m.vertices.should include(v) + end + end + single_link_mmus.map {|m| 
m.edges.first.name.original}.uniq.should have(6).items + end + + end # #mmus end end diff --git a/spec/erd_handler/label_spec.rb b/spec/erd_handler/label_spec.rb index a1ea5a4..6f09b95 100644 --- a/spec/erd_handler/label_spec.rb +++ b/spec/erd_handler/label_spec.rb @@ -2,13 +2,34 @@ require 'spec_helper' module ErdHandler describe Label do + describe '#initialize' do + it "should give an error if not given an original string" do + # Label.new.should raise_error(ArgumentError) + expect {Label.new}.to raise_error(ArgumentError) + end + + it "should create a copy of the original as the processed string" do + test_label = "Test label" + l1 = Label.new test_label + l1.original.should == test_label + end + + it "should tidy the processed string if asked" do + test_label = "testingLabeller string, he_pontificated" + l1 = Label.new test_label.dup, true + l2 = Label.new test_label.dup + l2.split.downcase.stem + l1.processed.should == l2.processed + end + end # initialize + describe '#original' do it "reports the string it was initialised with" do test_label = "Test label" l1 = Label.new test_label l1.original.should == test_label - l1 = Label.new - l1.original.should == "" + #l1 = Label.new + #l1.original.should == "" end end # original @@ -147,13 +168,21 @@ module ErdHandler end end # tidy + describe "#length" do + it "returns the length of the processed label" do + l1 = Label.new "testingLabeller string, he_pontificated" + l1.tidy + l1.length.should == l1.processed.join('').length + end + end # length + describe "#levenshtein" do it "calculates the Levenshtein distance of the processed string" do l1 = Label.new "Fred" l1.levenshtein("Fred").should == 0 l1.levenshtein("Free").should == 1 l1.levenshtein("").should == 4 - l2 = Label.new + l2 = Label.new "" l2.levenshtein("Free").should == 4 l2.levenshtein("").should == 0 l3 = Label.new "meilenstein" -- 2.34.1