Fiddling around with labels and boxes
author Neil Smith <neil.github@njae.me.uk>
Fri, 2 Mar 2012 17:56:35 +0000 (17:56 +0000)
committer Neil Smith <neil.github@njae.me.uk>
Fri, 2 Mar 2012 17:56:35 +0000 (17:56 +0000)
lib/erd_handler/box.rb
lib/erd_handler/erd.rb
lib/erd_handler/label.rb
spec/erd_handler/box_spec.rb
spec/erd_handler/erd_spec.rb
spec/erd_handler/label_spec.rb

index 75543eb22674b1dcc92f5734dcc6ea6366a929e3..244b411fc597e663ba4b637685dcf5ddf1f177cc 100644 (file)
@@ -28,5 +28,9 @@ module ErdHandler
       other.contains?(self)
     end
     
       other.contains?(self)
     end
     
+    def similarity(other) # Scores how closely this object's name matches other's name; yields 1.0 when the edit distance is 0.
+      1.0 - self.name.edit_distance(other.name) / self.name.length.to_f # scaled by this name's length only, so a.similarity(b) need not equal b.similarity(a); NOTE(review): the result is not finite when self.name.length is 0 -- confirm empty names cannot reach here
+    end
+    
   end
 end
   end
 end
index bf740072d432b883cf7e58d891d56c4431418051..2c10364ef90983bbb74009d2d12d46eccea3e142 100644 (file)
@@ -22,5 +22,27 @@ module ErdHandler
       end
       self
     end
       end
       self
     end
+    
+    # The minimal meaningful units of an ERD are:
+    #  Each box in isolation
+    #  Each link, with the associated boxes at its ends
+    def mmus
+      mmus = []
+      self.vertices.each do |b| 
+        mmu = Erd.new
+        mmu << b
+        mmus << mmu 
+      end
+      self.edges.each do |l| 
+        mmu = Erd.new
+        l.vertices.each do |b|
+          mmu << b
+        end
+        mmu << l
+        mmus << mmu
+      end
+      mmus
+    end
+    
   end
 end
   end
 end
index a995432a3cf544240cc2a7d73984cb7c1259ddb7..9369190ad12ba4e633e00f74fd0244b6446962b3 100644 (file)
@@ -2,27 +2,22 @@ class Label
   
   attr_reader :original, :processed
   
   
   attr_reader :original, :processed
   
-  def initialize(original = "")
+  def initialize(original, auto_tidy = false)
     @original = original
     @processed = [original]
     @original = original
     @processed = [original]
+    self.tidy if auto_tidy
   end
   
   def split(opts = {})
   end
   
   def split(opts = {})
+    regexp = /[\t\n _,.-]+/
+    split_camel_case = true
+    split_numbers = true
     if opts.class == Regexp
       regexp = opts
     if opts.class == Regexp
       regexp = opts
-      split_camel_case = true
     else
     else
-      regexp = opts[:regexp] || /[\t\n _,.-]+/
-      if opts.has_key? :camel_case
-        split_camel_case = opts[:camel_case]
-      else
-        split_camel_case = true
-      end
-      if opts.has_key? :numbers
-        split_numbers = opts[:numbers]
-      else
-        split_numbers = true
-      end
+      regexp = opts[:regexp] if opts.has_key? :regexp # a caller-supplied separator pattern replaces the default
+      split_camel_case = opts[:camel_case] if opts.has_key? :camel_case
+      split_numbers = opts[:numbers] if opts.has_key? :numbers
     end
     @processed = @processed.map do |segment|
       segment.split(regexp)
     end
     @processed = @processed.map do |segment|
       segment.split(regexp)
@@ -56,8 +51,12 @@ class Label
     self.split.downcase.stem
   end
   
     self.split.downcase.stem
   end
   
+  def length # Total character count of the processed segments joined with no separator.
+    @processed.join('').length
+  end
+  
   def levenshtein(other_object)
   def levenshtein(other_object)
-    if other_object.class == Label
+    if other_object.class.ancestors.include? Label
       other = other_object.processed.join('')
     else
       other = other_object
       other = other_object.processed.join('')
     else
       other = other_object
@@ -75,8 +74,8 @@ class Label
     (1..m).each do |i|
       (1..n).each do |j|
         d[i][j] = [d[i-1][j-1] + ((s[j-1] == other[i-1]) ? 0 : 1),  # substitution
     (1..m).each do |i|
       (1..n).each do |j|
         d[i][j] = [d[i-1][j-1] + ((s[j-1] == other[i-1]) ? 0 : 1),  # substitution
-                   d[i-1][j] + 1, # deletion
-                   d[i][j-1] + 1  # addition
+                   d[i-1][j] + 1, # insertion
+                   d[i][j-1] + 1  # deletion
                    ].min
       end
     end
                    ].min
       end
     end
index 82005d6d9f019a39c6d6c6f61affd15141438f93..1d47ed701ed5ec6c9c31ee00be9236e69b006e17 100644 (file)
@@ -36,7 +36,17 @@ module ErdHandler
         b1.within?(b3).should == false
         b3.within?(b1).should == false
       end
         b1.within?(b3).should == false
         b3.within?(b1).should == false
       end
-    end # contains?
+    end # within?
+    
+    describe "#similarity" do
+      it "find the similarity of two boxes" do 
+        b1 = Box.new
+        b1.name = Label.new "box 1", true # second argument true asks Label to tidy the text on construction
+        b2 = Box.new
+        b2.name = Label.new "box 2", true
+        b1.similarity(b2).should be_within(0.005).of(0.75) # presumably the tidied names are "box1"/"box2": edit distance 1 over length 4 -- TODO confirm
+      end
+    end
 
   end
 end
 
   end
 end
index d6d5561e4cb34274d26b0e0fa4fda870ed2b3751..6b33dd8e3dbfc92238b3c76aadcb5b5201103079 100644 (file)
@@ -141,5 +141,62 @@ module ErdHandler
         l5.connections.find {|c| c.crowsfoot == :no}.end.should be b2
       end
     end # #read
         l5.connections.find {|c| c.crowsfoot == :no}.end.should be b2
       end
     end # #read
+    
+    describe "#mmus" do
+      it "finds three MMUs in a simple ERD" do
+        erd = Erd.new
+        erd.read(File.new("spec/fixtures/two_boxes_one_link_erd.xml")) # fixture name suggests two boxes joined by one link
+        mmus = erd.mmus
+        
+        mmus.should have(3).items
+        single_box_mmus = mmus.select {|m| m.vertices.length == 1} # units holding exactly one box
+        single_link_mmus = mmus.select {|m| m.edges.length == 1} # units holding exactly one link
+        
+        single_box_mmus.should have(2).items
+        single_box_mmus.each do |m|
+          m.should have(1).vertices
+          m.should have(0).edges
+        end
+        
+        single_link_mmus.should have(1).items
+        single_link_mmus.each do |m|
+          m.should have(2).vertices
+          m.should have(1).edges
+          m.edges.first.should have(2).connections
+          m.vertices.each do |v|
+            m.vertices.should include(v) # NOTE(review): always true as written, since v is drawn from m.vertices; presumably erd.vertices was intended -- confirm
+          end
+        end
+      end
+      
+      it "finds many MMUs in a complex ERD" do
+        erd = Erd.new
+        erd.read(File.new("spec/fixtures/complex_erd.xml"))
+        mmus = erd.mmus
+        
+        mmus.should have(11).items # the 5 single-box units plus the 6 single-link units checked below
+        single_box_mmus = mmus.select {|m| m.vertices.length == 1} # units holding exactly one box
+        single_link_mmus = mmus.select {|m| m.edges.length == 1} # units holding exactly one link
+
+        single_box_mmus.should have(5).items
+        single_box_mmus.each do |m|
+          m.should have(1).vertices
+          m.should have(0).edges
+        end
+        single_box_mmus.map {|m| m.vertices.first.name.original}.uniq.should have(5).items # every box unit wraps a differently named box
+        
+        single_link_mmus.should have(6).items
+        single_link_mmus.each do |m|
+          m.should have(2).vertices
+          m.should have(1).edges
+          m.edges.first.should have(2).connections
+          m.vertices.each do |v|
+            m.vertices.should include(v) # NOTE(review): always true as written, since v is drawn from m.vertices; presumably erd.vertices was intended -- confirm
+          end
+        end
+        single_link_mmus.map {|m| m.edges.first.name.original}.uniq.should have(6).items # every link unit wraps a differently named link
+      end
+
+    end # #mmus
   end
 end
   end
 end
index a1ea5a480513b52980798f8a02149bee368ff9c9..6f09b95e91336434dfbec22cc3b35bbdfd8ac2ae 100644 (file)
@@ -2,13 +2,34 @@ require 'spec_helper'
 
 module ErdHandler
   describe Label do
 
 module ErdHandler
   describe Label do
+    describe '#initialize' do
+      it "should give an error if not given an original string" do
+        # The block form is required: Label.new.should raise_error(ArgumentError) would raise before the matcher ran.
+        expect {Label.new}.to raise_error(ArgumentError)
+      end
+      
+      it "should create a copy of the original as the processed string" do
+        test_label = "Test label"
+        l1 = Label.new test_label
+        l1.original.should == test_label # NOTE(review): only original is checked; processed is not asserted despite the description
+      end
+      
+      it "should tidy the processed string if asked" do
+        test_label = "testingLabeller string, he_pontificated"
+        l1 = Label.new test_label.dup, true # true requests tidying during construction
+        l2 = Label.new test_label.dup
+        l2.split.downcase.stem # the manual steps the auto-tidied label is expected to match
+        l1.processed.should == l2.processed
+      end
+    end # initialize
+    
     describe '#original' do
       it "reports the string it was initialised with" do
         test_label = "Test label"
         l1 = Label.new test_label
         l1.original.should == test_label
     describe '#original' do
       it "reports the string it was initialised with" do
         test_label = "Test label"
         l1 = Label.new test_label
         l1.original.should == test_label
-        l1 = Label.new
-        l1.original.should == ""
+        #l1 = Label.new
+        #l1.original.should == ""
       end
     end # original
 
       end
     end # original
 
@@ -147,13 +168,21 @@ module ErdHandler
       end
     end # tidy
     
       end
     end # tidy
     
+    describe "#length" do
+      it "returns the length of the processed label" do
+        l1 = Label.new "testingLabeller string, he_pontificated"
+        l1.tidy
+        l1.length.should == l1.processed.join('').length # compares against the joined processed segments rather than a fixed number
+      end
+    end # length
+    
     describe "#levenshtein" do
       it "calculates the Levenshtein distance of the processed string" do
         l1 = Label.new "Fred"
         l1.levenshtein("Fred").should == 0
         l1.levenshtein("Free").should == 1
         l1.levenshtein("").should == 4
     describe "#levenshtein" do
       it "calculates the Levenshtein distance of the processed string" do
         l1 = Label.new "Fred"
         l1.levenshtein("Fred").should == 0
         l1.levenshtein("Free").should == 1
         l1.levenshtein("").should == 4
-        l2 = Label.new
+        l2 = Label.new ""
         l2.levenshtein("Free").should == 4
         l2.levenshtein("").should == 0
         l3 = Label.new "meilenstein"
         l2.levenshtein("Free").should == 4
         l2.levenshtein("").should == 0
         l3 = Label.new "meilenstein"