Initial similarity flooding working
authorNeil Smith <neil.github@njae.me.uk>
Fri, 23 Mar 2012 21:40:04 +0000 (21:40 +0000)
committerNeil Smith <neil.github@njae.me.uk>
Fri, 23 Mar 2012 21:40:04 +0000 (21:40 +0000)
Gemfile
Gemfile.lock
lib/graph.njae/graph.rb
spec/graph/graph_spec.rb

diff --git a/Gemfile b/Gemfile
index 804c2dc7fae9bd275dfee8cebaa091bb43faada0..df7f77956a3304619972aaaace75c1f412918a07 100644 (file)
--- a/Gemfile
+++ b/Gemfile
@@ -8,5 +8,5 @@ source "http://rubygems.org"
 group :development do
   gem "rspec", "~> 2.8.0"
   gem "bundler", "~> 1.0.0"
-  gem "jeweler", "~> 1.6.2"
+  gem "jeweler", "~> 1.8.0"
 end
index 0679be6f5907889a0014c5be8f6089531d8697bc..65179b581c8739a7b66545fc841406fe83edffe0 100644 (file)
@@ -26,6 +26,5 @@ PLATFORMS
 
 DEPENDENCIES
   bundler (~> 1.0.0)
-  jeweler (~> 1.6.2)
-  rdoc
+  jeweler (~> 1.8.0)
   rspec (~> 2.8.0)
index 77b96da5fa7ea86d7b174cb7ddcedd1ebd24416e..011dd8625720dd601cd4868257ae7224273fb133 100644 (file)
@@ -58,8 +58,43 @@ module GraphNjae
       product_graph
     end
     
+    
+    def initial_similarity
+      self.vertices.each do |v|
+        if block_given?
+          v.initial_similarity = yield v
+        else
+          v.initial_similarity = 1.0
+        end
+        v.similarity = v.initial_similarity
+      end
+    end
+    
     # Performs similarity flooding on a graph
-    def similarity_flood(&normalization)
+    # Assumes that the initial similarity has already been calculated
+    def similarity_flood(opts = {})
+      max_iterations = opts[:iterations] || 100
+      max_residual = opts[:max_residual] || 0.001
+      iteration = 1
+      residual = max_residual + 1
+      while residual > max_residual and iteration <= max_iterations
+        self.vertices.each do |v|
+          v.last_similarity = v.similarity
+        end
+        self.vertices.each do |v|
+           n = v.neighbours.length
+           v.neighbours.each do |neighbour|
+             neighbour.similarity += v.last_similarity / n
+           end
+        end
+        max_similarity = vertices.max {|v, w| v.similarity <=> w.similarity}.similarity
+        self.vertices.each do |v|
+          v.similarity = v.similarity / max_similarity
+        end
+        residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2})
+        iteration += 1
+      end
+      
     end
     
   end # class
index be6a7b5a332bc03dbde25cfebc751f83e1f8dcd1..8339946af89830e51f4d7d5dac5dab0ec29d39c0 100644 (file)
@@ -133,8 +133,58 @@ module GraphNjae
 
     end
     
+    describe "initial_similarity" do
+      before(:each) do
+        g1 = Graph.new
+        g2 = Graph.new
+        g1v1 = Vertex.new(:name => :g1v1)
+        g1v2 = Vertex.new(:name => :g1v2)
+        g1.connect(g1v1, g1v2)
+        g2v1 = Vertex.new(:name => :g2v1)
+        g2v2 = Vertex.new(:name => :g2v2)
+        g2.connect(g2v1, g2v2)
+        @pg = g1.product g2
+      end
+      
+      def simple_name_similarity(n1, n2)
+        1 - n1.to_s.codepoints.to_a.delete_if {|c| n2.to_s.codepoints.to_a.include? c}.length / n1.to_s.length.to_f
+      end
+
+      it "should give all nodes an initial similarity of 1 if no block is given" do
+        @pg.initial_similarity
+        @pg.vertices.each do |v|
+          v.initial_similarity.should be_within(0.001).of(1.0)
+          v.similarity.should be_within(0.001).of(1.0)
+        end
+      end
+      
+      it "should give all nodes the similarity as defined by the given block" do
+        @pg.initial_similarity {|v| simple_name_similarity v.g1_vertex.name, v.g2_vertex.name}
+        @pg.vertices.each do |v|
+          v.initial_similarity.should be_within(0.001).of( simple_name_similarity v.g1_vertex.name, v.g2_vertex.name )
+          v.similarity.should be_within(0.001).of( simple_name_similarity v.g1_vertex.name, v.g2_vertex.name )
+        end
+
+      end
+    end
+    
     describe "similarity flood" do
         it "similarity floods a graph of two nodes" do
+          g1 = Graph.new
+          g2 = Graph.new
+          g1v1 = Vertex.new(:name => :g1v1)
+          g1v2 = Vertex.new(:name => :g1v2)
+          g1.connect(g1v1, g1v2)
+          g2v1 = Vertex.new(:name => :g2v1)
+          g2v2 = Vertex.new(:name => :g2v2)
+          g2.connect(g2v1, g2v2)
+          pg = g1.product g2
+
+          pg.initial_similarity
+          pg.similarity_flood
+          pg.vertices.each do |v|
+            v.similarity.should be_within(0.001).of(1.0)
+          end
         end
         
         it "similarity floods a graph of three nodes, a -- b -- c" do