From f23b3d51ee9618aeaa1ac35153b275b91d3ef972 Mon Sep 17 00:00:00 2001
From: Neil Smith <neil.github@njae.me.uk>
Date: Fri, 18 May 2012 14:54:05 +0100
Subject: [PATCH] Finished graph produce and similarity matching

---
 lib/graph.njae/graph.rb  |  28 ++++---
 spec/graph/graph_spec.rb | 163 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 181 insertions(+), 10 deletions(-)

diff --git a/lib/graph.njae/graph.rb b/lib/graph.njae/graph.rb
index 288fc74..3029ceb 100644
--- a/lib/graph.njae/graph.rb
+++ b/lib/graph.njae/graph.rb
@@ -1,5 +1,9 @@
 require 'ostruct'
 
+require 'logger'
+$log = Logger.new(STDERR)
+$log.level = Logger::WARN
+
 # A simple graph library
 
 module GraphNjae
@@ -81,19 +85,23 @@ module GraphNjae
       iteration = 1
       residual = max_residual + 1
       while residual > max_residual and iteration <= max_iterations
-# puts "Starting iteration #{iteration}"        
+        $log.debug { "Starting iteration #{iteration}" }
         self.vertices.each do |v|
           v.last_similarity = v.similarity
         end
         self.vertices.each do |v|
-# puts "Processing vertex #{v.name}"          
-          edge_groups = v.edges.group_by {|e| e.type }
-# puts "  Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}"          
-          edge_groups.each do |type, edges|
-# puts "    Processing group type #{type}"
-            n = edges.length
-            edges.each do |e|
-              e.other_end(v).similarity += v.last_similarity / n
+          if block_given?
+            v.similarity = yield v
+          else
+            $log.debug { "Processing vertex #{v.name}" }
+            edge_groups = v.edges.group_by {|e| e.type }
+            $log.debug { "  Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" }
+            edge_groups.each do |type, edges|
+              $log.debug { "    Processing group type #{type}" }
+              n = edges.length
+              edges.each do |e|
+                e.other_end(v).similarity += v.last_similarity / n
+              end
             end
           end
         end
@@ -108,7 +116,7 @@ module GraphNjae
           v.similarity = v.similarity / max_similarity
         end
         residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2})
-# puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}"
+        $log.debug { puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" }
         iteration += 1
       end
       
diff --git a/spec/graph/graph_spec.rb b/spec/graph/graph_spec.rb
index cd143b1..82444db 100644
--- a/spec/graph/graph_spec.rb
+++ b/spec/graph/graph_spec.rb
@@ -316,6 +316,169 @@ module GraphNjae
           v.similarity.should be_within(0.02).of(expected_similarities[v.name])
         end
       end
+      
+      it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs the default update" do
+        g1 = Graph.new
+        g2 = Graph.new
+        g1v1 = Vertex.new(:name => :g1v1)
+        g1v2 = Vertex.new(:name => :g1v2)
+        g1v3 = Vertex.new(:name => :g1v3)
+        g1.connect(g1v1, g1v2, :type => :t1)
+        g1.connect(g1v2, g1v3, :type => :t2)
+        g2v1 = Vertex.new(:name => :g2v1)
+        g2v2 = Vertex.new(:name => :g2v2)
+        g2v3 = Vertex.new(:name => :g2v3)
+        g2.connect(g2v1, g2v2, :type => :t1)
+        g2.connect(g2v2, g2v3, :type => :t2)
+        pg = g1.product g2
+        
+        pg.initial_similarity
+        pg.similarity_flood do |v|
+          edge_groups = v.edges.group_by {|e| e.type }
+          edge_groups.each do |type, edges|
+            n = edges.length
+            edges.each do |e|
+              e.other_end(v).similarity += v.last_similarity / n
+            end
+          end
+          v.similarity
+        end
+        expected_similarities = {
+          "g1v1:g2v1" => 0.5,
+          "g1v1:g2v2" => 0.6666666666666666,
+          "g1v2:g2v1" => 0.6666666666666666,
+          "g1v2:g2v2" => 1.0,
+          "g1v2:g2v3" => 0.6666666666666666,
+          "g1v3:g2v2" => 0.6666666666666666,
+          "g1v3:g2v3" => 0.5}
+        pg.vertices.each do |v|
+          name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s
+          v.similarity.should be_within(0.001).of(expected_similarities[name])
+        end
+      end
+
+      it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs a different update (method A)" do
+        g1 = Graph.new
+        g2 = Graph.new
+        g1v1 = Vertex.new(:name => :g1v1)
+        g1v2 = Vertex.new(:name => :g1v2)
+        g1v3 = Vertex.new(:name => :g1v3)
+        g1.connect(g1v1, g1v2, :type => :t1)
+        g1.connect(g1v2, g1v3, :type => :t2)
+        g2v1 = Vertex.new(:name => :g2v1)
+        g2v2 = Vertex.new(:name => :g2v2)
+        g2v3 = Vertex.new(:name => :g2v3)
+        g2.connect(g2v1, g2v2, :type => :t1)
+        g2.connect(g2v2, g2v3, :type => :t2)
+        pg = g1.product g2
+        
+        pg.initial_similarity
+        pg.similarity_flood do |v|
+          v.similarity = v.initial_similarity
+          edge_groups = v.edges.group_by {|e| e.type }
+          edge_groups.each do |type, edges|
+            n = edges.length
+            edges.each do |e|
+              e.other_end(v).similarity += v.last_similarity / n
+            end
+          end
+          v.similarity
+        end
+        expected_similarities = {
+          "g1v1:g2v1" => 0.9269662921348315,
+          "g1v1:g2v2" => 1.0,
+          "g1v2:g2v1" => 0.6179775280898876,
+          "g1v2:g2v2" => 1.0,
+          "g1v2:g2v3" => 1.0,
+          "g1v3:g2v2" => 0.6179775280898876,
+          "g1v3:g2v3" => 0.6179775280898876}
+        pg.vertices.each do |v|
+          name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s
+          v.similarity.should be_within(0.001).of(expected_similarities[name])
+        end
+      end
+
+      it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs a different update (method B)" do
+        g1 = Graph.new
+        g2 = Graph.new
+        g1v1 = Vertex.new(:name => :g1v1)
+        g1v2 = Vertex.new(:name => :g1v2)
+        g1v3 = Vertex.new(:name => :g1v3)
+        g1.connect(g1v1, g1v2, :type => :t1)
+        g1.connect(g1v2, g1v3, :type => :t2)
+        g2v1 = Vertex.new(:name => :g2v1)
+        g2v2 = Vertex.new(:name => :g2v2)
+        g2v3 = Vertex.new(:name => :g2v3)
+        g2.connect(g2v1, g2v2, :type => :t1)
+        g2.connect(g2v2, g2v3, :type => :t2)
+        pg = g1.product g2
+        
+        pg.initial_similarity
+        pg.similarity_flood do |v|
+          v.similarity = 0.0
+          edge_groups = v.edges.group_by {|e| e.type }
+          edge_groups.each do |type, edges|
+            n = edges.length
+            edges.each do |e|
+              e.other_end(v).similarity += (v.initial_similarity + v.last_similarity) / n
+            end
+          end
+          v.similarity
+        end
+        expected_similarities = {
+          "g1v1:g2v1" => 1.0,
+          "g1v1:g2v2" => 1.0,
+          "g1v2:g2v1" => 0.0,
+          "g1v2:g2v2" => 1.0,
+          "g1v2:g2v3" => 1.0,
+          "g1v3:g2v2" => 0.0,
+          "g1v3:g2v3" => 0.0}
+        pg.vertices.each do |v|
+          name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s
+          v.similarity.should be_within(0.001).of(expected_similarities[name])
+        end
+      end
+
+      it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs a different update (method C)" do
+        g1 = Graph.new
+        g2 = Graph.new
+        g1v1 = Vertex.new(:name => :g1v1)
+        g1v2 = Vertex.new(:name => :g1v2)
+        g1v3 = Vertex.new(:name => :g1v3)
+        g1.connect(g1v1, g1v2, :type => :t1)
+        g1.connect(g1v2, g1v3, :type => :t2)
+        g2v1 = Vertex.new(:name => :g2v1)
+        g2v2 = Vertex.new(:name => :g2v2)
+        g2v3 = Vertex.new(:name => :g2v3)
+        g2.connect(g2v1, g2v2, :type => :t1)
+        g2.connect(g2v2, g2v3, :type => :t2)
+        pg = g1.product g2
+        
+        pg.initial_similarity
+        pg.similarity_flood do |v|
+          v.similarity = v.initial_similarity + v.last_similarity
+          edge_groups = v.edges.group_by {|e| e.type }
+          edge_groups.each do |type, edges|
+            n = edges.length
+            edges.each do |e|
+              e.other_end(v).similarity += (v.initial_similarity + v.last_similarity) / n
+            end
+          end
+          v.similarity
+        end
+        expected_similarities = {
+          "g1v1:g2v1" => 0.8282781862745098,
+          "g1v1:g2v2" => 1.0,
+          "g1v2:g2v1" => 0.41421568627450983,
+          "g1v2:g2v2" => 1.0,
+          "g1v2:g2v3" => 1.0,
+          "g1v3:g2v2" => 0.41421568627450983,
+          "g1v3:g2v3" => 0.41421568627450983}
+        pg.vertices.each do |v|
+          name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s
+          v.similarity.should be_within(0.001).of(expected_similarities[name])
+        end
+      end
     end # similarity_flood
     
   end
-- 
2.43.0