Finished graph produce and similarity matching
authorNeil Smith <neil.github@njae.me.uk>
Fri, 18 May 2012 13:54:05 +0000 (14:54 +0100)
committerNeil Smith <neil.github@njae.me.uk>
Fri, 18 May 2012 13:54:05 +0000 (14:54 +0100)
lib/graph.njae/graph.rb
spec/graph/graph_spec.rb

index 288fc74f569481388a2f1f56ea272402b6f600b6..3029cebd6c403d69d97aa9bf465d50c176d4af56 100644 (file)
@@ -1,5 +1,9 @@
 require 'ostruct'
 
+require 'logger'
+$log = Logger.new(STDERR)
+$log.level = Logger::WARN
+
 # A simple graph library
 
 module GraphNjae
@@ -81,19 +85,23 @@ module GraphNjae
       iteration = 1
       residual = max_residual + 1
       while residual > max_residual and iteration <= max_iterations
-# puts "Starting iteration #{iteration}"        
+        $log.debug { "Starting iteration #{iteration}" }
         self.vertices.each do |v|
           v.last_similarity = v.similarity
         end
         self.vertices.each do |v|
-# puts "Processing vertex #{v.name}"          
-          edge_groups = v.edges.group_by {|e| e.type }
-# puts "  Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}"          
-          edge_groups.each do |type, edges|
-# puts "    Processing group type #{type}"
-            n = edges.length
-            edges.each do |e|
-              e.other_end(v).similarity += v.last_similarity / n
+          if block_given?
+            v.similarity = yield v
+          else
+            $log.debug { "Processing vertex #{v.name}" }
+            edge_groups = v.edges.group_by {|e| e.type }
+            $log.debug { "  Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" }
+            edge_groups.each do |type, edges|
+              $log.debug { "    Processing group type #{type}" }
+              n = edges.length
+              edges.each do |e|
+                e.other_end(v).similarity += v.last_similarity / n
+              end
             end
           end
         end
@@ -108,7 +116,7 @@ module GraphNjae
           v.similarity = v.similarity / max_similarity
         end
         residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2})
-# puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}"
+        $log.debug { puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" }
         iteration += 1
       end
       
index cd143b1f11da53d3e6407af0b648f08ab07481e4..82444db41a08a4e017dc8c4442851ccb6274f628 100644 (file)
@@ -316,6 +316,169 @@ module GraphNjae
           v.similarity.should be_within(0.02).of(expected_similarities[v.name])
         end
       end
+      
+      it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs the default update" do
+        g1 = Graph.new
+        g2 = Graph.new
+        g1v1 = Vertex.new(:name => :g1v1)
+        g1v2 = Vertex.new(:name => :g1v2)
+        g1v3 = Vertex.new(:name => :g1v3)
+        g1.connect(g1v1, g1v2, :type => :t1)
+        g1.connect(g1v2, g1v3, :type => :t2)
+        g2v1 = Vertex.new(:name => :g2v1)
+        g2v2 = Vertex.new(:name => :g2v2)
+        g2v3 = Vertex.new(:name => :g2v3)
+        g2.connect(g2v1, g2v2, :type => :t1)
+        g2.connect(g2v2, g2v3, :type => :t2)
+        pg = g1.product g2
+        
+        pg.initial_similarity
+        pg.similarity_flood do |v|
+          edge_groups = v.edges.group_by {|e| e.type }
+          edge_groups.each do |type, edges|
+            n = edges.length
+            edges.each do |e|
+              e.other_end(v).similarity += v.last_similarity / n
+            end
+          end
+          v.similarity
+        end
+        expected_similarities = {
+          "g1v1:g2v1" => 0.5,
+          "g1v1:g2v2" => 0.6666666666666666,
+          "g1v2:g2v1" => 0.6666666666666666,
+          "g1v2:g2v2" => 1.0,
+          "g1v2:g2v3" => 0.6666666666666666,
+          "g1v3:g2v2" => 0.6666666666666666,
+          "g1v3:g2v3" => 0.5}
+        pg.vertices.each do |v|
+          name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s
+          v.similarity.should be_within(0.001).of(expected_similarities[name])
+        end
+      end
+
+      it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs a different update (method A)" do
+        g1 = Graph.new
+        g2 = Graph.new
+        g1v1 = Vertex.new(:name => :g1v1)
+        g1v2 = Vertex.new(:name => :g1v2)
+        g1v3 = Vertex.new(:name => :g1v3)
+        g1.connect(g1v1, g1v2, :type => :t1)
+        g1.connect(g1v2, g1v3, :type => :t2)
+        g2v1 = Vertex.new(:name => :g2v1)
+        g2v2 = Vertex.new(:name => :g2v2)
+        g2v3 = Vertex.new(:name => :g2v3)
+        g2.connect(g2v1, g2v2, :type => :t1)
+        g2.connect(g2v2, g2v3, :type => :t2)
+        pg = g1.product g2
+        
+        pg.initial_similarity
+        pg.similarity_flood do |v|
+          v.similarity = v.initial_similarity
+          edge_groups = v.edges.group_by {|e| e.type }
+          edge_groups.each do |type, edges|
+            n = edges.length
+            edges.each do |e|
+              e.other_end(v).similarity += v.last_similarity / n
+            end
+          end
+          v.similarity
+        end
+        expected_similarities = {
+          "g1v1:g2v1" => 0.9269662921348315,
+          "g1v1:g2v2" => 1.0,
+          "g1v2:g2v1" => 0.6179775280898876,
+          "g1v2:g2v2" => 1.0,
+          "g1v2:g2v3" => 1.0,
+          "g1v3:g2v2" => 0.6179775280898876,
+          "g1v3:g2v3" => 0.6179775280898876}
+        pg.vertices.each do |v|
+          name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s
+          v.similarity.should be_within(0.001).of(expected_similarities[name])
+        end
+      end
+
+      it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs a different update (method B)" do
+        g1 = Graph.new
+        g2 = Graph.new
+        g1v1 = Vertex.new(:name => :g1v1)
+        g1v2 = Vertex.new(:name => :g1v2)
+        g1v3 = Vertex.new(:name => :g1v3)
+        g1.connect(g1v1, g1v2, :type => :t1)
+        g1.connect(g1v2, g1v3, :type => :t2)
+        g2v1 = Vertex.new(:name => :g2v1)
+        g2v2 = Vertex.new(:name => :g2v2)
+        g2v3 = Vertex.new(:name => :g2v3)
+        g2.connect(g2v1, g2v2, :type => :t1)
+        g2.connect(g2v2, g2v3, :type => :t2)
+        pg = g1.product g2
+        
+        pg.initial_similarity
+        pg.similarity_flood do |v|
+          v.similarity = 0.0
+          edge_groups = v.edges.group_by {|e| e.type }
+          edge_groups.each do |type, edges|
+            n = edges.length
+            edges.each do |e|
+              e.other_end(v).similarity += (v.initial_similarity + v.last_similarity) / n
+            end
+          end
+          v.similarity
+        end
+        expected_similarities = {
+          "g1v1:g2v1" => 1.0,
+          "g1v1:g2v2" => 1.0,
+          "g1v2:g2v1" => 0.0,
+          "g1v2:g2v2" => 1.0,
+          "g1v2:g2v3" => 1.0,
+          "g1v3:g2v2" => 0.0,
+          "g1v3:g2v3" => 0.0}
+        pg.vertices.each do |v|
+          name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s
+          v.similarity.should be_within(0.001).of(expected_similarities[name])
+        end
+      end
+
+      it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs a different update (method C)" do
+        g1 = Graph.new
+        g2 = Graph.new
+        g1v1 = Vertex.new(:name => :g1v1)
+        g1v2 = Vertex.new(:name => :g1v2)
+        g1v3 = Vertex.new(:name => :g1v3)
+        g1.connect(g1v1, g1v2, :type => :t1)
+        g1.connect(g1v2, g1v3, :type => :t2)
+        g2v1 = Vertex.new(:name => :g2v1)
+        g2v2 = Vertex.new(:name => :g2v2)
+        g2v3 = Vertex.new(:name => :g2v3)
+        g2.connect(g2v1, g2v2, :type => :t1)
+        g2.connect(g2v2, g2v3, :type => :t2)
+        pg = g1.product g2
+        
+        pg.initial_similarity
+        pg.similarity_flood do |v|
+          v.similarity = v.initial_similarity + v.last_similarity
+          edge_groups = v.edges.group_by {|e| e.type }
+          edge_groups.each do |type, edges|
+            n = edges.length
+            edges.each do |e|
+              e.other_end(v).similarity += (v.initial_similarity + v.last_similarity) / n
+            end
+          end
+          v.similarity
+        end
+        expected_similarities = {
+          "g1v1:g2v1" => 0.8282781862745098,
+          "g1v1:g2v2" => 1.0,
+          "g1v2:g2v1" => 0.41421568627450983,
+          "g1v2:g2v2" => 1.0,
+          "g1v2:g2v3" => 1.0,
+          "g1v3:g2v2" => 0.41421568627450983,
+          "g1v3:g2v3" => 0.41421568627450983}
+        pg.vertices.each do |v|
+          name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s
+          v.similarity.should be_within(0.001).of(expected_similarities[name])
+        end
+      end
     end # similarity_flood
     
   end