From f23b3d51ee9618aeaa1ac35153b275b91d3ef972 Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Fri, 18 May 2012 14:54:05 +0100 Subject: [PATCH] Finished graph produce and similarity matching --- lib/graph.njae/graph.rb | 28 ++++--- spec/graph/graph_spec.rb | 163 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 10 deletions(-) diff --git a/lib/graph.njae/graph.rb b/lib/graph.njae/graph.rb index 288fc74..3029ceb 100644 --- a/lib/graph.njae/graph.rb +++ b/lib/graph.njae/graph.rb @@ -1,5 +1,9 @@ require 'ostruct' +require 'logger' +$log = Logger.new(STDERR) +$log.level = Logger::WARN + # A simple graph library module GraphNjae @@ -81,19 +85,23 @@ module GraphNjae iteration = 1 residual = max_residual + 1 while residual > max_residual and iteration <= max_iterations -# puts "Starting iteration #{iteration}" + $log.debug { "Starting iteration #{iteration}" } self.vertices.each do |v| v.last_similarity = v.similarity end self.vertices.each do |v| -# puts "Processing vertex #{v.name}" - edge_groups = v.edges.group_by {|e| e.type } -# puts " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" - edge_groups.each do |type, edges| -# puts " Processing group type #{type}" - n = edges.length - edges.each do |e| - e.other_end(v).similarity += v.last_similarity / n + if block_given? + v.similarity = yield v + else + $log.debug { "Processing vertex #{v.name}" } + edge_groups = v.edges.group_by {|e| e.type } + $log.debug { " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" } + edge_groups.each do |type, edges| + $log.debug { " Processing group type #{type}" } + n = edges.length + edges.each do |e| + e.other_end(v).similarity += v.last_similarity / n + end end end end @@ -108,7 +116,7 @@ module GraphNjae v.similarity = v.similarity / max_similarity end residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2}) -# puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" + $log.debug { puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" } iteration += 1 end diff --git a/spec/graph/graph_spec.rb b/spec/graph/graph_spec.rb index cd143b1..82444db 100644 --- a/spec/graph/graph_spec.rb +++ b/spec/graph/graph_spec.rb @@ -316,6 +316,169 @@ module GraphNjae v.similarity.should be_within(0.02).of(expected_similarities[v.name]) end end + + it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs the default update" do + g1 = Graph.new + g2 = Graph.new + g1v1 = Vertex.new(:name => :g1v1) + g1v2 = Vertex.new(:name => :g1v2) + g1v3 = Vertex.new(:name => :g1v3) + g1.connect(g1v1, g1v2, :type => :t1) + g1.connect(g1v2, g1v3, :type => :t2) + g2v1 = Vertex.new(:name => :g2v1) + g2v2 = Vertex.new(:name => :g2v2) + g2v3 = Vertex.new(:name => :g2v3) + g2.connect(g2v1, g2v2, :type => :t1) + g2.connect(g2v2, g2v3, :type => :t2) + pg = g1.product g2 + + pg.initial_similarity + pg.similarity_flood do |v| + edge_groups = v.edges.group_by {|e| e.type } + edge_groups.each do |type, edges| + n = edges.length + edges.each do |e| + e.other_end(v).similarity += v.last_similarity / n + end + end + v.similarity + end + expected_similarities = { + "g1v1:g2v1" => 0.5, + "g1v1:g2v2" => 0.6666666666666666, + "g1v2:g2v1" => 0.6666666666666666, + "g1v2:g2v2" => 1.0, + "g1v2:g2v3" => 0.6666666666666666, + "g1v3:g2v2" => 0.6666666666666666, + "g1v3:g2v3" => 0.5} + pg.vertices.each do |v| + name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s + v.similarity.should be_within(0.001).of(expected_similarities[name]) + end + end + + it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs a different update (method A)" do + g1 = Graph.new + g2 = Graph.new + g1v1 = Vertex.new(:name => :g1v1) + g1v2 = Vertex.new(:name => :g1v2) + g1v3 = Vertex.new(:name => :g1v3) + g1.connect(g1v1, g1v2, :type => :t1) + g1.connect(g1v2, g1v3, :type => :t2) + g2v1 = Vertex.new(:name => :g2v1) + g2v2 = Vertex.new(:name => :g2v2) + g2v3 = Vertex.new(:name => :g2v3) + g2.connect(g2v1, g2v2, :type => :t1) + g2.connect(g2v2, g2v3, :type => :t2) + pg = g1.product g2 + + pg.initial_similarity + pg.similarity_flood do |v| + v.similarity = v.initial_similarity + edge_groups = v.edges.group_by {|e| e.type } + edge_groups.each do |type, edges| + n = edges.length + edges.each do |e| + e.other_end(v).similarity += v.last_similarity / n + end + end + v.similarity + end + expected_similarities = { + "g1v1:g2v1" => 0.9269662921348315, + "g1v1:g2v2" => 1.0, + "g1v2:g2v1" => 0.6179775280898876, + "g1v2:g2v2" => 1.0, + "g1v2:g2v3" => 1.0, + "g1v3:g2v2" => 0.6179775280898876, + "g1v3:g2v3" => 0.6179775280898876} + pg.vertices.each do |v| + name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s + v.similarity.should be_within(0.001).of(expected_similarities[name]) + end + end + + it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs a different update (method B)" do + g1 = Graph.new + g2 = Graph.new + g1v1 = Vertex.new(:name => :g1v1) + g1v2 = Vertex.new(:name => :g1v2) + g1v3 = Vertex.new(:name => :g1v3) + g1.connect(g1v1, g1v2, :type => :t1) + g1.connect(g1v2, g1v3, :type => :t2) + g2v1 = Vertex.new(:name => :g2v1) + g2v2 = Vertex.new(:name => :g2v2) + g2v3 = Vertex.new(:name => :g2v3) + g2.connect(g2v1, g2v2, :type => :t1) + g2.connect(g2v2, g2v3, :type => :t2) + pg = g1.product g2 + + pg.initial_similarity + pg.similarity_flood do |v| + v.similarity = 0.0 + edge_groups = v.edges.group_by {|e| e.type } + edge_groups.each do |type, edges| + n = edges.length + edges.each do |e| + e.other_end(v).similarity += (v.initial_similarity + v.last_similarity) / n + end + end + v.similarity + end + expected_similarities = { + "g1v1:g2v1" => 1.0, + "g1v1:g2v2" => 1.0, + "g1v2:g2v1" => 0.0, + "g1v2:g2v2" => 1.0, + "g1v2:g2v3" => 1.0, + "g1v3:g2v2" => 0.0, + "g1v3:g2v3" => 0.0} + pg.vertices.each do |v| + name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s + v.similarity.should be_within(0.001).of(expected_similarities[name]) + end + end + + it "similarity floods a graph of three nodes, a -- b -- c, given a block that performs a different update (method C)" do + g1 = Graph.new + g2 = Graph.new + g1v1 = Vertex.new(:name => :g1v1) + g1v2 = Vertex.new(:name => :g1v2) + g1v3 = Vertex.new(:name => :g1v3) + g1.connect(g1v1, g1v2, :type => :t1) + g1.connect(g1v2, g1v3, :type => :t2) + g2v1 = Vertex.new(:name => :g2v1) + g2v2 = Vertex.new(:name => :g2v2) + g2v3 = Vertex.new(:name => :g2v3) + g2.connect(g2v1, g2v2, :type => :t1) + g2.connect(g2v2, g2v3, :type => :t2) + pg = g1.product g2 + + pg.initial_similarity + pg.similarity_flood do |v| + v.similarity = v.initial_similarity + v.last_similarity + edge_groups = v.edges.group_by {|e| e.type } + edge_groups.each do |type, edges| + n = edges.length + edges.each do |e| + e.other_end(v).similarity += (v.initial_similarity + v.last_similarity) / n + end + end + v.similarity + end + expected_similarities = { + "g1v1:g2v1" => 0.8282781862745098, + "g1v1:g2v2" => 1.0, + "g1v2:g2v1" => 0.41421568627450983, + "g1v2:g2v2" => 1.0, + "g1v2:g2v3" => 1.0, + "g1v3:g2v2" => 0.41421568627450983, + "g1v3:g2v3" => 0.41421568627450983} + pg.vertices.each do |v| + name = v.g1_vertex.name.to_s + ':' + v.g2_vertex.name.to_s + v.similarity.should be_within(0.001).of(expected_similarities[name]) + end + end end # similarity_flood end -- 2.34.1