From 32518a2c5089dbd3b9f74aad780ded127e3040ea Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Fri, 23 Mar 2012 21:40:04 +0000 Subject: [PATCH] Initial similarity flooding working --- Gemfile | 2 +- Gemfile.lock | 3 +-- lib/graph.njae/graph.rb | 37 ++++++++++++++++++++++++++++- spec/graph/graph_spec.rb | 50 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 4 deletions(-) diff --git a/Gemfile b/Gemfile index 804c2dc..df7f779 100644 --- a/Gemfile +++ b/Gemfile @@ -8,5 +8,5 @@ source "http://rubygems.org" group :development do gem "rspec", "~> 2.8.0" gem "bundler", "~> 1.0.0" - gem "jeweler", "~> 1.6.2" + gem "jeweler", "~> 1.8.0" end diff --git a/Gemfile.lock b/Gemfile.lock index 0679be6..65179b5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -26,6 +26,5 @@ PLATFORMS DEPENDENCIES bundler (~> 1.0.0) - jeweler (~> 1.6.2) - rdoc + jeweler (~> 1.8.0) rspec (~> 2.8.0) diff --git a/lib/graph.njae/graph.rb b/lib/graph.njae/graph.rb index 77b96da..011dd86 100644 --- a/lib/graph.njae/graph.rb +++ b/lib/graph.njae/graph.rb @@ -58,8 +58,43 @@ module GraphNjae product_graph end + + def initial_similarity + self.vertices.each do |v| + if block_given? + v.initial_similarity = yield v + else + v.initial_similarity = 1.0 + end + v.similarity = v.initial_similarity + end + end + # Performs similarity flooding on a graph - def similarity_flood(&normalization) + # Assumes that the initial similarity has already been calculated + def similarity_flood(opts = {}) + max_iterations = opts[:iterations] || 100 + max_residual = opts[:max_residual] || 0.001 + iteration = 1 + residual = max_residual + 1 + while residual > max_residual and iteration <= max_iterations + self.vertices.each do |v| + v.last_similarity = v.similarity + end + self.vertices.each do |v| + n = v.neighbours.length + v.neighbours.each do |neighbour| + neighbour.similarity += v.last_similarity / n + end + end + max_similarity = vertices.max {|v, w| v.similarity <=> w.similarity}.similarity + self.vertices.each do |v| + v.similarity = v.similarity / max_similarity + end + residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2}) + iteration += 1 + end + end end # class diff --git a/spec/graph/graph_spec.rb b/spec/graph/graph_spec.rb index be6a7b5..8339946 100644 --- a/spec/graph/graph_spec.rb +++ b/spec/graph/graph_spec.rb @@ -133,8 +133,58 @@ module GraphNjae end + describe "initial_similarity" do + before(:each) do + g1 = Graph.new + g2 = Graph.new + g1v1 = Vertex.new(:name => :g1v1) + g1v2 = Vertex.new(:name => :g1v2) + g1.connect(g1v1, g1v2) + g2v1 = Vertex.new(:name => :g2v1) + g2v2 = Vertex.new(:name => :g2v2) + g2.connect(g2v1, g2v2) + @pg = g1.product g2 + end + + def simple_name_similarity(n1, n2) + 1 - n1.to_s.codepoints.to_a.delete_if {|c| n2.to_s.codepoints.to_a.include? c}.length / n1.to_s.length.to_f + end + + it "should give all nodes an initial similarity of 1 if no block is given" do + @pg.initial_similarity + @pg.vertices.each do |v| + v.initial_similarity.should be_within(0.001).of(1.0) + v.similarity.should be_within(0.001).of(1.0) + end + end + + it "should give all nodes the similarity as defined by the given block" do + @pg.initial_similarity {|v| simple_name_similarity v.g1_vertex.name, v.g2_vertex.name} + @pg.vertices.each do |v| + v.initial_similarity.should be_within(0.001).of( simple_name_similarity v.g1_vertex.name, v.g2_vertex.name ) + v.similarity.should be_within(0.001).of( simple_name_similarity v.g1_vertex.name, v.g2_vertex.name ) + end + + end + end + describe "similarity flood" do it "similarity floods a graph of two nodes" do + g1 = Graph.new + g2 = Graph.new + g1v1 = Vertex.new(:name => :g1v1) + g1v2 = Vertex.new(:name => :g1v2) + g1.connect(g1v1, g1v2) + g2v1 = Vertex.new(:name => :g2v1) + g2v2 = Vertex.new(:name => :g2v2) + g2.connect(g2v1, g2v2) + pg = g1.product g2 + + pg.initial_similarity + pg.similarity_flood + pg.vertices.each do |v| + v.similarity.should be_within(0.001).of(1.0) + end end it "similarity floods a graph of three nodes, a -- b -- c" do -- 2.34.1