X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=lib%2Fgraph.njae%2Fgraph.rb;h=bdf75b231e0089ec0acbae0dfc9c2f5fb6d2ce8b;hb=53a70e2e8b36fc7b7f0b7edb2b1448969f8df0c8;hp=288fc74f569481388a2f1f56ea272402b6f600b6;hpb=95b8572d01c0b9c3aa8fc5a1e85a045c33d4ecf1;p=graph.njae.git diff --git a/lib/graph.njae/graph.rb b/lib/graph.njae/graph.rb index 288fc74..bdf75b2 100644 --- a/lib/graph.njae/graph.rb +++ b/lib/graph.njae/graph.rb @@ -1,5 +1,9 @@ require 'ostruct' +require 'logger' +$log = Logger.new(STDERR) +$log.level = Logger::WARN + # A simple graph library module GraphNjae @@ -62,6 +66,10 @@ module GraphNjae end + # Calculates the initial similarity of each vertex in a product graph. + # If passed an optional block, that block is used to find the + # initial similarity. If no block is given, every vertex is given + # an initial similarity of 1.0. def initial_similarity self.vertices.each do |v| if block_given? @@ -73,42 +81,48 @@ module GraphNjae end end - # Performs similarity flooding on a graph + # Performs similarity flooding on a graph, as described by + # Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm, + # "Similarity Flooding: A Versatile Graph Matching Algorithm + # and its Application to Schema Matching", Proceedings of + # the 18th International Conference on Data Engineering (ICDE’02) + # # Assumes that the initial similarity has already been calculated + # If passed an optional block, it uses that block to update the + # similarity on each iteration. If no block is passed, it uses the + # default similarity updating method from the paper. def similarity_flood(opts = {}) max_iterations = opts[:iterations] || 100 max_residual = opts[:max_residual] || 0.001 iteration = 1 residual = max_residual + 1 while residual > max_residual and iteration <= max_iterations -# puts "Starting iteration #{iteration}" + $log.debug { "Starting iteration #{iteration}" } self.vertices.each do |v| v.last_similarity = v.similarity end self.vertices.each do |v| -# puts "Processing vertex #{v.name}" - edge_groups = v.edges.group_by {|e| e.type } -# puts " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" - edge_groups.each do |type, edges| -# puts " Processing group type #{type}" - n = edges.length - edges.each do |e| - e.other_end(v).similarity += v.last_similarity / n + if block_given? + v.similarity = yield v + else + $log.debug { "Processing vertex #{v.name}" } + edge_groups = v.edges.group_by {|e| e.type } + $log.debug { " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" } + edge_groups.each do |type, edges| + $log.debug { " Processing group type #{type}" } + n = edges.length + edges.each do |e| + e.other_end(v).similarity += v.last_similarity / n + end end end end -# self.vertices.each do |v| -# n = v.neighbours.length -# v.neighbours.each do |neighbour| -# neighbour.similarity += v.last_similarity / n -# end -# end max_similarity = vertices.map {|v| v.similarity}.max self.vertices.each do |v| v.similarity = v.similarity / max_similarity end residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2}) -# puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" + $log.debug { puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" } iteration += 1 end