
# Seeds every vertex of the graph with its starting similarity.
#
# When a block is given it is called once per vertex and its result becomes
# that vertex's initial similarity; without a block every vertex starts at
# 1.0. The vertex's current similarity is set to the same value.
#
# Returns whatever +vertices.each+ returns (the vertex collection itself
# when +vertices+ is an Array).
def initial_similarity
  vertices.each do |v|
    v.initial_similarity = block_given? ? yield(v) : 1.0
    v.similarity = v.initial_similarity
  end
end

# Performs similarity flooding on a graph, as described by
# Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm,
# "Similarity Flooding: A Versatile Graph Matching Algorithm
# and its Application to Schema Matching", Proceedings of
# the 18th International Conference on Data Engineering (ICDE'02).
#
# Assumes that each vertex's similarity has already been initialised.
#
# Options (all optional, passed in the +opts+ hash):
#   :iterations   - maximum number of iterations to run (default 100)
#   :max_residual - stop once the residual between two successive
#                   iterations is no longer above this value (default 0.001)
#
# If a block is given, it is called with each vertex on every iteration and
# its result becomes that vertex's new (pre-normalisation) similarity.
# Without a block, each vertex splits its previous similarity evenly among
# its edges of the same type and adds that share to the vertex at the other
# end of each edge.
#
# After every iteration the similarities are divided by the largest
# similarity in the graph. Diagnostic output goes to the global +$log+,
# which must respond to +debug+ with a block.
#
# Returns nil; the result is left in each vertex's +similarity+.
def similarity_flood(opts = {})
  max_iterations = opts[:iterations] || 100
  max_residual = opts[:max_residual] || 0.001

  1.upto(max_iterations) do |iteration|
    $log.debug { "Starting iteration #{iteration}" }

    # Snapshot the previous iteration's values first, so every update below
    # reads from the same consistent state regardless of vertex order.
    vertices.each { |v| v.last_similarity = v.similarity }

    vertices.each do |v|
      if block_given?
        v.similarity = yield v
        next
      end

      $log.debug { "Processing vertex #{v.name}" }
      edge_groups = v.edges.group_by { |e| e.type }
      $log.debug do
        groups = edge_groups.map do |type, edges|
          "#{type} => {#{edges.map { |e| e.to_s }.join(', ')}}"
        end
        " Edge groups {#{groups.join('; ')}}"
      end

      edge_groups.each do |type, edges|
        $log.debug { " Processing group type #{type}" }
        # fdiv keeps the share fractional even when similarities are Integers.
        share = v.last_similarity.fdiv(edges.length)
        # other_end(v) is assumed to return the vertex at the opposite end
        # of the edge from v.
        edges.each { |e| e.other_end(v).similarity += share }
      end
    end

    # Normalise by the largest similarity. Skip when there is nothing to
    # scale by (no vertices, or every similarity is zero) to avoid NaN or
    # ZeroDivisionError.
    max_similarity = vertices.map { |v| v.similarity }.max
    unless max_similarity.nil? || max_similarity.zero?
      vertices.each { |v| v.similarity = v.similarity.fdiv(max_similarity) }
    end

    # Euclidean distance between this iteration's and the previous
    # iteration's similarity vectors.
    residual = Math.sqrt(
      vertices.reduce(0) { |acc, v| acc + (v.similarity - v.last_similarity)**2 }
    )
    $log.debug do
      sims = vertices.map { |v| "#{v.name} = #{v.similarity.round(2)}" }
      "Residual = #{residual.round(3)}, sims = #{sims}"
    end

    # Written as "unless >" (not "if <=") so a NaN residual also stops the loop.
    break unless residual > max_residual
  end

  nil
end
