-require 'ostruct'
+require 'logger'
+$log = Logger.new(STDERR)
+$log.level = Logger::WARN
# A simple graph library
edge << vertex1 << vertex2
end
+ def to_dot(opts = {})
+ vertex_args = opts[:vertex_args] || {}
+ vertex_block = opts[:vertex_block] || nil
+ edge_args = opts[:edge_args] || {}
+ edge_block = opts[:edge_block] || nil
+ dot = "graph {\n"
+ self.vertices.each do |v|
+ if vertex_block.nil?
+ dot << v.to_dot(vertex_args)
+ else
+ dot << v.to_dot(&vertex_block)
+ end
+ dot << "\n"
+ end
+ self.edges.each do |e|
+ if edge_block.nil?
+ dot << e.to_dot(edge_args)
+ else
+ dot << e.to_dot(&edge_block)
+ end
+ dot << "\n"
+ end
+ dot << '}'
+ end
+
# Form a product graph of this graph and the other.
# Return the product graph.
def product(other)
end
+ # Calculates the initial similarity of each vertex in a product graph.
+ # If passed an optional block, that block is used to find the
+ # initial similarity. If no block is given, every vertex is given
+ # an initial similarity of 1.0.
def initial_similarity
self.vertices.each do |v|
if block_given?
end
end
- # Performs similarity flooding on a graph
+ # Performs similarity flooding on a graph, as described by
+ # Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm,
+ # "Similarity Flooding: A Versatile Graph Matching Algorithm
+ # and its Application to Schema Matching", Proceedings of
+ # the 18th International Conference on Data Engineering (ICDE'02).
+ #
# Assumes that the initial similarity has already been calculated
+ # If passed an optional block, it uses that block to update the
+ # similarity on each iteration. If no block is passed, it uses the
+ # default similarity updating method from the paper.
def similarity_flood(opts = {})
max_iterations = opts[:iterations] || 100
max_residual = opts[:max_residual] || 0.001
iteration = 1
residual = max_residual + 1
while residual > max_residual and iteration <= max_iterations
-# puts "Starting iteration #{iteration}"
+ $log.debug { "Starting iteration #{iteration}" }
self.vertices.each do |v|
v.last_similarity = v.similarity
end
self.vertices.each do |v|
-# puts "Processing vertex #{v.name}"
- edge_groups = v.edges.group_by {|e| e.type }
-# puts " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}"
- edge_groups.each do |type, edges|
-# puts " Processing group type #{type}"
- n = edges.length
- edges.each do |e|
- e.other_end(v).similarity += v.last_similarity / n
+ if block_given?
+ v.similarity = yield v
+ else
+ $log.debug { "Processing vertex #{v.name}" }
+ edge_groups = v.edges.group_by {|e| e.type }
+ $log.debug { " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" }
+ edge_groups.each do |type, edges|
+ $log.debug { " Processing group type #{type}" }
+ n = edges.length
+ edges.each do |e|
+ e.other_end(v).similarity += v.last_similarity / n
+ end
end
end
end
-# self.vertices.each do |v|
-# n = v.neighbours.length
-# v.neighbours.each do |neighbour|
-# neighbour.similarity += v.last_similarity / n
-# end
-# end
max_similarity = vertices.map {|v| v.similarity}.max
self.vertices.each do |v|
v.similarity = v.similarity / max_similarity
end
residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2})
-# puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}"
+ $log.debug { puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" }
iteration += 1
end