X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=lib%2Fgraph.njae%2Fgraph.rb;h=eb2ddc6e3e55b296720e400cba6aea87296f38a5;hb=HEAD;hp=011dd8625720dd601cd4868257ae7224273fb133;hpb=32518a2c5089dbd3b9f74aad780ded127e3040ea;p=graph.njae.git diff --git a/lib/graph.njae/graph.rb b/lib/graph.njae/graph.rb index 011dd86..eb2ddc6 100644 --- a/lib/graph.njae/graph.rb +++ b/lib/graph.njae/graph.rb @@ -1,4 +1,6 @@ -require 'ostruct' +require 'logger' +$log = Logger.new(STDERR) +$log.level = Logger::WARN # A simple graph library @@ -26,14 +28,39 @@ module GraphNjae # Connects two vertices, creating and storing a new edge # Also adds the vertices, unless they're already in the graph - def connect(vertex1, vertex2) + def connect(vertex1, vertex2, edge_attributes = {}) self.vertices << vertex1 unless self.vertices.include? vertex1 self.vertices << vertex2 unless self.vertices.include? vertex2 - edge = Edge.new + edge = Edge.new(edge_attributes) self.edges << edge edge << vertex1 << vertex2 end + def to_dot(opts = {}) + vertex_args = opts[:vertex_args] || {} + vertex_block = opts[:vertex_block] || nil + edge_args = opts[:edge_args] || {} + edge_block = opts[:edge_block] || nil + dot = "graph {\n" + self.vertices.each do |v| + if vertex_block.nil? + dot << v.to_dot(vertex_args) + else + dot << v.to_dot(&vertex_block) + end + dot << "\n" + end + self.edges.each do |e| + if edge_block.nil? + dot << e.to_dot(edge_args) + else + dot << e.to_dot(&edge_block) + end + dot << "\n" + end + dot << '}' + end + # Form a product graph of this graph and the other. # Return the product graph. def product(other) @@ -46,19 +73,26 @@ module GraphNjae self.edges.each do |e1| e1_vertices = e1.vertices other.edges.each do |e2| - e2_vertices = e2.vertices - source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[0]} - destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[1]} - product_graph.connect source, destination - source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[1]} - destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[0]} - product_graph.connect source, destination + if e1.type == e2.type + e2_vertices = e2.vertices + source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[0]} + destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[1]} + product_graph.connect source, destination + source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[1]} + destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[0]} + product_graph.connect source, destination + end end end + product_graph.vertices.reject! {|v| v.neighbours.empty?} product_graph end + # Calculates the initial similarity of each vertex in a product graph. + # If passed an optional block, that block is used to find the + # initial similarity. If no block is given, every vertex is given + # an initial similarity of 1.0. def initial_similarity self.vertices.each do |v| if block_given? @@ -70,28 +104,48 @@ module GraphNjae end end - # Performs similarity flooding on a graph + # Performs similarity flooding on a graph, as described by + # Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm, + # "Similarity Flooding: A Versatile Graph Matching Algorithm + # and its Application to Schema Matching", Proceedings of + # the 18th International Conference on Data Engineering (ICDE’02) + # # Assumes that the initial similarity has already been calculated + # If passed an optional block, it uses that block to update the + # similarity on each iteration. If no block is passed, it uses the + # default similarity updating method from the paper. def similarity_flood(opts = {}) max_iterations = opts[:iterations] || 100 max_residual = opts[:max_residual] || 0.001 iteration = 1 residual = max_residual + 1 while residual > max_residual and iteration <= max_iterations + $log.debug { "Starting iteration #{iteration}" } self.vertices.each do |v| v.last_similarity = v.similarity end self.vertices.each do |v| - n = v.neighbours.length - v.neighbours.each do |neighbour| - neighbour.similarity += v.last_similarity / n - end + if block_given? + v.similarity = yield v + else + $log.debug { "Processing vertex #{v.name}" } + edge_groups = v.edges.group_by {|e| e.type } + $log.debug { " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" } + edge_groups.each do |type, edges| + $log.debug { " Processing group type #{type}" } + n = edges.length + edges.each do |e| + e.other_end(v).similarity += v.last_similarity / n + end + end + end end - max_similarity = vertices.max {|v, w| v.similarity <=> w.similarity}.similarity + max_similarity = vertices.map {|v| v.similarity}.max self.vertices.each do |v| v.similarity = v.similarity / max_similarity end residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2}) + $log.debug { puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" } iteration += 1 end