X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=lib%2Fgraph.njae%2Fgraph.rb;h=bdf75b231e0089ec0acbae0dfc9c2f5fb6d2ce8b;hb=53a70e2e8b36fc7b7f0b7edb2b1448969f8df0c8;hp=77b96da5fa7ea86d7b174cb7ddcedd1ebd24416e;hpb=2f72fc8e85b5c9c8bdcf622dc3b512a3dee5060c;p=graph.njae.git diff --git a/lib/graph.njae/graph.rb b/lib/graph.njae/graph.rb index 77b96da..bdf75b2 100644 --- a/lib/graph.njae/graph.rb +++ b/lib/graph.njae/graph.rb @@ -1,5 +1,9 @@ require 'ostruct' +require 'logger' +$log = Logger.new(STDERR) +$log.level = Logger::WARN + # A simple graph library module GraphNjae @@ -26,10 +30,10 @@ module GraphNjae # Connects two vertices, creating and storing a new edge # Also adds the vertices, unless they're already in the graph - def connect(vertex1, vertex2) + def connect(vertex1, vertex2, edge_attributes = {}) self.vertices << vertex1 unless self.vertices.include? vertex1 self.vertices << vertex2 unless self.vertices.include? vertex2 - edge = Edge.new + edge = Edge.new(edge_attributes) self.edges << edge edge << vertex1 << vertex2 end @@ -46,20 +50,82 @@ module GraphNjae self.edges.each do |e1| e1_vertices = e1.vertices other.edges.each do |e2| - e2_vertices = e2.vertices - source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[0]} - destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[1]} - product_graph.connect source, destination - source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[1]} - destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[0]} - product_graph.connect source, destination + if e1.type == e2.type + e2_vertices = e2.vertices + source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[0]} + destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[1]} + product_graph.connect source, destination + source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[1]} + destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[0]} + product_graph.connect source, destination + end end end + product_graph.vertices.reject! {|v| v.neighbours.empty?} product_graph end - # Performs similarity flooding on a graph - def similarity_flood(&normalization) + + # Calculates the initial similarity of each vertex in a product graph. + # If passed an optional block, that block is used to find the + # initial similarity. If no block is given, every vertex is given + # an initial similarity of 1.0. + def initial_similarity + self.vertices.each do |v| + if block_given? + v.initial_similarity = yield v + else + v.initial_similarity = 1.0 + end + v.similarity = v.initial_similarity + end + end + + # Performs similarity flooding on a graph, as described by + # Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm, + # "Similarity Flooding: A Versatile Graph Matching Algorithm + # and its Application to Schema Matching", Proceedings of + # the 18th International Conference on Data Engineering (ICDE’02) + # + # Assumes that the initial similarity has already been calculated + # If passed an optional block, it uses that block to update the + # similarity on each iteration. If no block is passed, it uses the + # default similarity updating method from the paper. + def similarity_flood(opts = {}) + max_iterations = opts[:iterations] || 100 + max_residual = opts[:max_residual] || 0.001 + iteration = 1 + residual = max_residual + 1 + while residual > max_residual and iteration <= max_iterations + $log.debug { "Starting iteration #{iteration}" } + self.vertices.each do |v| + v.last_similarity = v.similarity + end + self.vertices.each do |v| + if block_given? + v.similarity = yield v + else + $log.debug { "Processing vertex #{v.name}" } + edge_groups = v.edges.group_by {|e| e.type } + $log.debug { " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" } + edge_groups.each do |type, edges| + $log.debug { " Processing group type #{type}" } + n = edges.length + edges.each do |e| + e.other_end(v).similarity += v.last_similarity / n + end + end + end + end + max_similarity = vertices.map {|v| v.similarity}.max + self.vertices.each do |v| + v.similarity = v.similarity / max_similarity + end + residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2}) + $log.debug { puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" } + iteration += 1 + end + end end # class