Tidied up the let clauses in edge_spec
[graph.njae.git] / lib / graph.njae / graph.rb
index 09d6137f0d96edc4b6a9936ad74adac38d972a17..37b772030053c146f61e14c2c286a3ed60c4bd8a 100644 (file)
@@ -1,4 +1,6 @@
-require 'ostruct'
+require 'logger'
+$log = Logger.new(STDERR)
+$log.level = Logger::WARN
 
 # A simple graph library
 
@@ -26,16 +28,16 @@ module GraphNjae
     
     # Connects two vertices, creating and storing a new edge
     # Also adds the vertices, unless they're already in the graph
-    def connect(vertex1, vertex2)
+    def connect(vertex1, vertex2, edge_attributes = {})
       self.vertices << vertex1 unless self.vertices.include? vertex1
       self.vertices << vertex2 unless self.vertices.include? vertex2
-      edge = Edge.new
+      edge = Edge.new(edge_attributes)
       self.edges << edge
       edge << vertex1 << vertex2
     end
     
     # Form a product graph of this graph and the other.
-    # Return the new graph.
+    # Return the product graph.
     def product(other)
       product_graph = Graph.new
       self.vertices.each do |v1|
@@ -46,17 +48,83 @@ module GraphNjae
       self.edges.each do |e1|
         e1_vertices = e1.vertices
         other.edges.each do |e2|
-          e2_vertices = e2.vertices
-          source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[0]}
-          destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[1]}
-          pe1 = product_graph.connect source, destination
-          source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[1]}
-          destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[0]}
-          pe2 = product_graph.connect source, destination
+          if e1.type == e2.type
+            e2_vertices = e2.vertices
+            source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[0]}
+            destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[1]}
+            product_graph.connect source, destination
+            source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[1]}
+            destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[0]}
+            product_graph.connect source, destination
+          end
         end
       end
+      product_graph.vertices.reject! {|v| v.neighbours.empty?}      
       product_graph
     end
     
+    
+    # Calculates the initial similarity of each vertex in a product graph.
+    # If passed an optional block, that block is used to find the 
+    # initial similarity. If no block is given, every vertex is given
+    # an initial similarity of 1.0.
+    def initial_similarity
+      self.vertices.each do |v|
+        if block_given?
+          v.initial_similarity = yield v
+        else
+          v.initial_similarity = 1.0
+        end
+        v.similarity = v.initial_similarity
+      end
+    end
+    
+    # Performs similarity flooding on a graph, as described by 
+    # Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm, 
+    # "Similarity Flooding: A Versatile Graph Matching Algorithm 
+    #    and its Application to Schema Matching", Proceedings of 
+    # the 18th International Conference on Data Engineering (ICDE’02)
+    #
+    # Assumes that the initial similarity has already been calculated
+    # If passed an optional block, it uses that block to update the 
+    # similarity on each iteration. If no block is passed, it uses the 
+    # default similarity updating method from the paper.
+    def similarity_flood(opts = {})
+      max_iterations = opts[:iterations] || 100
+      max_residual = opts[:max_residual] || 0.001
+      iteration = 1
+      residual = max_residual + 1
+      while residual > max_residual and iteration <= max_iterations
+        $log.debug { "Starting iteration #{iteration}" }
+        self.vertices.each do |v|
+          v.last_similarity = v.similarity
+        end
+        self.vertices.each do |v|
+          if block_given?
+            v.similarity = yield v
+          else
+            $log.debug { "Processing vertex #{v.name}" }
+            edge_groups = v.edges.group_by {|e| e.type }
+            $log.debug { "  Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" }
+            edge_groups.each do |type, edges|
+              $log.debug { "    Processing group type #{type}" }
+              n = edges.length
+              edges.each do |e|
+                e.other_end(v).similarity += v.last_similarity / n
+              end
+            end
+          end
+        end
+        max_similarity = vertices.map {|v| v.similarity}.max
+        self.vertices.each do |v|
+          v.similarity = v.similarity / max_similarity
+        end
+        residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2})
+        $log.debug { puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" }
+        iteration += 1
+      end
+      
+    end
+    
   end # class
 end