X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=lib%2Fgraph.njae%2Fgraph.rb;h=37b772030053c146f61e14c2c286a3ed60c4bd8a;hb=99e4223d9b3baef392d4a6b0b996078f06fcadf7;hp=27ee54ff994338ee816141f2c8c15cc29d69e711;hpb=8f9dfdb2c7f308c2c0fe929f11a9652cb49e8ac7;p=graph.njae.git diff --git a/lib/graph.njae/graph.rb b/lib/graph.njae/graph.rb index 27ee54f..37b7720 100644 --- a/lib/graph.njae/graph.rb +++ b/lib/graph.njae/graph.rb @@ -1,5 +1,3 @@ -require 'ostruct' - require 'logger' $log = Logger.new(STDERR) $log.level = Logger::WARN @@ -66,6 +64,10 @@ module GraphNjae end + # Calculates the initial similarity of each vertex in a product graph. + # If passed an optional block, that block is used to find the + # initial similarity. If no block is given, every vertex is given + # an initial similarity of 1.0. def initial_similarity self.vertices.each do |v| if block_given? @@ -77,8 +79,16 @@ module GraphNjae end end - # Performs similarity flooding on a graph + # Performs similarity flooding on a graph, as described by + # Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm, + # "Similarity Flooding: A Versatile Graph Matching Algorithm + # and its Application to Schema Matching", Proceedings of + # the 18th International Conference on Data Engineering (ICDE’02) + # # Assumes that the initial similarity has already been calculated + # If passed an optional block, it uses that block to update the + # similarity on each iteration. If no block is passed, it uses the + # default similarity updating method from the paper. def similarity_flood(opts = {}) max_iterations = opts[:iterations] || 100 max_residual = opts[:max_residual] || 0.001