Expanded a couple of comments, bumped the gem patch level
[graph.njae.git] / lib / graph.njae / graph.rb
1 require 'ostruct'
2
3 require 'logger'
4 $log = Logger.new(STDERR)
5 $log.level = Logger::WARN
6
7 # A simple graph library
8
9 module GraphNjae
10
11 # A container for all the parts of a graph. The graph can have arbitrary attributes,
12 # treated as method names.
13 class Graph < OpenStruct
14 def initialize(values = {})
15 super(values)
16 self.edges = Array.new
17 self.vertices = Array.new
18 self
19 end
20
21 # Add a Vertex or Edge to the graph.
22 def <<(other)
23 if other.class.ancestors.include? Vertex
24 self.vertices << other
25 elsif other.class.ancestors.include? Edge
26 self.edges << other
27 end
28 self
29 end
30
31 # Connects two vertices, creating and storing a new edge
32 # Also adds the vertices, unless they're already in the graph
33 def connect(vertex1, vertex2, edge_attributes = {})
34 self.vertices << vertex1 unless self.vertices.include? vertex1
35 self.vertices << vertex2 unless self.vertices.include? vertex2
36 edge = Edge.new(edge_attributes)
37 self.edges << edge
38 edge << vertex1 << vertex2
39 end
40
41 # Form a product graph of this graph and the other.
42 # Return the product graph.
43 def product(other)
44 product_graph = Graph.new
45 self.vertices.each do |v1|
46 other.vertices.each do |v2|
47 product_graph << Vertex.new({:g1_vertex => v1, :g2_vertex => v2})
48 end
49 end
50 self.edges.each do |e1|
51 e1_vertices = e1.vertices
52 other.edges.each do |e2|
53 if e1.type == e2.type
54 e2_vertices = e2.vertices
55 source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[0]}
56 destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[1]}
57 product_graph.connect source, destination
58 source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[1]}
59 destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[0]}
60 product_graph.connect source, destination
61 end
62 end
63 end
64 product_graph.vertices.reject! {|v| v.neighbours.empty?}
65 product_graph
66 end
67
68
69 # Calculates the initial similarity of each vertex in a product graph.
70 # If passed an optional block, that block is used to find the
71 # initial similarity. If no block is given, every vertex is given
72 # an initial similarity of 1.0.
73 def initial_similarity
74 self.vertices.each do |v|
75 if block_given?
76 v.initial_similarity = yield v
77 else
78 v.initial_similarity = 1.0
79 end
80 v.similarity = v.initial_similarity
81 end
82 end
83
84 # Performs similarity flooding on a graph, as described by
85 # Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm,
86 # "Similarity Flooding: A Versatile Graph Matching Algorithm
87 # and its Application to Schema Matching", Proceedings of
88 # the 18th International Conference on Data Engineering (ICDE’02)
89 #
90 # Assumes that the initial similarity has already been calculated
91 # If passed an optional block, it uses that block to update the
92 # similarity on each iteration. If no block is passed, it uses the
93 # default similarity updating method from the paper.
94 def similarity_flood(opts = {})
95 max_iterations = opts[:iterations] || 100
96 max_residual = opts[:max_residual] || 0.001
97 iteration = 1
98 residual = max_residual + 1
99 while residual > max_residual and iteration <= max_iterations
100 $log.debug { "Starting iteration #{iteration}" }
101 self.vertices.each do |v|
102 v.last_similarity = v.similarity
103 end
104 self.vertices.each do |v|
105 if block_given?
106 v.similarity = yield v
107 else
108 $log.debug { "Processing vertex #{v.name}" }
109 edge_groups = v.edges.group_by {|e| e.type }
110 $log.debug { " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" }
111 edge_groups.each do |type, edges|
112 $log.debug { " Processing group type #{type}" }
113 n = edges.length
114 edges.each do |e|
115 e.other_end(v).similarity += v.last_similarity / n
116 end
117 end
118 end
119 end
120 max_similarity = vertices.map {|v| v.similarity}.max
121 self.vertices.each do |v|
122 v.similarity = v.similarity / max_similarity
123 end
124 residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2})
125 $log.debug { puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" }
126 iteration += 1
127 end
128
129 end
130
131 end # class
132 end