Got #to_dot working with blocks
[graph.njae.git] / lib / graph.njae / graph.rb
1 require 'logger'
2 $log = Logger.new(STDERR)
3 $log.level = Logger::WARN
4
5 # A simple graph library
6
7 module GraphNjae
8
9 # A container for all the parts of a graph. The graph can have arbitrary attributes,
10 # treated as method names.
11 class Graph < OpenStruct
12 def initialize(values = {})
13 super(values)
14 self.edges = Array.new
15 self.vertices = Array.new
16 self
17 end
18
19 # Add a Vertex or Edge to the graph.
20 def <<(other)
21 if other.class.ancestors.include? Vertex
22 self.vertices << other
23 elsif other.class.ancestors.include? Edge
24 self.edges << other
25 end
26 self
27 end
28
29 # Connects two vertices, creating and storing a new edge
30 # Also adds the vertices, unless they're already in the graph
31 def connect(vertex1, vertex2, edge_attributes = {})
32 self.vertices << vertex1 unless self.vertices.include? vertex1
33 self.vertices << vertex2 unless self.vertices.include? vertex2
34 edge = Edge.new(edge_attributes)
35 self.edges << edge
36 edge << vertex1 << vertex2
37 end
38
39 def to_dot(opts = {})
40 vertex_args = opts[:vertex_args] || {}
41 vertex_block = opts[:vertex_block] || nil
42 edge_args = opts[:edge_args] || {}
43 edge_block = opts[:edge_block] || nil
44 dot = "graph {\n"
45 self.vertices.each do |v|
46 if vertex_block.nil?
47 dot << v.to_dot(vertex_args)
48 else
49 dot << v.to_dot(&vertex_block)
50 end
51 dot << "\n"
52 end
53 self.edges.each do |e|
54 if edge_block.nil?
55 dot << e.to_dot(edge_args)
56 else
57 dot << e.to_dot(&edge_block)
58 end
59 dot << "\n"
60 end
61 dot << '}'
62 end
63
64 # Form a product graph of this graph and the other.
65 # Return the product graph.
66 def product(other)
67 product_graph = Graph.new
68 self.vertices.each do |v1|
69 other.vertices.each do |v2|
70 product_graph << Vertex.new({:g1_vertex => v1, :g2_vertex => v2})
71 end
72 end
73 self.edges.each do |e1|
74 e1_vertices = e1.vertices
75 other.edges.each do |e2|
76 if e1.type == e2.type
77 e2_vertices = e2.vertices
78 source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[0]}
79 destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[1]}
80 product_graph.connect source, destination
81 source = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[0] and v.g2_vertex == e2_vertices[1]}
82 destination = product_graph.vertices.find {|v| v.g1_vertex == e1_vertices[1] and v.g2_vertex == e2_vertices[0]}
83 product_graph.connect source, destination
84 end
85 end
86 end
87 product_graph.vertices.reject! {|v| v.neighbours.empty?}
88 product_graph
89 end
90
91
92 # Calculates the initial similarity of each vertex in a product graph.
93 # If passed an optional block, that block is used to find the
94 # initial similarity. If no block is given, every vertex is given
95 # an initial similarity of 1.0.
96 def initial_similarity
97 self.vertices.each do |v|
98 if block_given?
99 v.initial_similarity = yield v
100 else
101 v.initial_similarity = 1.0
102 end
103 v.similarity = v.initial_similarity
104 end
105 end
106
107 # Performs similarity flooding on a graph, as described by
108 # Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm,
109 # "Similarity Flooding: A Versatile Graph Matching Algorithm
110 # and its Application to Schema Matching", Proceedings of
111 # the 18th International Conference on Data Engineering (ICDE’02)
112 #
113 # Assumes that the initial similarity has already been calculated
114 # If passed an optional block, it uses that block to update the
115 # similarity on each iteration. If no block is passed, it uses the
116 # default similarity updating method from the paper.
117 def similarity_flood(opts = {})
118 max_iterations = opts[:iterations] || 100
119 max_residual = opts[:max_residual] || 0.001
120 iteration = 1
121 residual = max_residual + 1
122 while residual > max_residual and iteration <= max_iterations
123 $log.debug { "Starting iteration #{iteration}" }
124 self.vertices.each do |v|
125 v.last_similarity = v.similarity
126 end
127 self.vertices.each do |v|
128 if block_given?
129 v.similarity = yield v
130 else
131 $log.debug { "Processing vertex #{v.name}" }
132 edge_groups = v.edges.group_by {|e| e.type }
133 $log.debug { " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" }
134 edge_groups.each do |type, edges|
135 $log.debug { " Processing group type #{type}" }
136 n = edges.length
137 edges.each do |e|
138 e.other_end(v).similarity += v.last_similarity / n
139 end
140 end
141 end
142 end
143 max_similarity = vertices.map {|v| v.similarity}.max
144 self.vertices.each do |v|
145 v.similarity = v.similarity / max_similarity
146 end
147 residual = Math.sqrt(self.vertices.reduce(0) {|a, v| a += (v.similarity - v.last_similarity) ** 2})
148 $log.debug { puts "Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" }
149 iteration += 1
150 end
151
152 end
153
154 end # class
155 end