2 $log = Logger
.new(STDERR)
3 $log.level
= Logger
::WARN
5 # A simple graph library
9 # A container for all the parts of a graph. The graph can have arbitrary attributes,
10 # treated as method names.
11 class Graph
< OpenStruct
12 def initialize(values
= {})
14 self.edges
= Array
.new
15 self.vertices
= Array
.new
19 # Add a Vertex or Edge to the graph.
21 if other
.class.ancestors
.include? Vertex
22 self.vertices
<< other
23 elsif other
.class.ancestors
.include? Edge
29 # Connects two vertices, creating and storing a new edge
30 # Also adds the vertices, unless they're already in the graph
31 def connect(vertex1
, vertex2
, edge_attributes
= {})
32 self.vertices
<< vertex1
unless self.vertices
.include? vertex1
33 self.vertices
<< vertex2
unless self.vertices
.include? vertex2
34 edge
= Edge
.new(edge_attributes
)
36 edge
<< vertex1
<< vertex2
40 vertex_args
= opts
[:vertex_args] || {}
41 vertex_block
= opts
[:vertex_block] || nil
42 edge_args
= opts
[:edge_args] || {}
43 edge_block
= opts
[:edge_block] || nil
45 self.vertices
.each
do |v
|
47 dot
<< v
.to_dot(vertex_args
)
49 dot
<< v
.do_dot(&vertex_block
)
53 self.edges
.each
do |e
|
55 dot
<< e
.to_dot(edge_args
)
57 dot
<< e
.do_dot(&lambda
{edge_block
})
64 # Form a product graph of this graph and the other.
65 # Return the product graph.
67 product_graph
= Graph
.new
68 self.vertices
.each
do |v1
|
69 other
.vertices
.each
do |v2
|
70 product_graph
<< Vertex
.new({:g1_vertex => v1
, :g2_vertex => v2
})
73 self.edges
.each
do |e1
|
74 e1_vertices
= e1
.vertices
75 other
.edges
.each
do |e2
|
77 e2_vertices
= e2
.vertices
78 source
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[0] and v
.g2_vertex
== e2_vertices
[0]}
79 destination
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[1] and v
.g2_vertex
== e2_vertices
[1]}
80 product_graph
.connect source
, destination
81 source
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[0] and v
.g2_vertex
== e2_vertices
[1]}
82 destination
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[1] and v
.g2_vertex
== e2_vertices
[0]}
83 product_graph
.connect source
, destination
87 product_graph
.vertices
.reject
! {|v
| v
.neighbours
.empty
?}
92 # Calculates the initial similarity of each vertex in a product graph.
93 # If passed an optional block, that block is used to find the
94 # initial similarity. If no block is given, every vertex is given
95 # an initial similarity of 1.0.
96 def initial_similarity
97 self.vertices
.each
do |v
|
99 v
.initial_similarity
= yield v
101 v
.initial_similarity
= 1.0
103 v
.similarity
= v
.initial_similarity
107 # Performs similarity flooding on a graph, as described by
108 # Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm,
109 # "Similarity Flooding: A Versatile Graph Matching Algorithm
110 # and its Application to Schema Matching", Proceedings of
111 # the 18th International Conference on Data Engineering (ICDE’02)
113 # Assumes that the initial similarity has already been calculated
114 # If passed an optional block, it uses that block to update the
115 # similarity on each iteration. If no block is passed, it uses the
116 # default similarity updating method from the paper.
117 def similarity_flood(opts
= {})
118 max_iterations
= opts
[:iterations] || 100
119 max_residual
= opts
[:max_residual] || 0.001
121 residual
= max_residual
+ 1
122 while residual
> max_residual
and iteration
<= max_iterations
123 $log.debug
{ "Starting iteration #{iteration}" }
124 self.vertices
.each
do |v
|
125 v
.last_similarity
= v
.similarity
127 self.vertices
.each
do |v
|
129 v
.similarity
= yield v
131 $log.debug
{ "Processing vertex #{v.name}" }
132 edge_groups
= v
.edges
.group_by
{|e
| e
.type
}
133 $log.debug
{ " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" }
134 edge_groups
.each
do |type
, edges
|
135 $log.debug
{ " Processing group type #{type}" }
138 e
.other_end(v
).similarity
+= v
.last_similarity
/ n
143 max_similarity
= vertices
.map
{|v
| v
.similarity
}.max
144 self.vertices
.each
do |v
|
145 v
.similarity
= v
.similarity
/ max_similarity
147 residual
= Math
.sqrt(self.vertices
.reduce(0) {|a
, v
| a
+= (v
.similarity
- v
.last_similarity
) ** 2})
148 $log.debug
{ puts
"Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" }