bdf75b231e0089ec0acbae0dfc9c2f5fb6d2ce8b
4 $log = Logger
.new(STDERR)
5 $log.level
= Logger
::WARN
7 # A simple graph library
11 # A container for all the parts of a graph. The graph can have arbitrary attributes,
12 # treated as method names.
13 class Graph
< OpenStruct
14 def initialize(values
= {})
16 self.edges
= Array
.new
17 self.vertices
= Array
.new
21 # Add a Vertex or Edge to the graph.
23 if other
.class.ancestors
.include? Vertex
24 self.vertices
<< other
25 elsif other
.class.ancestors
.include? Edge
31 # Connects two vertices, creating and storing a new edge
32 # Also adds the vertices, unless they're already in the graph
33 def connect(vertex1
, vertex2
, edge_attributes
= {})
34 self.vertices
<< vertex1
unless self.vertices
.include? vertex1
35 self.vertices
<< vertex2
unless self.vertices
.include? vertex2
36 edge
= Edge
.new(edge_attributes
)
38 edge
<< vertex1
<< vertex2
41 # Form a product graph of this graph and the other.
42 # Return the product graph.
44 product_graph
= Graph
.new
45 self.vertices
.each
do |v1
|
46 other
.vertices
.each
do |v2
|
47 product_graph
<< Vertex
.new({:g1_vertex => v1
, :g2_vertex => v2
})
50 self.edges
.each
do |e1
|
51 e1_vertices
= e1
.vertices
52 other
.edges
.each
do |e2
|
54 e2_vertices
= e2
.vertices
55 source
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[0] and v
.g2_vertex
== e2_vertices
[0]}
56 destination
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[1] and v
.g2_vertex
== e2_vertices
[1]}
57 product_graph
.connect source
, destination
58 source
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[0] and v
.g2_vertex
== e2_vertices
[1]}
59 destination
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[1] and v
.g2_vertex
== e2_vertices
[0]}
60 product_graph
.connect source
, destination
64 product_graph
.vertices
.reject
! {|v
| v
.neighbours
.empty
?}
69 # Calculates the initial similarity of each vertex in a product graph.
70 # If passed an optional block, that block is used to find the
71 # initial similarity. If no block is given, every vertex is given
72 # an initial similarity of 1.0.
73 def initial_similarity
74 self.vertices
.each
do |v
|
76 v
.initial_similarity
= yield v
78 v
.initial_similarity
= 1.0
80 v
.similarity
= v
.initial_similarity
84 # Performs similarity flooding on a graph, as described by
85 # Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm,
86 # "Similarity Flooding: A Versatile Graph Matching Algorithm
87 # and its Application to Schema Matching", Proceedings of
88 # the 18th International Conference on Data Engineering (ICDE’02)
90 # Assumes that the initial similarity has already been calculated
91 # If passed an optional block, it uses that block to update the
92 # similarity on each iteration. If no block is passed, it uses the
93 # default similarity updating method from the paper.
94 def similarity_flood(opts
= {})
95 max_iterations
= opts
[:iterations] || 100
96 max_residual
= opts
[:max_residual] || 0.001
98 residual
= max_residual
+ 1
99 while residual
> max_residual
and iteration
<= max_iterations
100 $log.debug
{ "Starting iteration #{iteration}" }
101 self.vertices
.each
do |v
|
102 v
.last_similarity
= v
.similarity
104 self.vertices
.each
do |v
|
106 v
.similarity
= yield v
108 $log.debug
{ "Processing vertex #{v.name}" }
109 edge_groups
= v
.edges
.group_by
{|e
| e
.type
}
110 $log.debug
{ " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" }
111 edge_groups
.each
do |type
, edges
|
112 $log.debug
{ " Processing group type #{type}" }
115 e
.other_end(v
).similarity
+= v
.last_similarity
/ n
120 max_similarity
= vertices
.map
{|v
| v
.similarity
}.max
121 self.vertices
.each
do |v
|
122 v
.similarity
= v
.similarity
/ max_similarity
124 residual
= Math
.sqrt(self.vertices
.reduce(0) {|a
, v
| a
+= (v
.similarity
- v
.last_similarity
) ** 2})
125 $log.debug
{ puts
"Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" }