37b772030053c146f61e14c2c286a3ed60c4bd8a
2 $log = Logger
.new(STDERR)
3 $log.level
= Logger
::WARN
5 # A simple graph library
9 # A container for all the parts of a graph. The graph can have arbitrary attributes,
10 # treated as method names.
11 class Graph
< OpenStruct
12 def initialize(values
= {})
14 self.edges
= Array
.new
15 self.vertices
= Array
.new
19 # Add a Vertex or Edge to the graph.
21 if other
.class.ancestors
.include? Vertex
22 self.vertices
<< other
23 elsif other
.class.ancestors
.include? Edge
29 # Connects two vertices, creating and storing a new edge
30 # Also adds the vertices, unless they're already in the graph
31 def connect(vertex1
, vertex2
, edge_attributes
= {})
32 self.vertices
<< vertex1
unless self.vertices
.include? vertex1
33 self.vertices
<< vertex2
unless self.vertices
.include? vertex2
34 edge
= Edge
.new(edge_attributes
)
36 edge
<< vertex1
<< vertex2
39 # Form a product graph of this graph and the other.
40 # Return the product graph.
42 product_graph
= Graph
.new
43 self.vertices
.each
do |v1
|
44 other
.vertices
.each
do |v2
|
45 product_graph
<< Vertex
.new({:g1_vertex => v1
, :g2_vertex => v2
})
48 self.edges
.each
do |e1
|
49 e1_vertices
= e1
.vertices
50 other
.edges
.each
do |e2
|
52 e2_vertices
= e2
.vertices
53 source
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[0] and v
.g2_vertex
== e2_vertices
[0]}
54 destination
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[1] and v
.g2_vertex
== e2_vertices
[1]}
55 product_graph
.connect source
, destination
56 source
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[0] and v
.g2_vertex
== e2_vertices
[1]}
57 destination
= product_graph
.vertices
.find
{|v
| v
.g1_vertex
== e1_vertices
[1] and v
.g2_vertex
== e2_vertices
[0]}
58 product_graph
.connect source
, destination
62 product_graph
.vertices
.reject
! {|v
| v
.neighbours
.empty
?}
67 # Calculates the initial similarity of each vertex in a product graph.
68 # If passed an optional block, that block is used to find the
69 # initial similarity. If no block is given, every vertex is given
70 # an initial similarity of 1.0.
71 def initial_similarity
72 self.vertices
.each
do |v
|
74 v
.initial_similarity
= yield v
76 v
.initial_similarity
= 1.0
78 v
.similarity
= v
.initial_similarity
82 # Performs similarity flooding on a graph, as described by
83 # Sergey Melnik, Hector Garcia-Molina, and Erhard Rahm,
84 # "Similarity Flooding: A Versatile Graph Matching Algorithm
85 # and its Application to Schema Matching", Proceedings of
86 # the 18th International Conference on Data Engineering (ICDE’02)
88 # Assumes that the initial similarity has already been calculated
89 # If passed an optional block, it uses that block to update the
90 # similarity on each iteration. If no block is passed, it uses the
91 # default similarity updating method from the paper.
92 def similarity_flood(opts
= {})
93 max_iterations
= opts
[:iterations] || 100
94 max_residual
= opts
[:max_residual] || 0.001
96 residual
= max_residual
+ 1
97 while residual
> max_residual
and iteration
<= max_iterations
98 $log.debug
{ "Starting iteration #{iteration}" }
99 self.vertices
.each
do |v
|
100 v
.last_similarity
= v
.similarity
102 self.vertices
.each
do |v
|
104 v
.similarity
= yield v
106 $log.debug
{ "Processing vertex #{v.name}" }
107 edge_groups
= v
.edges
.group_by
{|e
| e
.type
}
108 $log.debug
{ " Edge groups {#{edge_groups.keys.map {|t| t.to_s + ' => {' + edge_groups[t].map {|e| e.to_s}.join(', ')}.join('; ')}}" }
109 edge_groups
.each
do |type
, edges
|
110 $log.debug
{ " Processing group type #{type}" }
113 e
.other_end(v
).similarity
+= v
.last_similarity
/ n
118 max_similarity
= vertices
.map
{|v
| v
.similarity
}.max
119 self.vertices
.each
do |v
|
120 v
.similarity
= v
.similarity
/ max_similarity
122 residual
= Math
.sqrt(self.vertices
.reduce(0) {|a
, v
| a
+= (v
.similarity
- v
.last_similarity
) ** 2})
123 $log.debug
{ puts
"Residual = #{residual.round(3)}, sims = #{self.vertices.map {|v| v.name + " = " + v.similarity.round(2).to_s}}" }