From 7007758ddfedba0e7db623c2e6e2aba7bb420e73 Mon Sep 17 00:00:00 2001 From: Neil Smith Date: Fri, 3 Feb 2012 20:30:05 +0000 Subject: [PATCH] Added edit distance to Label --- lib/erd_handler/label.rb | 31 ++++++++++++++++++++++++++++++- spec/erd_handler/label_spec.rb | 25 +++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/lib/erd_handler/label.rb b/lib/erd_handler/label.rb index cdeacb1..a995432 100644 --- a/lib/erd_handler/label.rb +++ b/lib/erd_handler/label.rb @@ -2,7 +2,7 @@ class Label attr_reader :original, :processed - def initialize(original) + def initialize(original = "") @original = original @processed = [original] end @@ -55,4 +55,33 @@ class Label def tidy self.split.downcase.stem end + + def levenshtein(other_object) + if other_object.class == Label + other = other_object.processed.join('') + else + other = other_object + end + s = @processed.join('') + n = s.length + m = other.length + return m if (0 == n) + return n if (0 == m) + + d = Array.new(m+1) {Array.new(n+1, 0)} # one row for each characer in other, one column for each charater in self + + (0..n).each {|i| d[0][i] = i} + (0..m).each {|j| d[j][0] = j} + (1..m).each do |i| + (1..n).each do |j| + d[i][j] = [d[i-1][j-1] + ((s[j-1] == other[i-1]) ? 0 : 1), # substitution + d[i-1][j] + 1, # deletion + d[i][j-1] + 1 # addition + ].min + end + end + d[-1][-1] + end + + alias :edit_distance :levenshtein end \ No newline at end of file diff --git a/spec/erd_handler/label_spec.rb b/spec/erd_handler/label_spec.rb index b432b46..a1ea5a4 100644 --- a/spec/erd_handler/label_spec.rb +++ b/spec/erd_handler/label_spec.rb @@ -7,6 +7,8 @@ module ErdHandler test_label = "Test label" l1 = Label.new test_label l1.original.should == test_label + l1 = Label.new + l1.original.should == "" end end # original @@ -145,5 +147,28 @@ module ErdHandler end end # tidy + describe "#levenshtein" do + it "calculates the Levenshtein distance of the processed string" do + l1 = Label.new "Fred" + l1.levenshtein("Fred").should == 0 + l1.levenshtein("Free").should == 1 + l1.levenshtein("").should == 4 + l2 = Label.new + l2.levenshtein("Free").should == 4 + l2.levenshtein("").should == 0 + l3 = Label.new "meilenstein" + l3.levenshtein("levenshtein").should == 4 + l4 = Label.new "testingLabeller string, he_pontificated" + l4.tidy.levenshtein("testlabelstringhepontif").should == 0 + l4.tidy.levenshtein("testlabelXstringhepontif").should == 1 + end + + it "calculates the Levenshtein distance between Labels" do + l1 = Label.new "meilenstein" + l2 = Label.new "levenshtein" + l1.levenshtein(l2).should == 4 + end + end + end end -- 2.34.1