Added edit distance to Label
authorNeil Smith <neil.github@njae.me.uk>
Fri, 3 Feb 2012 20:30:05 +0000 (20:30 +0000)
committerNeil Smith <neil.github@njae.me.uk>
Fri, 3 Feb 2012 20:30:05 +0000 (20:30 +0000)
lib/erd_handler/label.rb
spec/erd_handler/label_spec.rb

index cdeacb177eb24c56c7004995e4ab1130303e903d..a995432a3cf544240cc2a7d73984cb7c1259ddb7 100644 (file)
@@ -2,7 +2,7 @@ class Label
   
   attr_reader :original, :processed
   
-  def initialize(original)
+  def initialize(original = "")
     @original = original
     @processed = [original]
   end
@@ -55,4 +55,33 @@ class Label
   def tidy
     self.split.downcase.stem
   end
+  
+  def levenshtein(other_object)
+    if other_object.class == Label
+      other = other_object.processed.join('')
+    else
+      other = other_object
+    end
+    s = @processed.join('')
+    n = s.length
+    m = other.length
+    return m if (0 == n)
+    return n if (0 == m)
+    
+    d = Array.new(m+1) {Array.new(n+1, 0)} # one row for each characer in other, one column for each charater in self
+    
+    (0..n).each {|i| d[0][i] = i}
+    (0..m).each {|j| d[j][0] = j}
+    (1..m).each do |i|
+      (1..n).each do |j|
+        d[i][j] = [d[i-1][j-1] + ((s[j-1] == other[i-1]) ? 0 : 1),  # substitution
+                   d[i-1][j] + 1, # deletion
+                   d[i][j-1] + 1  # addition
+                   ].min
+      end
+    end
+    d[-1][-1]
+  end
+  
+  alias :edit_distance :levenshtein
 end
\ No newline at end of file
index b432b46f447005612e2cf1ccce31f8490dfa4e6e..a1ea5a480513b52980798f8a02149bee368ff9c9 100644 (file)
@@ -7,6 +7,8 @@ module ErdHandler
         test_label = "Test label"
         l1 = Label.new test_label
         l1.original.should == test_label
+        l1 = Label.new
+        l1.original.should == ""
       end
     end # original
 
@@ -145,5 +147,28 @@ module ErdHandler
       end
     end # tidy
     
+    describe "#levenshtein" do
+      it "calculates the Levenshtein distance of the processed string" do
+        l1 = Label.new "Fred"
+        l1.levenshtein("Fred").should == 0
+        l1.levenshtein("Free").should == 1
+        l1.levenshtein("").should == 4
+        l2 = Label.new
+        l2.levenshtein("Free").should == 4
+        l2.levenshtein("").should == 0
+        l3 = Label.new "meilenstein"
+        l3.levenshtein("levenshtein").should == 4
+        l4 = Label.new "testingLabeller string, he_pontificated"
+        l4.tidy.levenshtein("testlabelstringhepontif").should == 0
+        l4.tidy.levenshtein("testlabelXstringhepontif").should == 1
+      end
+      
+      it "calculates the Levenshtein distance between Labels" do
+        l1 = Label.new "meilenstein"
+        l2 = Label.new "levenshtein"
+        l1.levenshtein(l2).should == 4
+      end
+    end
+    
   end
 end