attr_reader :original, :processed
- def initialize(original)
+ def initialize(original = "")
@original = original
@processed = [original]
end
def tidy
self.split.downcase.stem
end
+
+ def levenshtein(other_object)
+ if other_object.class == Label
+ other = other_object.processed.join('')
+ else
+ other = other_object
+ end
+ s = @processed.join('')
+ n = s.length
+ m = other.length
+ return m if (0 == n)
+ return n if (0 == m)
+
+ d = Array.new(m+1) {Array.new(n+1, 0)} # one row for each characer in other, one column for each charater in self
+
+ (0..n).each {|i| d[0][i] = i}
+ (0..m).each {|j| d[j][0] = j}
+ (1..m).each do |i|
+ (1..n).each do |j|
+ d[i][j] = [d[i-1][j-1] + ((s[j-1] == other[i-1]) ? 0 : 1), # substitution
+ d[i-1][j] + 1, # deletion
+ d[i][j-1] + 1 # addition
+ ].min
+ end
+ end
+ d[-1][-1]
+ end
+
+ alias :edit_distance :levenshtein
end
\ No newline at end of file
test_label = "Test label"
l1 = Label.new test_label
l1.original.should == test_label
+ l1 = Label.new
+ l1.original.should == ""
end
end # original
end
end # tidy
+ describe "#levenshtein" do
+ it "calculates the Levenshtein distance of the processed string" do
+ l1 = Label.new "Fred"
+ l1.levenshtein("Fred").should == 0
+ l1.levenshtein("Free").should == 1
+ l1.levenshtein("").should == 4
+ l2 = Label.new
+ l2.levenshtein("Free").should == 4
+ l2.levenshtein("").should == 0
+ l3 = Label.new "meilenstein"
+ l3.levenshtein("levenshtein").should == 4
+ l4 = Label.new "testingLabeller string, he_pontificated"
+ l4.tidy.levenshtein("testlabelstringhepontif").should == 0
+ l4.tidy.levenshtein("testlabelXstringhepontif").should == 1
+ end
+
+ it "calculates the Levenshtein distance between Labels" do
+ l1 = Label.new "meilenstein"
+ l2 = Label.new "levenshtein"
+ l1.levenshtein(l2).should == 4
+ end
+ end
+
end
end