projects
/
porter2stemmer.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Added Readme.md
[porter2stemmer.git]
/
test
/
tc_porter2_full.rb
diff --git
a/test/tc_porter2_full.rb
b/test/tc_porter2_full.rb
index 2520daeec2a3bbf98f8189b32513c8f31bfd8f12..982fab2895e012fd07bb6edd273c67b60243cec9 100644
(file)
--- a/
test/tc_porter2_full.rb
+++ b/
test/tc_porter2_full.rb
@@
-8,12
+8,9
@@
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
require 'test/unit'
require 'porter2'
require 'test/unit'
require 'porter2'
-class String
- public :porter2_preprocess, :porter2_r1, :porter2_r2
-end
-
class TestPorter2 < Test::Unit::TestCase
class TestPorter2 < Test::Unit::TestCase
-
+
+# The full set of test words from http://snowball.tartarus.org/algorithms/english/stemmer.html
TEST_WORDS = {"'" => "'",
"''" => "''",
"'a" => "'a",
TEST_WORDS = {"'" => "'",
"''" => "''",
"'a" => "'a",
@@
-29431,6
+29428,7
@@
TEST_WORDS = {"'" => "'",
"zossimov" => "zossimov",
"zu" => "zu" }
"zossimov" => "zossimov",
"zu" => "zu" }
+# Test words with -ise suffixes (and similar), to test how British English is stemmed
TEST_WORDS_ENGB = { "aggrandisement" => "aggrandis",
"agonising" => "agon",
"anathematising" => "anathemat",
TEST_WORDS_ENGB = { "aggrandisement" => "aggrandis",
"agonising" => "agon",
"anathematising" => "anathemat",