1 require 'abstract_unit'
3 class SanitizerTest
< Test
::Unit::TestCase
5 @sanitizer = nil # used by assert_sanitizer
9 sanitizer
= HTML
::FullSanitizer.new
10 assert_equal("<<<bad html", sanitizer.sanitize("<<<bad html"))
11 assert_equal("<<", sanitizer.sanitize("<<<bad html>"))
12 assert_equal("Dont touch me", sanitizer.sanitize("Dont touch me"))
13 assert_equal("This is a test.", sanitizer.sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>"))
14 assert_equal("Weirdos", sanitizer.sanitize("Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"))
15 assert_equal("This is a test.", sanitizer.sanitize("This is a test."))
17 %{This is a test.\n\n\nIt no longer contains any HTML.\n}, sanitizer.sanitize(
18 %{<title>This is <b>a <a href="" target="_blank">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}))
19 assert_equal "This has a here.", sanitizer.sanitize("This has a <!-- comment --> here.")
20 assert_equal "This has a here.", sanitizer.sanitize("This has a <![CDATA[<section>]]> here.")
21 assert_equal "This has an unclosed ", sanitizer.sanitize("This has an unclosed <![CDATA[<section>]] here...")
22 [nil, '', ' '].each { |blank| assert_equal blank, sanitizer.sanitize(blank) }
26 sanitizer = HTML::LinkSanitizer.new
27 assert_equal "Dont touch me", sanitizer.sanitize("Dont touch me")
28 assert_equal "on my mind\nall day long", sanitizer.sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
29 assert_equal "0wn3d", sanitizer.sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>")
30 assert_equal "Magic", sanitizer.sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
31 assert_equal "FrrFox", sanitizer.sanitize("<href onlclick='steal()'>FrrFox</a></href>")
32 assert_equal "My mind\nall <b>day</b> long", sanitizer.sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
33 assert_equal "all <b>day</b> long", sanitizer.sanitize("<<a>a href='hello'>all <b>day</b> long<</A>/a>")
35 assert_equal "<a<a", sanitizer.sanitize("<a<a")
# A form and its nested input are expected to sanitize down to an empty string.
def test_sanitize_form
  markup = "<form action=\"/foo/bar\" method=\"post\"><input></form>"
  assert_sanitized(markup, '')
end
# The <plaintext> wrapper is expected to disappear while the inner span survives.
def test_sanitize_plaintext
  assert_sanitized("<plaintext><span>foo</span></plaintext>", "<span>foo</span>")
end
# A script element and its contents are expected to be removed from the text.
def test_sanitize_script
  dirty = "a b c<script language=\"Javascript\">blah blah blah</script>d e f"
  assert_sanitized(dirty, "a b cd e f")
end
# on* attributes inside a tag are expected to be dropped; the same text outside
# of any tag is expected to be left alone.
def test_sanitize_js_handlers
  assert_sanitized(
    %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>},
    %{onthis="do that" <a name="foo" href="#">hello</a>}
  )
end
# javascript: hrefs are expected to be stripped from tags but kept in plain text.
def test_sanitize_javascript_href
  assert_sanitized(
    %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>},
    %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
  )
end
# javascript: src values are expected to be stripped from tags but kept in plain text.
def test_sanitize_image_src
  assert_sanitized(
    %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>},
    %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>}
  )
end
# Generates one test per allowed tag: the tag and its title attribute are
# expected to be kept, while onclick and the nested <bad> tag are removed.
HTML::WhiteListSanitizer.allowed_tags.each do |allowed|
  define_method("test_should_allow_#{allowed}_tag") do
    dirty = "start <#{allowed} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{allowed}> end"
    clean = %(start <#{allowed} title="1">foo bar baz</#{allowed}> end)
    assert_sanitized(dirty, clean)
  end
end
# The anchor and its href are expected to be kept; onclick and the nested script go.
def test_should_allow_anchors
  dirty = %(<a href="foo" onclick="bar"><script>baz</script></a>)
  assert_sanitized(dirty, %(<a href="foo"></a>))
end
# A colon inside a relative path is expected to leave the link unchanged.
def test_allow_colons_in_path_component
  link = "<a href=\"./this:that\">foo</a>"
  assert_sanitized(link)
end
# Generates one test per image attribute: the attribute is expected to be kept
# while onclick is removed.
['src', 'width', 'height', 'alt'].each do |img_attr|
  define_method("test_should_allow_image_#{img_attr}_attribute") do
    dirty = %(<img #{img_attr}="foo" onclick="bar" />)
    assert_sanitized(dirty, %(<img #{img_attr}="foo" />))
  end
end
# Text without markup is expected to come back unchanged.
def test_should_handle_non_html
  assert_sanitized('abc')
end
92 def test_should_handle_blank_text
# With :tags => %w(u), markup made only of <u> is expected to come back unchanged.
def test_should_allow_custom_tags
  # The fixture must be assigned here; `text` is otherwise an undefined local.
  text = "<u>foo</u>"
  sanitizer = HTML::WhiteListSanitizer.new
  assert_equal(text, sanitizer.sanitize(text, :tags => %w(u)))
end
# With :tags => %w(u), <u> is expected to be kept and <i> stripped to its text.
def test_should_allow_only_custom_tags
  whitelist = HTML::WhiteListSanitizer.new
  cleaned = whitelist.sanitize("<u>foo</u> with <i>bar</i>", :tags => %w(u))
  assert_equal("<u>foo</u> with bar", cleaned)
end
# A blockquote with a cite attribute is expected to pass through unchanged.
def test_should_allow_custom_tags_with_attributes
  markup = %(<blockquote cite="http://example.com/">foo</blockquote>)
  assert_equal(markup, HTML::WhiteListSanitizer.new.sanitize(markup))
end
# With :attributes => ['foo'], the foo attribute is expected to be kept.
def test_should_allow_custom_tags_with_custom_attributes
  markup = %(<blockquote foo="bar">Lorem ipsum</blockquote>)
  cleaned = HTML::WhiteListSanitizer.new.sanitize(markup, :attributes => ['foo'])
  assert_equal(markup, cleaned)
end
# Generates one test per (tag, attribute) pair: a javascript: value is expected
# to be removed while the title attribute stays.
[['img', 'src'], ['a', 'href']].each do |tag, attr|
  define_method("test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols") do
    dirty = %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>)
    assert_sanitized(dirty, %(<#{tag} title="1">boo</#{tag}>))
  end
end
# Every listed scheme is expected to be reported by contains_bad_protocols?.
def test_should_flag_bad_protocols
  bad_protocols = %w(about chrome data disk hcp help javascript livescript lynxcgi lynxexec ms-help ms-its mhtml mocha opera res resource shell vbscript view-source vnd.ms.radio wysiwyg)
  sanitizer = HTML::WhiteListSanitizer.new
  bad_protocols.each do |scheme|
    # contains_bad_protocols? is invoked via send, as in the rest of this file.
    flagged = sanitizer.send(:contains_bad_protocols?, 'src', "#{scheme}://bad")
    assert flagged
  end
end
# No scheme from allowed_protocols is expected to be reported as bad.
def test_should_accept_good_protocols
  sanitizer = HTML::WhiteListSanitizer.new
  HTML::WhiteListSanitizer.allowed_protocols.each do |scheme|
    flagged = sanitizer.send(:contains_bad_protocols?, 'src', "#{scheme}://good")
    assert !flagged
  end
end
# A percent-encoded href is expected to be stripped and reported as a bad protocol.
def test_should_reject_hex_codes_in_protocol
  encoded = "%6A%61%76%61%73%63%72%69%70%74%3A%61%6C%65%72%74%28%22%58%53%53%22%29"
  assert_sanitized(%(<a href="#{encoded}">1</a>), "<a>1</a>")
  # @sanitizer is the instance assert_sanitized has just used.
  assert @sanitizer.send(:contains_bad_protocols?, 'src', encoded)
end
# A script tag broken across a newline is expected to sanitize to an empty string.
def test_should_block_script_tag
  dirty = %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>)
  assert_sanitized(dirty, "")
end
# Generates one numbered test per image-based XSS payload; each payload is
# expected to sanitize down to a bare <img>.
# Whitespace and line breaks inside the literals are part of the payloads.
image_hacks = [
  %(<IMG SRC="javascript:alert('XSS');">),
  %(<IMG SRC=javascript:alert('XSS')>),
  %(<IMG SRC=JaVaScRiPt:alert('XSS')>),
  %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">),
  %(<IMG SRC=javascript:alert("XSS")>),
  %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
  %(<IMG SRC=javascript:alert('XSS')>),
  %(<IMG SRC=javascript:alert('XSS')>),
  %(<IMG SRC=javascript:alert('XSS')>),
  %(<IMG SRC="jav\tascript:alert('XSS');">),
  %(<IMG SRC="jav	ascript:alert('XSS');">),
  %(<IMG SRC="jav
ascript:alert('XSS');">),
  %(<IMG SRC="jav
ascript:alert('XSS');">),
  %(<IMG SRC="  javascript:alert('XSS');">),
  %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)
]

image_hacks.each_with_index do |payload, index|
  define_method("test_should_not_fall_for_xss_image_hack_#{index+1}") do
    assert_sanitized(payload, "<img>")
  end
end
# A script tag split by a NUL character is expected to leave only its inner text.
def test_should_sanitize_tag_broken_up_by_null
  dirty = %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>)
  assert_sanitized(dirty, "alert(\"XSS\")")
end
# A script tag with a slash-separated bogus attribute is expected to be removed.
def test_should_sanitize_invalid_script_tag
  dirty = %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>)
  assert_sanitized(dirty, "")
end
# Checks two inputs with stray opening brackets against their expected output.
def test_should_sanitize_script_tag_with_multiple_open_brackets
  cases = [
    [%(<<SCRIPT>alert("XSS");//<</SCRIPT>), "<"],
    [%(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), %(<a)]
  ]
  cases.each { |dirty, clean| assert_sanitized(dirty, clean) }
end
# An unclosed script tag followed by <B> is expected to leave only "<b>".
def test_should_sanitize_unclosed_script
  dirty = %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>)
  assert_sanitized(dirty, "<b>")
end
# An image tag missing its closing bracket is expected to sanitize to a bare <img>.
def test_should_sanitize_half_open_scripts
  dirty = %(<IMG SRC="javascript:alert('XSS')")
  assert_sanitized(dirty, "<img>")
end
# A javascript: src spread one character per line is expected to be stripped.
def test_should_not_fall_for_ridiculous_hack
  assert_sanitized(
    %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>),
    "<img>"
  )
end
# Compares a span whose title attribute contains markup with its expected form.
def test_should_sanitize_attributes
  dirty = %(<SPAN title="'><script>alert()</script>">blah</SPAN>)
  clean = %(<span title="'><script>alert()</script>">blah</span>)
  assert_sanitized(dirty, clean)
end
# Compares sanitize_css output for a mixed declaration list with the expected CSS.
def test_should_sanitize_illegal_style_properties
  css = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
  cleaned = sanitize_css(css)
  assert_equal(
    %(display: block; width: 100%; height: 100%; background-color: black; background-image: ; background-x: center; background-y: center;),
    cleaned
  )
end
# A declaration followed by trailing whitespace is expected to be normalised.
def test_should_sanitize_with_trailing_space
  assert_equal("display: block;", sanitize_css("display:block; "))
end
# A -moz-binding declaration is expected to be removed entirely.
def test_should_sanitize_xul_style_attributes
  css = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
  assert_equal('', sanitize_css(css))
end
# A script tag with a slash-separated bogus attribute is expected to be removed
# from the surrounding text.
def test_should_sanitize_invalid_tag_names
  dirty = %(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f)
  assert_sanitized(dirty, "a b cd e f")
end
# An onclick padded with punctuation is expected to be stripped from the anchor.
def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
  # Single quotes are required so that "#$" and "\]" stay literal.
  dirty = '<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>'
  assert_sanitized(dirty, "<a>foo</a>")
end
# A self-closing img with a slash-joined src is expected to become a bare <img />.
def test_should_sanitize_invalid_tag_names_in_single_tags
  dirty = '<img/src="http://ha.ckers.org/xss.js"/>'
  assert_sanitized(dirty, "<img />")
end
# A lowsrc attribute carrying a javascript: value is expected to be removed.
def test_should_sanitize_img_dynsrc_lowsrc
  dirty = %(<img lowsrc="javascript:alert('XSS')" />)
  assert_sanitized(dirty, "<img />")
end
# A background-image value written with backslash escapes is expected to be removed.
def test_should_sanitize_div_background_image_unicode_encoded
  css = %(background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029)
  assert_equal('', sanitize_css(css))
end
# A CSS expression(...) value is expected to be removed entirely.
def test_should_sanitize_div_style_expression
  css = %(width: expression(alert('XSS'));)
  assert_equal('', sanitize_css(css))
end
# A vbscript: src is expected to be stripped from the image tag.
def test_should_sanitize_img_vbscript
  dirty = %(<img src='vbscript:msgbox("XSS")' />)
  assert_sanitized(dirty, '<img />')
end
# Compares a CDATA section containing markup with its expected sanitized form.
def test_should_sanitize_cdata_section
  dirty = "<![CDATA[<span>section</span>]]>"
  clean = "<![CDATA[<span>section</span>]]>"
  assert_sanitized(dirty, clean)
end
# An unterminated CDATA section is expected to come back with a closing "]]>".
def test_should_sanitize_unterminated_cdata_section
  dirty = "<![CDATA[<span>neverending..."
  assert_sanitized(dirty, "<![CDATA[<span>neverending...]]>")
end
# A link whose query string contains an ampersand is expected to be unchanged.
def test_should_not_mangle_urls_with_ampersand
  link = %{<a href=\"http://www.domain.com?var1=1&var2=2\">my link</a>}
  assert_sanitized(link)
end
# Asserts that sanitizing +input+ with the shared white-list sanitizer gives
# +expected+, or +input+ itself when +expected+ is omitted.
#
# input    - markup to sanitize; may be nil.
# expected - expected sanitized markup, compared with assert_dom_equal.
#
# A nil/false input is checked with assert_nil instead, so that the two
# assertions never run against the same sanitize result.
def assert_sanitized(input, expected = nil)
  # The sanitizer is created lazily and reused across calls in a test.
  @sanitizer ||= HTML::WhiteListSanitizer.new
  if input
    assert_dom_equal expected || input, @sanitizer.sanitize(input)
  else
    assert_nil @sanitizer.sanitize(input)
  end
end
# Runs +input+ through sanitize_css on the shared white-list sanitizer,
# creating the sanitizer on first use, and returns the result.
def sanitize_css(input)
  @sanitizer ||= HTML::WhiteListSanitizer.new
  @sanitizer.sanitize_css(input)
end