X-Git-Url: https://git.njae.me.uk/?a=blobdiff_plain;f=vendor%2Frails%2Factionpack%2Flib%2Faction_controller%2Fvendor%2Fhtml-scanner%2Fhtml%2Fsanitizer.rb;fp=vendor%2Frails%2Factionpack%2Flib%2Faction_controller%2Fvendor%2Fhtml-scanner%2Fhtml%2Fsanitizer.rb;h=ae20f9947c52f02080352c40e9dd21baf46abe5c;hb=d115f2e23823271635bad69229a42cd8ac68debe;hp=0000000000000000000000000000000000000000;hpb=37cb670bf3ddde90b214e591f100ed4446469484;p=depot.git diff --git a/vendor/rails/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb b/vendor/rails/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb new file mode 100644 index 0000000..ae20f99 --- /dev/null +++ b/vendor/rails/actionpack/lib/action_controller/vendor/html-scanner/html/sanitizer.rb @@ -0,0 +1,173 @@ +module HTML + class Sanitizer + def sanitize(text, options = {}) + return text unless sanitizeable?(text) + tokenize(text, options).join + end + + def sanitizeable?(text) + !(text.nil? || text.empty? || !text.index("<")) + end + + protected + def tokenize(text, options) + tokenizer = HTML::Tokenizer.new(text) + result = [] + while token = tokenizer.next + node = Node.parse(nil, 0, 0, token, false) + process_node node, result, options + end + result + end + + def process_node(node, result, options) + result << node.to_s + end + end + + class FullSanitizer < Sanitizer + def sanitize(text, options = {}) + result = super + # strip any comments, and if they have a newline at the end (ie. line with + # only a comment) strip that too + result.gsub!(/[\n]?/m, "") if result + # Recurse - handle all dirty nested tags + result == text ? result : sanitize(result, options) + end + + def process_node(node, result, options) + result << node.to_s if node.class == HTML::Text + end + end + + class LinkSanitizer < FullSanitizer + cattr_accessor :included_tags, :instance_writer => false + self.included_tags = Set.new(%w(a href)) + + def sanitizeable?(text) + !(text.nil? || text.empty? || !((text.index(""))) + end + + protected + def process_node(node, result, options) + result << node.to_s unless node.is_a?(HTML::Tag) && included_tags.include?(node.name) + end + end + + class WhiteListSanitizer < Sanitizer + [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags, + :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr| + class_inheritable_accessor attr, :instance_writer => false + end + + # A regular expression of the valid characters used to separate protocols like + # the ':' in 'http://foo.com' + self.protocol_separator = /:|(�*58)|(p)|(%|%)3A/ + + # Specifies a Set of HTML attributes that can have URIs. + self.uri_attributes = Set.new(%w(href src cite action longdesc xlink:href lowsrc)) + + # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed + # to just escaping harmless tags like <font> + self.bad_tags = Set.new(%w(script)) + + # Specifies the default Set of tags that the #sanitize helper will allow unscathed. + self.allowed_tags = Set.new(%w(strong em b i p code pre tt samp kbd var sub + sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr + acronym a img blockquote del ins)) + + # Specifies the default Set of html attributes that the #sanitize helper will leave + # in the allowed tag. + self.allowed_attributes = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr)) + + # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept. + self.allowed_protocols = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto + feed svn urn aim rsync tag ssh sftp rtsp afs)) + + # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. + self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse + border-color border-left-color border-right-color border-top-color clear color cursor direction display + elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height + overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation + speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space + width)) + + # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept. + self.allowed_css_keywords = Set.new(%w(auto aqua black block blue bold both bottom brown center + collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal + nowrap olive pointer purple red right solid silver teal top transparent underline white yellow)) + + # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers. + self.shorthand_css_properties = Set.new(%w(background border margin padding)) + + # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute + def sanitize_css(style) + # disallow urls + style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ') + + # gauntlet + if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/ || + style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$)\s*)*$/ + return '' + end + + clean = [] + style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val| + if allowed_css_properties.include?(prop.downcase) + clean << prop + ': ' + val + ';' + elsif shorthand_css_properties.include?(prop.split('-')[0].downcase) + unless val.split().any? do |keyword| + !allowed_css_keywords.include?(keyword) && + keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/ + end + clean << prop + ': ' + val + ';' + end + end + end + clean.join(' ') + end + + protected + def tokenize(text, options) + options[:parent] = [] + options[:attributes] ||= allowed_attributes + options[:tags] ||= allowed_tags + super + end + + def process_node(node, result, options) + result << case node + when HTML::Tag + if node.closing == :close + options[:parent].shift + else + options[:parent].unshift node.name + end + + process_attributes_for node, options + + options[:tags].include?(node.name) ? node : nil + else + bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/