vendor/rails/actionpack/lib/action_controller/vendor/html-scanner/html/selector.rb

   1 #--
   2 # Copyright (c) 2006 Assaf Arkin (http://labnotes.org)
   3 # Under MIT and/or CC By license.
   4 #++
   5
   6 module HTML
   7
   8   # Selects HTML elements using CSS 2 selectors.
   9   #
  10   # The +Selector+ class uses CSS selector expressions to match and select
  11   # HTML elements.
  12   #
  13   # For example:
  14   #   selector = HTML::Selector.new "form.login[action=/login]"
  15   # creates a new selector that matches any +form+ element with the class
  16   # +login+ and an attribute +action+ with the value <tt>/login</tt>.
  17   #
  18   # === Matching Elements
  19   #
  20   # Use the #match method to determine if an element matches the selector.
  21   #
  22   # For simple selectors, the method returns an array with that element,
  23   # or +nil+ if the element does not match. For complex selectors (see below)
  # the method returns an array with all matched elements, or +nil+ if no
  25   # match found.
  26   #
  27   # For example:
  28   #   if selector.match(element)
  29   #     puts "Element is a login form"
  30   #   end
  31   #
  32   # === Selecting Elements
  33   #
  34   # Use the #select method to select all matching elements starting with
  35   # one element and going through all children in depth-first order.
  36   #
  37   # This method returns an array of all matching elements, an empty array
  38   # if no match is found
  39   #
  40   # For example:
  41   #   selector = HTML::Selector.new "input[type=text]"
  42   #   matches = selector.select(element)
  43   #   matches.each do |match|
  44   #     puts "Found text field with name #{match.attributes['name']}"
  45   #   end
  46   #
  47   # === Expressions
  48   #
  49   # Selectors can match elements using any of the following criteria:
  50   # * <tt>name</tt> -- Match an element based on its name (tag name).
  51   #   For example, <tt>p</tt> to match a paragraph. You can use <tt>*</tt>
  52   #   to match any element.
  53   # * <tt>#</tt><tt>id</tt> -- Match an element based on its identifier (the
  54   #   <tt>id</tt> attribute). For example, <tt>#</tt><tt>page</tt>.
  55   # * <tt>.class</tt> -- Match an element based on its class name, all
  56   #   class names if more than one specified.
  57   # * <tt>[attr]</tt> -- Match an element that has the specified attribute.
  58   # * <tt>[attr=value]</tt> -- Match an element that has the specified
  #   attribute and value. (More operators are supported; see below.)
  60   # * <tt>:pseudo-class</tt> -- Match an element based on a pseudo class,
  61   #   such as <tt>:nth-child</tt> and <tt>:empty</tt>.
  62   # * <tt>:not(expr)</tt> -- Match an element that does not match the
  63   #   negation expression.
  64   #
  65   # When using a combination of the above, the element name comes first
  66   # followed by identifier, class names, attributes, pseudo classes and
  67   # negation in any order. Do not separate these parts with spaces!
  68   # Space separation is used for descendant selectors.
  69   #
  70   # For example:
  71   #   selector = HTML::Selector.new "form.login[action=/login]"
  72   # The matched element must be of type +form+ and have the class +login+.
  73   # It may have other classes, but the class +login+ is required to match.
  74   # It must also have an attribute called +action+ with the value
  75   # <tt>/login</tt>.
  76   #
  77   # This selector will match the following element:
  78   #   <form class="login form" method="post" action="/login">
  79   # but will not match the element:
  80   #   <form method="post" action="/logout">
  81   #
  82   # === Attribute Values
  83   #
  84   # Several operators are supported for matching attributes:
  85   # * <tt>name</tt> -- The element must have an attribute with that name.
  86   # * <tt>name=value</tt> -- The element must have an attribute with that
  87   #   name and value.
  88   # * <tt>name^=value</tt> -- The attribute value must start with the
  89   #   specified value.
  90   # * <tt>name$=value</tt> -- The attribute value must end with the
  91   #   specified value.
  92   # * <tt>name*=value</tt> -- The attribute value must contain the
  93   #   specified value.
  94   # * <tt>name~=word</tt> -- The attribute value must contain the specified
  95   #   word (space separated).
  96   # * <tt>name|=word</tt> -- The attribute value must start with specified
  97   #   word.
  98   #
  99   # For example, the following two selectors match the same element:
 100   #   #my_id
 101   #   [id=my_id]
 102   # and so do the following two selectors:
 103   #   .my_class
 104   #   [class~=my_class]
 105   #
 106   # === Alternatives, siblings, children
 107   #
 108   # Complex selectors use a combination of expressions to match elements:
 109   # * <tt>expr1 expr2</tt> -- Match any element against the second expression
 110   #   if it has some parent element that matches the first expression.
 111   # * <tt>expr1 > expr2</tt> -- Match any element against the second expression
 112   #   if it is the child of an element that matches the first expression.
 113   # * <tt>expr1 + expr2</tt> -- Match any element against the second expression
 114   #   if it immediately follows an element that matches the first expression.
 115   # * <tt>expr1 ~ expr2</tt> -- Match any element against the second expression
 116   #   that comes after an element that matches the first expression.
 117   # * <tt>expr1, expr2</tt> -- Match any element against the first expression,
 118   #   or against the second expression.
 119   #
 120   # Since children and sibling selectors may match more than one element given
 121   # the first element, the #match method may return more than one match.
 122   #
 123   # === Pseudo classes
 124   #
 125   # Pseudo classes were introduced in CSS 3. They are most often used to select
 126   # elements in a given position:
 127   # * <tt>:root</tt> -- Match the element only if it is the root element
 128   #   (no parent element).
 129   # * <tt>:empty</tt> -- Match the element only if it has no child elements,
 130   #   and no text content.
 131   # * <tt>:only-child</tt> -- Match the element if it is the only child (element)
 132   #   of its parent element.
 133   # * <tt>:only-of-type</tt> -- Match the element if it is the only child (element)
 134   #   of its parent element and its type.
 135   # * <tt>:first-child</tt> -- Match the element if it is the first child (element)
 136   #   of its parent element.
 137   # * <tt>:first-of-type</tt> -- Match the element if it is the first child (element)
 138   #   of its parent element of its type.
 139   # * <tt>:last-child</tt> -- Match the element if it is the last child (element)
 140   #   of its parent element.
 141   # * <tt>:last-of-type</tt> -- Match the element if it is the last child (element)
 142   #   of its parent element of its type.
 143   # * <tt>:nth-child(b)</tt> -- Match the element if it is the b-th child (element)
 144   #   of its parent element. The value <tt>b</tt> specifies its index, starting with 1.
 145   # * <tt>:nth-child(an+b)</tt> -- Match the element if it is the b-th child (element)
 146   #   in each group of <tt>a</tt> child elements of its parent element.
 147   # * <tt>:nth-child(-an+b)</tt> -- Match the element if it is the first child (element)
 148   #   in each group of <tt>a</tt> child elements, up to the first <tt>b</tt> child
 149   #   elements of its parent element.
 150   # * <tt>:nth-child(odd)</tt> -- Match element in the odd position (i.e. first, third).
 151   #   Same as <tt>:nth-child(2n+1)</tt>.
 152   # * <tt>:nth-child(even)</tt> -- Match element in the even position (i.e. second,
 153   #   fourth). Same as <tt>:nth-child(2n+2)</tt>.
 154   # * <tt>:nth-of-type(..)</tt> -- As above, but only counts elements of its type.
 155   # * <tt>:nth-last-child(..)</tt> -- As above, but counts from the last child.
 156   # * <tt>:nth-last-of-type(..)</tt> -- As above, but counts from the last child and
 157   #   only elements of its type.
 158   # * <tt>:not(selector)</tt> -- Match the element only if the element does not
 159   #   match the simple selector.
 160   #
  # As you can see, the <tt>:nth-child</tt> pseudo class and its variants can get quite
 162   # tricky and the CSS specification doesn't do a much better job explaining it.
 163   # But after reading the examples and trying a few combinations, it's easy to
 164   # figure out.
 165   #
 166   # For example:
 167   #   table tr:nth-child(odd)
 168   # Selects every second row in the table starting with the first one.
 169   #
 170   #   div p:nth-child(4)
 171   # Selects the fourth paragraph in the +div+, but not if the +div+ contains
 172   # other elements, since those are also counted.
 173   #
 174   #   div p:nth-of-type(4)
 175   # Selects the fourth paragraph in the +div+, counting only paragraphs, and
 176   # ignoring all other elements.
 177   #
 178   #   div p:nth-of-type(-n+4)
 179   # Selects the first four paragraphs, ignoring all others.
 180   #
 181   # And you can always select an element that matches one set of rules but
 182   # not another using <tt>:not</tt>. For example:
 183   #   p:not(.post)
 184   # Matches all paragraphs that do not have the class <tt>.post</tt>.
 185   #
 186   # === Substitution Values
 187   #
 188   # You can use substitution with identifiers, class names and element values.
 189   # A substitution takes the form of a question mark (<tt>?</tt>) and uses the
 190   # next value in the argument list following the CSS expression.
 191   #
 192   # The substitution value may be a string or a regular expression. All other
 193   # values are converted to strings.
 194   #
 195   # For example:
 196   #   selector = HTML::Selector.new "#?", /^\d+$/
 197   # matches any element whose identifier consists of one or more digits.
 198   #
 199   # See http://www.w3.org/TR/css3-selectors/
 200   class Selector
 201
 202
 203     # An invalid selector.
 204     class InvalidSelectorError < StandardError #:nodoc:
 205     end
 206
 207
 208     class << self
 209
 210       # :call-seq:
 211       #   Selector.for_class(cls) => selector
 212       #
 213       # Creates a new selector for the given class name.
 214       def for_class(cls)
 215         self.new([".?", cls])
 216       end
 217
 218
 219       # :call-seq:
 220       #   Selector.for_id(id) => selector
 221       #
 222       # Creates a new selector for the given id.
 223       def for_id(id)
 224         self.new(["#?", id])
 225       end
 226
 227     end
 228
 229
 230     # :call-seq:
 231     #   Selector.new(string, [values ...]) => selector
 232     #
 233     # Creates a new selector from a CSS 2 selector expression.
 234     #
 235     # The first argument is the selector expression. All other arguments
 236     # are used for value substitution.
 237     #
 238     # Throws InvalidSelectorError is the selector expression is invalid.
 239     def initialize(selector, *values)
 240       raise ArgumentError, "CSS expression cannot be empty" if selector.empty?
 241       @source = ""
 242       values = values[0] if values.size == 1 && values[0].is_a?(Array)
 243
 244       # We need a copy to determine if we failed to parse, and also
 245       # preserve the original pass by-ref statement.
 246       statement = selector.strip.dup
 247
 248       # Create a simple selector, along with negation.
 249       simple_selector(statement, values).each { |name, value| instance_variable_set("@#{name}", value) }
 250
 251       @alternates = []
 252       @depends = nil
 253
 254       # Alternative selector.
 255       if statement.sub!(/^\s*,\s*/, "")
 256         second = Selector.new(statement, values)
 257         @alternates << second
 258         # If there are alternate selectors, we group them in the top selector.
 259         if alternates = second.instance_variable_get(:@alternates)
 260           second.instance_variable_set(:@alternates, [])
 261           @alternates.concat alternates
 262         end
 263         @source << " , " << second.to_s
 264       # Sibling selector: create a dependency into second selector that will
 265       # match element immediately following this one.
 266       elsif statement.sub!(/^\s*\+\s*/, "")
 267         second = next_selector(statement, values)
 268         @depends = lambda do |element, first|
 269           if element = next_element(element)
 270             second.match(element, first)
 271           end
 272         end
 273         @source << " + " << second.to_s
 274       # Adjacent selector: create a dependency into second selector that will
 275       # match all elements following this one.
 276       elsif statement.sub!(/^\s*~\s*/, "")
 277         second = next_selector(statement, values)
 278         @depends = lambda do |element, first|
 279           matches = []
 280           while element = next_element(element)
 281             if subset = second.match(element, first)
 282               if first && !subset.empty?
 283                 matches << subset.first
 284                 break
 285               else
 286                 matches.concat subset
 287               end
 288             end
 289           end
 290           matches.empty? ? nil : matches
 291         end
 292         @source << " ~ " << second.to_s
 293       # Child selector: create a dependency into second selector that will
 294       # match a child element of this one.
 295       elsif statement.sub!(/^\s*>\s*/, "")
 296         second = next_selector(statement, values)
 297         @depends = lambda do |element, first|
 298           matches = []
 299           element.children.each do |child|
 300             if child.tag? && subset = second.match(child, first)
 301               if first && !subset.empty?
 302                 matches << subset.first
 303                 break
 304               else
 305                 matches.concat subset
 306               end
 307             end
 308           end
 309           matches.empty? ? nil : matches
 310         end
 311         @source << " > " << second.to_s
 312       # Descendant selector: create a dependency into second selector that
 313       # will match all descendant elements of this one. Note,
 314       elsif statement =~ /^\s+\S+/ && statement != selector
 315         second = next_selector(statement, values)
 316         @depends = lambda do |element, first|
 317           matches = []
 318           stack = element.children.reverse
 319           while node = stack.pop
 320             next unless node.tag?
 321             if subset = second.match(node, first)
 322               if first && !subset.empty?
 323                 matches << subset.first
 324                 break
 325               else
 326                 matches.concat subset
 327               end
 328             elsif children = node.children
 329               stack.concat children.reverse
 330             end
 331           end
 332           matches.empty? ? nil : matches
 333         end
 334         @source << " " << second.to_s
 335       else
 336         # The last selector is where we check that we parsed
 337         # all the parts.
 338         unless statement.empty? || statement.strip.empty?
 339           raise ArgumentError, "Invalid selector: #{statement}"
 340         end
 341       end
 342     end
 343
 344
 345     # :call-seq:
 346     #   match(element, first?) => array or nil
 347     #
 348     # Matches an element against the selector.
 349     #
 350     # For a simple selector this method returns an array with the
 351     # element if the element matches, nil otherwise.
 352     #
 353     # For a complex selector (sibling and descendant) this method
 354     # returns an array with all matching elements, nil if no match is
 355     # found.
 356     #
 357     # Use +first_only=true+ if you are only interested in the first element.
 358     #
 359     # For example:
 360     #   if selector.match(element)
 361     #     puts "Element is a login form"
 362     #   end
 363     def match(element, first_only = false)
 364       # Match element if no element name or element name same as element name
 365       if matched = (!@tag_name || @tag_name == element.name)
 366         # No match if one of the attribute matches failed
 367         for attr in @attributes
 368           if element.attributes[attr[0]] !~ attr[1]
 369             matched = false
 370             break
 371           end
 372         end
 373       end
 374
 375       # Pseudo class matches (nth-child, empty, etc).
 376       if matched
 377         for pseudo in @pseudo
 378           unless pseudo.call(element)
 379             matched = false
 380             break
 381           end
 382         end
 383       end
 384
 385       # Negation. Same rules as above, but we fail if a match is made.
 386       if matched && @negation
 387         for negation in @negation
 388           if negation[:tag_name] == element.name
 389             matched = false
 390           else
 391             for attr in negation[:attributes]
 392               if element.attributes[attr[0]] =~ attr[1]
 393                 matched = false
 394                 break
 395               end
 396             end
 397           end
 398           if matched
 399             for pseudo in negation[:pseudo]
 400               if pseudo.call(element)
 401                 matched = false
 402                 break
 403               end
 404             end
 405           end
 406           break unless matched
 407         end
 408       end
 409
 410       # If element matched but depends on another element (child,
 411       # sibling, etc), apply the dependent matches instead.
 412       if matched && @depends
 413         matches = @depends.call(element, first_only)
 414       else
 415         matches = matched ? [element] : nil
 416       end
 417
 418       # If this selector is part of the group, try all the alternative
 419       # selectors (unless first_only).
 420       if !first_only || !matches
 421         @alternates.each do |alternate|
 422           break if matches && first_only
 423           if subset = alternate.match(element, first_only)
 424             if matches
 425               matches.concat subset
 426             else
 427               matches = subset
 428             end
 429           end
 430         end
 431       end
 432
 433       matches
 434     end
 435
 436
 437     # :call-seq:
 438     #   select(root) => array
 439     #
 440     # Selects and returns an array with all matching elements, beginning
 441     # with one node and traversing through all children depth-first.
 442     # Returns an empty array if no match is found.
 443     #
 444     # The root node may be any element in the document, or the document
 445     # itself.
 446     #
 447     # For example:
 448     #   selector = HTML::Selector.new "input[type=text]"
 449     #   matches = selector.select(element)
 450     #   matches.each do |match|
 451     #     puts "Found text field with name #{match.attributes['name']}"
 452     #   end
 453     def select(root)
 454       matches = []
 455       stack = [root]
 456       while node = stack.pop
 457         if node.tag? && subset = match(node, false)
 458           subset.each do |match|
 459             matches << match unless matches.any? { |item| item.equal?(match) }
 460           end
 461         elsif children = node.children
 462           stack.concat children.reverse
 463         end
 464       end
 465       matches
 466     end
 467
 468
 469     # Similar to #select but returns the first matching element. Returns +nil+
 470     # if no element matches the selector.
 471     def select_first(root)
 472       stack = [root]
 473       while node = stack.pop
 474         if node.tag? && subset = match(node, true)
 475           return subset.first if !subset.empty?
 476         elsif children = node.children
 477           stack.concat children.reverse
 478         end
 479       end
 480       nil
 481     end
 482
 483
 484     def to_s #:nodoc:
 485       @source
 486     end
 487
 488
 489     # Return the next element after this one. Skips sibling text nodes.
 490     #
 491     # With the +name+ argument, returns the next element with that name,
 492     # skipping other sibling elements.
 493     def next_element(element, name = nil)
 494       if siblings = element.parent.children
 495         found = false
 496         siblings.each do |node|
 497           if node.equal?(element)
 498             found = true
 499           elsif found && node.tag?
 500             return node if (name.nil? || node.name == name)
 501           end
 502         end
 503       end
 504       nil
 505     end
 506
 507
 508   protected
 509
 510
 511     # Creates a simple selector given the statement and array of
 512     # substitution values.
 513     #
 514     # Returns a hash with the values +tag_name+, +attributes+,
 515     # +pseudo+ (classes) and +negation+.
 516     #
 517     # Called the first time with +can_negate+ true to allow
 518     # negation. Called a second time with false since negation
 519     # cannot be negated.
 520     def simple_selector(statement, values, can_negate = true)
 521       tag_name = nil
 522       attributes = []
 523       pseudo = []
 524       negation = []
 525
 526       # Element name. (Note that in negation, this can come at
 527       # any order, but for simplicity we allow if only first).
 528       statement.sub!(/^(\*|[[:alpha:]][\w\-]*)/) do |match|
 529         match.strip!
 530         tag_name = match.downcase unless match == "*"
 531         @source << match
 532         "" # Remove
 533       end
 534
 535       # Get identifier, class, attribute name, pseudo or negation.
 536       while true
 537         # Element identifier.
 538         next if statement.sub!(/^#(\?|[\w\-]+)/) do |match|
 539           id = $1
 540           if id == "?"
 541             id = values.shift
 542           end
 543           @source << "##{id}"
 544           id = Regexp.new("^#{Regexp.escape(id.to_s)}$") unless id.is_a?(Regexp)
 545           attributes << ["id", id]
 546           "" # Remove
 547         end
 548
 549         # Class name.
 550         next if statement.sub!(/^\.([\w\-]+)/) do |match|
 551           class_name = $1
 552           @source << ".#{class_name}"
 553           class_name = Regexp.new("(^|\s)#{Regexp.escape(class_name)}($|\s)") unless class_name.is_a?(Regexp)
 554           attributes << ["class", class_name]
 555           "" # Remove
 556         end
 557
 558         # Attribute value.
 559         next if statement.sub!(/^\[\s*([[:alpha:]][\w\-]*)\s*((?:[~|^$*])?=)?\s*('[^']*'|"[^*]"|[^\]]*)\s*\]/) do |match|
 560           name, equality, value = $1, $2, $3
 561           if value == "?"
 562             value = values.shift
 563           else
 564             # Handle single and double quotes.
 565             value.strip!
 566             if (value[0] == ?" || value[0] == ?') && value[0] == value[-1]
 567               value = value[1..-2]
 568             end
 569           end
 570           @source << "[#{name}#{equality}'#{value}']"
 571           attributes << [name.downcase.strip, attribute_match(equality, value)]
 572           "" # Remove
 573         end
 574
 575         # Root element only.
 576         next if statement.sub!(/^:root/) do |match|
 577           pseudo << lambda do |element|
 578             element.parent.nil? || !element.parent.tag?
 579           end
 580           @source << ":root"
 581           "" # Remove
 582         end
 583
 584         # Nth-child including last and of-type.
 585         next if statement.sub!(/^:nth-(last-)?(child|of-type)\((odd|even|(\d+|\?)|(-?\d*|\?)?n([+\-]\d+|\?)?)\)/) do |match|
 586           reverse = $1 == "last-"
 587           of_type = $2 == "of-type"
 588           @source << ":nth-#{$1}#{$2}("
 589           case $3
 590             when "odd"
 591               pseudo << nth_child(2, 1, of_type, reverse)
 592               @source << "odd)"
 593             when "even"
 594               pseudo << nth_child(2, 2, of_type, reverse)
 595               @source << "even)"
 596             when /^(\d+|\?)$/  # b only
 597               b = ($1 == "?" ? values.shift : $1).to_i
 598               pseudo << nth_child(0, b, of_type, reverse)
 599               @source << "#{b})"
 600             when /^(-?\d*|\?)?n([+\-]\d+|\?)?$/
 601               a = ($1 == "?" ? values.shift :
 602                    $1 == "" ? 1 : $1 == "-" ? -1 : $1).to_i
 603               b = ($2 == "?" ? values.shift : $2).to_i
 604               pseudo << nth_child(a, b, of_type, reverse)
 605               @source << (b >= 0 ? "#{a}n+#{b})" : "#{a}n#{b})")
 606             else
 607               raise ArgumentError, "Invalid nth-child #{match}"
 608           end
 609           "" # Remove
 610         end
 611         # First/last child (of type).
 612         next if statement.sub!(/^:(first|last)-(child|of-type)/) do |match|
 613           reverse = $1 == "last"
 614           of_type = $2 == "of-type"
 615           pseudo << nth_child(0, 1, of_type, reverse)
 616           @source << ":#{$1}-#{$2}"
 617           "" # Remove
 618         end
 619         # Only child (of type).
 620         next if statement.sub!(/^:only-(child|of-type)/) do |match|
 621           of_type = $1 == "of-type"
 622           pseudo << only_child(of_type)
 623           @source << ":only-#{$1}"
 624           "" # Remove
 625         end
 626
 627         # Empty: no child elements or meaningful content (whitespaces
 628         # are ignored).
 629         next if statement.sub!(/^:empty/) do |match|
 630           pseudo << lambda do |element|
 631             empty = true
 632             for child in element.children
 633               if child.tag? || !child.content.strip.empty?
 634                 empty = false
 635                 break
 636               end
 637             end
 638             empty
 639           end
 640           @source << ":empty"
 641           "" # Remove
 642         end
 643         # Content: match the text content of the element, stripping
 644         # leading and trailing spaces.
 645         next if statement.sub!(/^:content\(\s*(\?|'[^']*'|"[^"]*"|[^)]*)\s*\)/) do |match|
 646           content = $1
 647           if content == "?"
 648             content = values.shift
 649           elsif (content[0] == ?" || content[0] == ?') && content[0] == content[-1]
 650             content = content[1..-2]
 651           end
 652           @source << ":content('#{content}')"
 653           content = Regexp.new("^#{Regexp.escape(content.to_s)}$") unless content.is_a?(Regexp)
 654           pseudo << lambda do |element|
 655             text = ""
 656             for child in element.children
 657               unless child.tag?
 658                 text << child.content
 659               end
 660             end
 661             text.strip =~ content
 662           end
 663           "" # Remove
 664         end
 665
 666         # Negation. Create another simple selector to handle it.
 667         if statement.sub!(/^:not\(\s*/, "")
 668           raise ArgumentError, "Double negatives are not missing feature" unless can_negate
 669           @source << ":not("
 670           negation << simple_selector(statement, values, false)
 671           raise ArgumentError, "Negation not closed" unless statement.sub!(/^\s*\)/, "")
 672           @source << ")"
 673           next
 674         end
 675
 676         # No match: moving on.
 677         break
 678       end
 679
 680       # Return hash. The keys are mapped to instance variables.
 681       {:tag_name=>tag_name, :attributes=>attributes, :pseudo=>pseudo, :negation=>negation}
 682     end
 683
 684
 685     # Create a regular expression to match an attribute value based
 686     # on the equality operator (=, ^=, |=, etc).
 687     def attribute_match(equality, value)
 688       regexp = value.is_a?(Regexp) ? value : Regexp.escape(value.to_s)
 689       case equality
 690         when "=" then
 691           # Match the attribute value in full
 692           Regexp.new("^#{regexp}$")
 693         when "~=" then
 694           # Match a space-separated word within the attribute value
 695           Regexp.new("(^|\s)#{regexp}($|\s)")
 696         when "^="
 697           # Match the beginning of the attribute value
 698           Regexp.new("^#{regexp}")
 699         when "$="
 700           # Match the end of the attribute value
 701           Regexp.new("#{regexp}$")
 702         when "*="
 703           # Match substring of the attribute value
 704           regexp.is_a?(Regexp) ? regexp : Regexp.new(regexp)
 705         when "|=" then
 706           # Match the first space-separated item of the attribute value
 707           Regexp.new("^#{regexp}($|\s)")
 708         else
 709           raise InvalidSelectorError, "Invalid operation/value" unless value.empty?
 710           # Match all attributes values (existence check)
 711           //
 712       end
 713     end
 714
 715
 716     # Returns a lambda that can match an element against the nth-child
 717     # pseudo class, given the following arguments:
 718     # * +a+ -- Value of a part.
 719     # * +b+ -- Value of b part.
 720     # * +of_type+ -- True to test only elements of this type (of-type).
 721     # * +reverse+ -- True to count in reverse order (last-).
 722     def nth_child(a, b, of_type, reverse)
 723       # a = 0 means select at index b, if b = 0 nothing selected
 724       return lambda { |element| false } if a == 0 && b == 0
 725       # a < 0 and b < 0 will never match against an index
 726       return lambda { |element| false } if a < 0 && b < 0
 727       b = a + b + 1 if b < 0   # b < 0 just picks last element from each group
 728       b -= 1 unless b == 0  # b == 0 is same as b == 1, otherwise zero based
 729       lambda do |element|
 730         # Element must be inside parent element.
 731         return false unless element.parent && element.parent.tag?
 732         index = 0
 733         # Get siblings, reverse if counting from last.
 734         siblings = element.parent.children
 735         siblings = siblings.reverse if reverse
 736         # Match element name if of-type, otherwise ignore name.
 737         name = of_type ? element.name : nil
 738         found = false
 739         for child in siblings
 740           # Skip text nodes/comments.
 741           if child.tag? && (name == nil || child.name == name)
 742             if a == 0
 743               # Shortcut when a == 0 no need to go past count
 744               if index == b
 745                 found = child.equal?(element)
 746                 break
 747               end
 748             elsif a < 0
 749               # Only look for first b elements
 750               break if index > b
 751               if child.equal?(element)
 752                 found = (index % a) == 0
 753                 break
 754               end
 755             else
 756               # Otherwise, break if child found and count ==  an+b
 757               if child.equal?(element)
 758                 found = (index % a) == b
 759                 break
 760               end
 761             end
 762             index += 1
 763           end
 764         end
 765         found
 766       end
 767     end
 768
 769
 770     # Creates a only child lambda. Pass +of-type+ to only look at
 771     # elements of its type.
 772     def only_child(of_type)
 773       lambda do |element|
 774         # Element must be inside parent element.
 775         return false unless element.parent && element.parent.tag?
 776         name = of_type ? element.name : nil
 777         other = false
 778         for child in element.parent.children
 779           # Skip text nodes/comments.
 780           if child.tag? && (name == nil || child.name == name)
 781             unless child.equal?(element)
 782               other = true
 783               break
 784             end
 785           end
 786         end
 787         !other
 788       end
 789     end
 790
 791
 792     # Called to create a dependent selector (sibling, descendant, etc).
 793     # Passes the remainder of the statement that will be reduced to zero
 794     # eventually, and array of substitution values.
 795     #
 796     # This method is called from four places, so it helps to put it here
 797     # for reuse. The only logic deals with the need to detect comma
 798     # separators (alternate) and apply them to the selector group of the
 799     # top selector.
 800     def next_selector(statement, values)
 801       second = Selector.new(statement, values)
 802       # If there are alternate selectors, we group them in the top selector.
 803       if alternates = second.instance_variable_get(:@alternates)
 804         second.instance_variable_set(:@alternates, [])
 805         @alternates.concat alternates
 806       end
 807       second
 808     end
 809
 810   end
 811
 812
 813   # See HTML::Selector.new
 814   def self.selector(statement, *values)
 815     Selector.new(statement, *values)
 816   end
 817
 818
 819   class Tag
 820
 821     def select(selector, *values)
 822       selector = HTML::Selector.new(selector, values)
 823       selector.select(self)
 824     end
 825
 826   end
 827
 828 end