File: rexml/element.rb

Overview
Module Structure
Class Hierarchy
Code

Overview

Module Structure

  module: <Toplevel Module>
  module: REXML#8
  class: Element#20
includes
  Namespace ( REXML )
inherits from
  Parent ( REXML )
has properties
constant: UNDEFINED #23
attribute: attributes [R] #27
attribute: elements [R] #27
attribute: context [RW] #30
method: initialize / 3 #58
method: inspect #76
method: clone #96
method: root_node #119
method: root #123
method: document #131
method: whitespace #145
method: ignore_whitespace_nodes #161
method: raw #178
method: prefixes #195
method: namespaces #202
method: namespace / 1 #222
method: add_namespace / 2 #250
method: delete_namespace #270
method: add_element #295
method: delete_element #315
method: has_elements? #324
method: each_element_with_attribute / 5 #350
method: each_element_with_text / 4 #381
method: each_element / 2 #392
method: get_elements / 1 #400
method: next_element #409
method: previous_element #420
method: has_text? #432
method: text / 1 #450
method: get_text #461
method: text= / 1 #492
method: add_text / 1 #522
method: node_type #534
method: xpath #538
method: attribute / 2 #553
method: has_attributes? #562
method: add_attribute / 2 #583
method: add_attributes #595
method: delete_attribute / 1 #616
method: cdatas #627
method: comments #633
method: instructions #639
method: texts #645
method: write / 4 #674
method: __to_xpath_helper #690
method: each_with_something / 3 #705
  class: Elements#723
includes
  Enumerable ( Builtin-Module )
inherits from
  Object ( Builtin-Module )
has properties
method: initialize #727
method: [] / 2 #752
method: []= / 2 #785
method: empty? #796
method: index #803
method: delete #825
method: delete_all / 1 #840
method: add #861
alias: << add #874
method: each / 2 #890
method: collect / 2 #894
method: inject / 3 #902
method: size #921
method: to_a / 1 #934
method: literalize #942
  class: Attributes#954
inherits from
  Hash ( Builtin-Module )
has properties
method: initialize #957
method: [] / 1 #972
method: to_a #978
method: length #985
alias: size length #990
method: each_attribute #999
method: each #1014
method: get_attribute / 1 #1029
method: []= / 2 #1074
method: prefixes #1119
method: namespaces #1135
method: delete / 1 #1161
method: add / 1 #1193
alias: << add #1197
method: delete_all / 1 #1203
method: get_attribute_ns / 2 #1217

Class Hierarchy

Object ( Builtin-Module )
Hash ( Builtin-Module )
  Attributes ( REXML ) #954
Child ( REXML )
Parent ( REXML )
  Element    #20
Elements ( REXML ) — #723

Code

   1  require "rexml/parent"
   2  require "rexml/namespace"
   3  require "rexml/attribute"
   4  require "rexml/cdata"
   5  require "rexml/xpath"
   6  require "rexml/parseexception"
   7 
   8  module REXML
   9    # An implementation note about namespaces:
  10    # As we parse, when we find namespaces we put them in a hash and assign
  11    # them a unique ID.  We then convert the namespace prefix for the node
  12    # to the unique ID.  This makes namespace lookup much faster for the
  13    # cost of extra memory use.  We save the namespace prefix for the
  14    # context node and convert it back when we write it.
  15    @@namespaces = {}
  16 
  17    # Represents a tagged XML element.  Elements are characterized by
  18    # having children, attributes, and names, and can themselves be
  19    # children.
  20    class Element < Parent
  21      include Namespace
  22 
  23      UNDEFINED = "UNDEFINED";    # The default name
  24 
  25      # Mechanisms for accessing attributes and child elements of this
  26      # element.
  27      attr_reader :attributes, :elements
  28      # The context holds information about the processing environment, such as
  29      # whitespace handling.
  30      attr_accessor :context
  31 
  32      # Constructor
  33      # arg:: 
  34      #   if not supplied, will be set to the default value.
  35      #   If a String, the name of this object will be set to the argument.
  36      #   If an Element, the object will be shallowly cloned; name, 
  37      #   attributes, and namespaces will be copied.  Children will +not+ be
  38      #   copied.
  39      # parent:: 
  40      #   if supplied, must be a Parent, and will be used as
  41      #   the parent of this object.
  42      # context::
  43      #   If supplied, must be a hash containing context items.  Context items
  44      #   include:
  45      # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
  46      #   strings being the names of the elements to respect
  47      #   whitespace for.  Defaults to :+all+.
  48      # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
  49      #   strings being the names of the elements to ignore whitespace on.
  50      #   Overrides :+respect_whitespace+.
  51      # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
  52      #   of strings being the names of the elements in which to ignore
  53      #   whitespace-only nodes.  If this is set, Text nodes which contain only
  54      #   whitespace will not be added to the document tree.
  55      # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
  56      #   the elements to process in raw mode.  In raw mode, special
  57      #   characters in text are not converted to or from entities.
  58      def initialize( arg = UNDEFINED, parent=nil, context=nil )
            # Hand +parent+ to the superclass constructor before building state.
  59        super(parent)
  60 
  61        @elements = Elements.new(self)
  62        @attributes = Attributes.new(self)
  63        @context = context
  64 
            # A String only names the element.  An Element is copied shallowly:
            # its expanded name, a copy of each attribute and its context are
            # taken over (replacing the +context+ argument); children are not.
            # Any other argument leaves the element unnamed.
  65        case arg
  66        when String then self.name = arg
  67        when Element
  68          self.name = arg.expanded_name
  69          arg.attributes.each_attribute do |source_attribute|
  70            @attributes << Attribute.new( source_attribute )
  71          end
  72          @context = arg.context
  73        end
  74      end
  75 
  76      def inspect
  77        rv = "<#@expanded_name"
  78 
  79        @attributes.each_attribute do |attr|
  80          rv << " "
  81          attr.write( rv, 0 )
  82        end
  83 
  84        if children.size > 0
  85          rv << "> ... </>"
  86        else
  87          rv << "/>"
  88        end
  89      end
  90 
  91 
  92      # Creates a shallow copy of self.
  93      #   d = Document.new "<a><b/><b/><c><d/></c></a>"
  94      #   new_a = d.root.clone
  95      #   puts new_a  # => "<a/>"
  96      def clone
  97        self.class.new self
  98      end
  99 
 100      # Evaluates to the root node of the document that this element 
 101      # belongs to. If this element doesn't belong to a document, but does
 102      # belong to another Element, the parent's root will be returned, until the
 103      # earliest ancestor is found.
 104      #
 105      # Note that this is not the same as the document element.
 106      # In the following example, <a> is the document element, and the root
 107      # node is the parent node of the document element.  You may ask yourself
 108      # why the root node is useful: consider the doctype and XML declaration,
 109      # and any processing instructions before the document element... they
 110      # are children of the root node, or siblings of the document element.
 111      # The only time this isn't true is when an Element is created that is
 112      # not part of any Document.  In this case, the ancestor that has no
 113      # parent acts as the root node.
 114      #  d = Document.new '<a><b><c/></b></a>'
 115      #  a = d[1] ; c = a[1][1]
 116      #  d.root_node == d   # TRUE
 117      #  a.root_node        # namely, d
 118      #  c.root_node        # again, d
 119      def root_node
 120        parent.nil? ? self : parent.root_node
 121      end
 122 
 123      def root
 124        return elements[1] if self.kind_of? Document
 125        return self if parent.kind_of? Document or parent.nil?
 126        return parent.root
 127      end
 128 
 129      # Evaluates to the document to which this element belongs, or nil if this
 130      # element doesn't belong to a document.
 131      def document
 132        rt = root
 133        rt.parent if rt
 134      end
 135 
 136      # Evaluates to +true+ if whitespace is respected for this element.  This
 137      # is the case if:
 138      # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
 139      # 2. The context has :+respect_whitespace+ set to :+all+ or
 140      #    an array containing the name of this element, and 
 141      #    :+compress_whitespace+ isn't set to :+all+ or an array containing the 
 142      #    name of this element.
 143      # The evaluation is tested against +expanded_name+, and so is namespace
 144      # sensitive.
 145      def whitespace
            # Recomputed on every call; nil means "no decision yet".
 146        @whitespace = nil
 147        if @context
 148          if @context[:respect_whitespace]
                # :all, or a collection that includes this element's expanded name,
                # respects whitespace; a collection without the name does not.
 149            @whitespace = (@context[:respect_whitespace] == :all or
 150                           @context[:respect_whitespace].include? expanded_name)
 151          end
              # :compress_whitespace is checked last, so it overrides the
              # :respect_whitespace result when it matches this element.
 152          @whitespace = false if (@context[:compress_whitespace] and
 153                                  (@context[:compress_whitespace] == :all or
 154                                   @context[:compress_whitespace].include? expanded_name)
 155                                 )
 156        end
            # Anything other than an explicit false (including "no decision") is true.
 157        @whitespace = true unless @whitespace == false
 158        @whitespace
 159      end
 160 
 161      def ignore_whitespace_nodes
 162        @ignore_whitespace_nodes = false
 163        if @context
 164          if @context[:ignore_whitespace_nodes]
 165            @ignore_whitespace_nodes = 
 166              (@context[:ignore_whitespace_nodes] == :all or
 167               @context[:ignore_whitespace_nodes].include? expanded_name)
 168          end
 169        end
 170      end
 171 
 172      # Evaluates to +true+ if raw mode is set for this element.  This
 173      # is the case if the context has :+raw+ set to :+all+ or
 174      # an array containing the name of this element.
 175      #
 176      # The evaluation is tested against +expanded_name+, and so is namespace
 177      # sensitive.
 178      def raw
 179        @raw = (@context and @context[:raw] and
 180                (@context[:raw] == :all or
 181                 @context[:raw].include? expanded_name))
 182                 @raw
 183      end
 184 
 185      #once :whitespace, :raw, :ignore_whitespace_nodes
 186 
 187      #################################################
 188      # Namespaces                                    #
 189      #################################################
 190 
 191      # Evaluates to an +Array+ containing the prefixes (names) of all defined
 192      # namespaces at this context node.
 193      #  doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
 194      #  doc.elements['//b'].prefixes # -> ['x', 'y']
 195      def prefixes
 196        prefixes = []
 197        prefixes = parent.prefixes if parent
 198        prefixes |= attributes.prefixes
 199        return prefixes
 200      end
 201 
 202      def namespaces
 203        namespaces = {}
 204        namespaces = parent.namespaces if parent
 205        namespaces = namespaces.merge( attributes.namespaces )
 206        return namespaces
 207      end
 208 
 209      # Evaluates to the URI for a prefix, or the empty string if no such
 210      # namespace is declared for this element. Evaluates recursively for
 211      # ancestors.  Returns the default namespace, if there is one.
 212      # prefix:: 
 213      #   the prefix to search for.  If not supplied, returns the default
 214      #   namespace if one exists
 215      # Returns:: 
 216      #   the namespace URI as a String, or nil if no such namespace
 217      #   exists.  If the namespace is undefined, returns an empty string
 218      #  doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
 219      #  b = doc.elements['//b']
 220      #  b.namespace           # -> '1'
 221      #  b.namespace("y")      # -> '2'
 222      def namespace(prefix=nil)
            # With no argument, fall back to this element's own prefix.  The
            # parentheses force a method call, since +prefix+ is also the local.
 223        if prefix.nil?
 224          prefix = prefix()
 225        end
            # Turn the prefix into the name of the declaring attribute: "xmlns" for
            # the default namespace, "xmlns:<prefix>" otherwise.
 226        if prefix == ''
 227          prefix = "xmlns"
 228        else
              # NOTE(review): any value starting with "xmlns" is treated as already
              # qualified, whereas add_namespace tests for /^xmlns:/ -- confirm.
 229          prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
 230        end
            # Look on this element first, then recurse into the parent.
 231        ns = attributes[ prefix ]
 232        ns = parent.namespace(prefix) if ns.nil? and parent
            # An undeclared default namespace is reported as '' rather than nil.
 233        ns = '' if ns.nil? and prefix == 'xmlns'
 234        return ns
 235      end
 236 
 237      # Adds a namespace to this element.
 238      # prefix:: 
 239      #   the prefix string, or the namespace URI if +uri+ is not
 240      #   supplied
 241      # uri::    
 242      #   the namespace URI.  May be nil, in which +prefix+ is used as
 243      #   the URI
 244      # Evaluates to: this Element
 245      #  a = Element.new("a")
 246      #  a.add_namespace("xmlns:foo", "bar" )
 247      #  a.add_namespace("foo", "bar")  # shorthand for previous line
 248      #  a.add_namespace("twiddle")
 249      #  puts a   #-> <a xmlns:foo='bar' xmlns='twiddle'/>
 250      def add_namespace( prefix, uri=nil )
 251        unless uri
 252          @attributes["xmlns"] = prefix
 253        else
 254          prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/
 255          @attributes[ prefix ] = uri
 256        end
 257        self
 258      end
 259 
 260      # Removes a namespace from this node.  This only works if the namespace is
 261      # actually declared in this node.  If no argument is passed, deletes the
 262      # default namespace.
 263      #
 264      # Evaluates to: this element
 265      #  doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
 266      #  doc.root.delete_namespace
 267      #  puts doc     # -> <a xmlns:foo='bar'/>
 268      #  doc.root.delete_namespace 'foo'
 269      #  puts doc     # -> <a/>
 270      def delete_namespace namespace="xmlns"
 271        namespace = "xmlns:#{namespace}" unless namespace == 'xmlns'
 272        attribute = attributes.get_attribute(namespace)
 273        attribute.remove unless attribute.nil?
 274        self
 275      end
 276 
 277      #################################################
 278      # Elements                                      #
 279      #################################################
 280 
 281      # Adds a child to this element, optionally setting attributes in
 282      # the element.
 283      # element:: 
 284      #   optional.  If Element, the element is added.
 285      #   Otherwise, a new Element is constructed with the argument (see
 286      #   Element.initialize).
 287      # attrs:: 
 288      #   If supplied, must be a Hash containing String name,value 
 289      #   pairs, which will be used to set the attributes of the new Element.
 290      # Returns:: the Element that was added
 291      #  el = doc.add_element 'my-tag'
 292      #  el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
 293      #  el = Element.new 'my-tag'
 294      #  doc.add_element el
 295      def add_element element, attrs=nil
 296        raise "First argument must be either an element name, or an Element object" if element.nil?
 297        el = @elements.add(element)
 298        attrs.each do |key, value|
 299          el.attributes[key]=Attribute.new(key,value,self)
 300        end if attrs.kind_of? Hash
 301        el
 302      end
 303 
 304      # Deletes a child element.
 305      # element:: 
 306      #   Must be an +Element+, +String+, or +Integer+.  If Element, 
 307      #   the element is removed.  If String, the element is found (via XPath) 
 308      #   and removed.  <em>This means that any parent can remove any
 309      #   descendant.</em>  If Integer, the Element indexed by that number will be
 310      #   removed.
 311      # Returns:: the element that was removed.
 312      #  doc.delete_element "/a/b/c[@id='4']"
 313      #  doc.delete_element doc.elements["//k"]
 314      #  doc.delete_element 1
 315      def delete_element element
 316        @elements.delete element
 317      end
 318 
 319      # Evaluates to +true+ if this element has at least one child Element
 320      #  doc = Document.new "<a><b/><c>Text</c></a>"
 321      #  doc.root.has_elements?               # -> true
 322      #  doc.elements["/a/b"].has_elements?   # -> false
 323      #  doc.elements["/a/c"].has_elements?   # -> false
 324      def has_elements?
 325        !@elements.empty?
 326      end
 327 
 328      # Iterates through the child elements, yielding for each Element that
 329      # has a particular attribute set.
 330      # key:: 
 331      #   the name of the attribute to search for
 332      # value:: 
 333      #   the value of the attribute
 334      # max:: 
 335      #   (optional) causes this method to return after yielding 
 336      #   for this number of matching children
 337      # name:: 
 338      #   (optional) if supplied, this is an XPath that filters
 339      #   the children to check.
 340      #
 341      #  doc = Document.new "<a><b id='1'/><c id='2'/><d id='1'/><e/></a>"
 342      #  # Yields b, c, d
 343      #  doc.root.each_element_with_attribute( 'id' ) {|e| p e}
 344      #  # Yields b, d
 345      #  doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
 346      #  # Yields b
 347      #  doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
 348      #  # Yields d
 349      #  doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
 350      def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
 351        each_with_something( proc {|child| 
 352          if value.nil?
 353            child.attributes[key] != nil
 354          else
 355            child.attributes[key]==value
 356          end
 357        }, max, name, &block )
 358      end
 359 
 360      # Iterates through the children, yielding for each Element that
 361      # has a particular text set.
 362      # text:: 
 363      #   the text to search for.  If nil, or not supplied, will iterate
 364      #   over all +Element+ children that contain at least one +Text+ node.
 365      # max:: 
 366      #   (optional) causes this method to return after yielding
 367      #   for this number of matching children
 368      # name:: 
 369      #   (optional) if supplied, this is an XPath that filters
 370      #   the children to check.
 371      #
 372      #  doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
 373      #  # Yields b, c, d
 374      #  doc.each_element_with_text {|e|p e}
 375      #  # Yields b, c
 376      #  doc.each_element_with_text('b'){|e|p e}
 377      #  # Yields b
 378      #  doc.each_element_with_text('b', 1){|e|p e}
 379      #  # Yields d
 380      #  doc.each_element_with_text(nil, 0, 'd'){|e|p e}
 381      def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
 382        each_with_something( proc {|child| 
 383          if text.nil?
 384            child.has_text?
 385          else
 386            child.text == text
 387          end
 388        }, max, name, &block )
 389      end
 390 
 391      # Synonym for Element.elements.each
 392      def each_element( xpath=nil, &block ) # :yields: Element
 393        @elements.each( xpath, &block )
 394      end
 395 
 396      # Synonym for Element.elements.to_a
 397      # This is a little slower than calling elements.each directly.
 398      # xpath:: any XPath by which to search for elements in the tree
 399      # Returns:: an array of Elements that match the supplied path
 400      def get_elements( xpath )
 401        @elements.to_a( xpath )
 402      end
 403 
 404      # Returns the next sibling that is an element, or nil if there is
 405      # no Element sibling after this one
 406      #  doc = Document.new '<a><b/>text<c/></a>'
 407      #  doc.root.elements['b'].next_element          #-> <c/>
 408      #  doc.root.elements['c'].next_element          #-> nil
 409      def next_element
 410        element = next_sibling
 411        element = element.next_sibling until element.nil? or element.kind_of? Element 
 412        return element
 413      end
 414 
 415      # Returns the previous sibling that is an element, or nil if there is
 416      # no Element sibling prior to this one
 417      #  doc = Document.new '<a><b/>text<c/></a>'
 418      #  doc.root.elements['c'].previous_element          #-> <b/>
 419      #  doc.root.elements['b'].previous_element          #-> nil
 420      def previous_element
 421        element = previous_sibling
 422        element = element.previous_sibling until element.nil? or element.kind_of? Element
 423        return element
 424      end
 425 
 426 
 427      #################################################
 428      # Text                                          #
 429      #################################################
 430 
 431      # Evaluates to +true+ if this element has at least one Text child
 432      def has_text?
 433        not text().nil?
 434      end
 435 
 436      # A convenience method which returns the String value of the _first_
 437      # child text element, if one exists, and +nil+ otherwise.
 438      #
 439      # <em>Note that an element may have multiple Text elements, perhaps
 440      # separated by other children</em>.  Be aware that this method only returns
 441      # the first Text node.
 442      #
 443      # This method returns the +value+ of the first text child node, which
 444      # ignores the +raw+ setting, so always returns normalized text. See
 445      # the Text::value documentation.
 446      #
 447      #  doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
 448      #  # The element 'p' has two text elements, "some text " and " more text".
 449      #  doc.root.text              #-> "some text "
 450      def text( path = nil )
 451        rv = get_text(path)
 452        return rv.value unless rv.nil?
 453        nil
 454      end
 455 
 456      # Returns the first child Text node, if any, or +nil+ otherwise.
 457      # This method returns the actual +Text+ node, rather than the String content.
 458      #  doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
 459      #  # The element 'p' has two text elements, "some text " and " more text".
 460      #  doc.root.get_text.value            #-> "some text "
 461      def get_text path = nil
 462        rv = nil
 463        if path
 464          element = @elements[ path ]
 465          rv = element.get_text unless element.nil?
 466        else
 467          rv = @children.find { |node| node.kind_of? Text }
 468        end
 469        return rv
 470      end
 471 
 472      # Sets the first Text child of this object.  See text() for a
 473      # discussion about Text children.
 474      #
 475      # If a Text child already exists, the child is replaced by this
 476      # content.  This means that Text content can be deleted by calling
 477      # this method with a nil argument.  In this case, the next Text
 478      # child becomes the first Text child.  In no case is the order of
 479      # any siblings disturbed.
 480      # text:: 
 481      #   If a String, a new Text child is created and added to
 482      #   this Element as the first Text child.  If Text, the text is set
 483      #   as the first Child element.  If nil, then any existing first Text
 484      #   child is removed.
 485      # Returns:: this Element.
 486      #  doc = Document.new '<a><b/></a>'
 487      #  doc.root.text = 'Sean'      #-> '<a><b/>Sean</a>'
 488      #  doc.root.text = 'Elliott'   #-> '<a><b/>Elliott</a>'
 489      #  doc.root.add_element 'c'    #-> '<a><b/>Elliott<c/></a>'
 490      #  doc.root.text = 'Russell'   #-> '<a><b/>Russell<c/></a>'
 491      #  doc.root.text = nil         #-> '<a><b/><c/></a>'
 492      def text=( text )
            # Normalise the argument: Strings and any other non-nil, non-Text
            # object (via to_s) are wrapped in a new Text node built with this
            # element's whitespace and raw settings; Text and nil pass through.
 493        if text.kind_of? String
 494          text = Text.new( text, whitespace(), nil, raw() )
 495        elsif !text.nil? and !text.kind_of? Text
 496          text = Text.new( text.to_s, whitespace(), nil, raw() )
 497        end
 498        old_text = get_text
 499        if text.nil?
              # nil removes the current first Text child, if there is one.
 500          old_text.remove unless old_text.nil?
 501        else
 502          if old_text.nil?
                # No Text child yet: append the new node after existing children.
 503            self << text
 504          else
                # Replace the existing first Text child with the new node.
 505            old_text.replace_with( text )
 506          end
 507        end
 508        return self
 509      end
 510 
 511      # A helper method to add a Text child.  Actual Text instances can
 512      # be added with regular Parent methods, such as add() and <<()
 513      # text::
 514      #   if a String, a new Text instance is created and added
 515      #   to the parent.  If Text, the object is added directly.
 516      # Returns:: this Element
 517      #  e = Element.new('a')          #-> <a/>
 518      #  e.add_text 'foo'              #-> <a>foo</a>
 519      #  e.add_text Text.new(' bar')    #-> <a>foo bar</a>
 520      # Note that at the end of this example, the branch has <b>3</b> nodes; the 'a'
 521      # element and <b>2</b> Text node children.
 522      def add_text( text )
 523        if text.kind_of? String 
 524          if @children[-1].kind_of? Text
 525            @children[-1] << text
 526            return
 527          end
 528          text = Text.new( text, whitespace(), nil, raw() )
 529        end
 530        self << text unless text.nil?
 531        return self
 532      end
 533 
 534      def node_type
 535        :element
 536      end
 537 
 538      def xpath
 539        path_elements = []
 540        cur = self
 541        path_elements << __to_xpath_helper( self )
 542        while cur.parent
 543          cur = cur.parent
 544          path_elements << __to_xpath_helper( cur )
 545        end
 546        return path_elements.reverse.join( "/" )
 547      end
 548 
 549      #################################################
 550      # Attributes                                    #
 551      #################################################
 552 
 553      def attribute( name, namespace=nil )
 554        prefix = nil
 555        prefix = namespaces.index(namespace) if namespace
 556        prefix = nil if prefix == 'xmlns'
 557        attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
 558      end
 559 
 560      # Evaluates to +true+ if this element has any attributes set, false
 561      # otherwise.
 562      def has_attributes?
 563        return !@attributes.empty?
 564      end
 565 
 566      # Adds an attribute to this element, overwriting any existing attribute
 567      # by the same name.
 568      # key::
 569      #   can be either an Attribute or a String.  If an Attribute,
 570      #   the attribute is added to the list of Element attributes.  If String,
 571      #   the argument is used as the name of the new attribute, and the value
 572      #   parameter must be supplied.
 573      # value:: 
 574      #   Required if +key+ is a String, and ignored if the first argument is
 575      #   an Attribute.  This is a String, and is used as the value
 576      #   of the new Attribute.  This should be the unnormalized value of the
 577      #   attribute (without entities).
 578      # Returns:: the Attribute added
 579      #  e = Element.new 'e'
 580      #  e.add_attribute( 'a', 'b' )               #-> <e a='b'/>
 581      #  e.add_attribute( 'x:a', 'c' )             #-> <e a='b' x:a='c'/>
 582      #  e.add_attribute Attribute.new('b', 'd')   #-> <e a='b' x:a='c' b='d'/>
 583      def add_attribute( key, value=nil )
 584        if key.kind_of? Attribute
 585          @attributes << key
 586        else
 587          @attributes[key] = value
 588        end
 589      end
 590 
 591      # Add multiple attributes to this element.
 592      # hash:: is either a hash, or array of arrays
 593      #  el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
 594      #  el.add_attributes( [ ["name1","value1"], ["name2","value2"] ] )
 595      def add_attributes hash
 596        if hash.kind_of? Hash
 597          hash.each_pair {|key, value| @attributes[key] = value }
 598        elsif hash.kind_of? Array
 599          hash.each { |value| @attributes[ value[0] ] = value[1] }
 600        end
 601      end
 602 
 603      # Removes an attribute
 604      # key::
 605      #   either an Attribute or a String.  In either case, the
 606      #   attribute is found by matching the attribute name to the argument,
 607      #   and then removed.  If no attribute is found, no action is taken.
 608      # Returns:: 
 609      #   the attribute removed, or nil if this Element did not contain
 610      #   a matching attribute
 611      #  e = Element.new('E')
 612      #  e.add_attribute( 'name', 'Sean' )             #-> <E name='Sean'/>
 613      #  r = e.add_attribute( 'sur:name', 'Russell' )  #-> <E name='Sean' sur:name='Russell'/>
 614      #  e.delete_attribute( 'name' )                  #-> <E sur:name='Russell'/>
 615      #  e.delete_attribute( r )                       #-> <E/>
 616      def delete_attribute(key)
 617        attr = @attributes.get_attribute(key)
 618        attr.remove unless attr.nil?
 619      end
 620 
 621      #################################################
 622      # Other Utilities                               #
 623      #################################################
 624 
 625      # Get an array of all CData children.  
 626      # IMMUTABLE
 627      def cdatas
 628        find_all { |child| child.kind_of? CData }.freeze
 629      end
 630 
 631      # Get an array of all Comment children.
 632      # IMMUTABLE
 633      def comments
 634        find_all { |child| child.kind_of? Comment }.freeze
 635      end
 636 
 637      # Get an array of all Instruction children.
 638      # IMMUTABLE
 639      def instructions
 640        find_all { |child| child.kind_of? Instruction }.freeze
 641      end
 642 
 643      # Get an array of all Text children.
 644      # IMMUTABLE
 645      def texts
 646        find_all { |child| child.kind_of? Text }.freeze
 647      end
 648 
 649      # == DEPRECATED
 650      # See REXML::Formatters
 651      #
 652      # Writes out this element, and recursively, all children.
 653      # writer::
 654      #   an object which supports '<< string'; this is where the
 655      #   document will be written.
 656      # indent::
 657      #   An integer.  If -1, no indenting will be used; otherwise, the
 658      #   indentation will be this number of spaces, and children will be
 659      #   indented an additional amount.  Defaults to -1
 660      # transitive::
 661      #   If transitive is true and indent is >= 0, then the output will be
 662      #   pretty-printed in such a way that the added whitespace does not affect
 663      #   the parse tree of the document
 664      # ie_hack::
 665      #   Internet Explorer is the worst piece of crap to have ever been
 666      #   written, with the possible exception of Windows itself.  Since IE is
 667      #   unable to parse proper XML, we have to provide a hack to generate XML
 668      #   that IE's limited abilities can handle.  This hack inserts a space 
 669      #   before the /> on empty tags.  Defaults to false
 670      #
 671      #  out = ''
 672      #  doc.write( out )     #-> doc is written to the string 'out'
 673      #  doc.write( $stdout ) #-> doc written to the console
 def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
   # writer::     destination object supporting '<< string' (default $stdout)
   # indent::     Integer; -1 selects the Default (non-indenting) formatter
   # transitive:: only consulted when indent > -1; selects the Transitive
   #              formatter instead of the Pretty one
   # ie_hack::    passed through to whichever formatter is constructed
   # Returns whatever the chosen formatter's #write returns.
   Kernel.warn("#{self.class.name}.write is deprecated.  See REXML::Formatters")
   formatter =
     if indent > -1
       if transitive
         REXML::Formatters::Transitive.new( indent, ie_hack )
       else
         REXML::Formatters::Pretty.new( indent, ie_hack )
       end
     else
       REXML::Formatters::Default.new( ie_hack )
     end
   # The destination is the +writer+ parameter.  The previous code passed
   # an undefined local named +output+, which raised NameError on every call.
   formatter.write( self, writer )
 end
 687 
 688 
 689      private
 def __to_xpath_helper node
   # Builds one XPath step for +node+: a copy of its expanded name, followed
   # by a 1-based "[n]" position when the node's parent holds more than one
   # REXML::Element child with the same expanded name.
   step = node.expanded_name.clone   # clone so the node's own name is untouched
   container = node.parent
   return step unless container

   same_named = container.find_all do |sibling|
     sibling.kind_of?(REXML::Element) && sibling.expanded_name == node.expanded_name
   end
   # A unique name needs no positional predicate.
   return step if same_named.length <= 1

   position = same_named.index( node ) + 1
   step << "[#{position}]"
   step
 end
 703 
 704      # A private helper method
 def each_with_something( test, max=0, name=nil )
   # test:: a callable; a child is yielded only when test.call(child) is truthy
   # max::  when greater than 0, iteration stops (returning nil) as soon as
   #        the number of yielded children equals max
   # name:: passed unchanged to @elements.each
   matched = 0
   @elements.each( name ) do |candidate|
     if test.call(candidate)
       matched += 1
       yield candidate
     end
     # Checked after every child, whether or not it matched.
     return if max > 0 && matched == max
   end
 end
 713    end
 714 
 715    ########################################################################
 716    # ELEMENTS                                                             #
 717    ########################################################################
 718 
 719    # A class which provides filtering of children for Elements, and
 720    # XPath search support.  You are expected to only encounter this class as
 721    # the <tt>element.elements</tt> object.  Therefore, you are 
 722    # _not_ expected to instantiate this yourself.
 class Elements
   include Enumerable

   # Constructor
   # parent:: the parent Element whose Element children this object filters
   def initialize parent
     @element = parent
   end

   # Fetches a child element.  Filters only Element children, regardless of
   # the XPath match.
   # index::
   #   the search parameter.  This is either an Integer, which
   #   will be used to find the index'th child Element, or an XPath,
   #   which will be used to search for the Element.  <em>Because
   #   of the nature of XPath searches, any element in the connected XML
   #   document can be fetched through any other element.</em>  <b>The
   #   Integer index is 1-based, not 0-based.</b>  This means that the first
   #   child element is at index 1, not 0, and the +n+th element is at index
   #   +n+, not <tt>n-1</tt>.  This is because XPath indexes element children
   #   starting from 1, not 0, and the indexes should be the same.
   # name::
   #   optional, and only used if the first argument is an
   #   Integer.  In that case, the index'th child Element that has the
   #   supplied name will be returned.  Note again that the indexes start at 1.
   # Returns:: the first matching Element, or nil if no child matched
   # Raises:: RuntimeError if an Integer index smaller than 1 is supplied
   #  doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
   #  doc.root.elements[1]       #-> <b/>
   #  doc.root.elements['c']     #-> <c id="1"/>
   #  doc.root.elements[2,'c']   #-> <c id="2"/>
   def []( index, name=nil)
     if index.kind_of? Integer
       raise "index (#{index}) must be >= 1" if index < 1
       name = literalize(name) if name
       num = 0
       # num counts only the children that are Elements (and match +name+,
       # if given); the search stops at the index'th such child.
       @element.find { |child|
         child.kind_of? Element and
         (name.nil? ? true : child.has_name?( name )) and
         (num += 1) == index
       }
     else
       return XPath::first( @element, index )
     end
   end

   # Sets an element, replacing any previous matching element.  If no
   # existing element is found, the element is added.
   # index:: Used to find a matching element to replace.  See []().
   # element::
   #   The element to replace the existing element with
   # Returns:: the previous element, or nil if no previous element was found.
   #
   #  doc = Document.new '<a/>'
   #  doc.root.elements[10] = Element.new('b')    #-> <a><b/></a>
   #  doc.root.elements[1]                        #-> <b/>
   #  doc.root.elements[1] = Element.new('c')     #-> <a><c/></a>
   #  doc.root.elements['c'] = Element.new('d')   #-> <a><d/></a>
   def []=( index, element )
     previous = self[index]
     if previous.nil?
       @element.add element
     else
       previous.replace_with element
     end
     return previous
   end

   # Returns +true+ if there are no +Element+ children, +false+ otherwise
   def empty?
     @element.find{ |child| child.kind_of? Element}.nil?
   end

   # Returns the index of the supplied child (starting at 1), or -1 if
   # the element is not a child
   # element:: an +Element+ child
   def index element
     rv = 0
     # rv is advanced once per Element child visited; the search stops at
     # the first child equal to +element+.
     found = @element.find do |child|
       child.kind_of? Element and
       (rv += 1) and
       child == element
     end
     return rv if found == element
     return -1
   end

   # Deletes a child Element
   # element::
   #   Either an Element, which is removed directly; an
   #   xpath, where the first matching child is removed; or an Integer,
   #   where the n'th Element is removed.
   # Returns:: the removed child
   #  doc = Document.new '<a><b/><c/><c id="1"/></a>'
   #  b = doc.root.elements[1]
   #  doc.root.elements.delete b           #-> <a><c/><c id="1"/></a>
   #  doc.elements.delete("a/c[@id='1']")  #-> <a><c/></a>
   #  doc.root.elements.delete 1           #-> <a/>
   def delete element
     if element.kind_of? Element
       @element.delete element
     else
       el = self[element]
       el.remove if el
     end
   end

   # Removes multiple elements.  Filters for Element children, regardless of
   # XPath matching.
   # xpath:: all elements matching this String path are removed.
   # Returns:: an Array of Elements that have been removed
   #  doc = Document.new '<a><c/><c/><c/><c/></a>'
   #  deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
   def delete_all( xpath )
     rv = []
     # Collect the matches first so that nothing is removed while the XPath
     # search is still running.
     XPath::each( @element, xpath) {|element|
       rv << element if element.kind_of? Element
     }
     rv.each do |element|
       @element.delete element
       element.remove
     end
     return rv
   end

   # Adds an element
   # element::
   #   if supplied, is either an Element, String, or
   #   Source (see Element.initialize).  If not supplied or nil, a
   #   new, default Element will be constructed
   # Returns:: the added Element
   #  a = Element.new('a')
   #  a.elements.add(Element.new('b'))  #-> <a><b/></a>
   #  a.elements.add('c')               #-> <a><b/><c/></a>
   def add element=nil
     if element.nil?
       Element.new("", self, @element.context)
     elsif not element.kind_of?(Element)
       Element.new(element, self, @element.context)
     else
       @element << element
       element.context = @element.context
       element
     end
   end

   alias :<< :add

   # Iterates through all of the child Elements, optionally filtering
   # them by a given XPath
   # xpath::
   #   optional.  If supplied, this is a String XPath, and is used to
   #   filter the children, so that only matching children are yielded.  Note
   #   that XPaths are automatically filtered for Elements, so that
   #   non-Element children will not be yielded
   #  doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
   #  doc.root.elements.each {|e|p e}       #-> Yields b, c, d, b, c, d elements
   #  doc.root.elements.each('b') {|e|p e}  #-> Yields b, b elements
   #  doc.root.elements.each('child::node()')  {|e|p e}
   #  #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
   #  XPath.each(doc.root, 'child::node()', &block)
   #  #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
   def each( xpath=nil, &block)
     XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element }
   end

   # Like Enumerable#collect, restricted to Element nodes.
   # xpath:: optional XPath used to select the nodes that are visited
   # Returns:: an Array holding the block's result for each Element visited
   def collect( xpath=nil, &block )
     collection = []
     XPath::each( @element, xpath ) {|e|
       collection << yield(e)  if e.kind_of?(Element)
     }
     collection
   end

   # Folds the matching Elements into a single value.
   # xpath:: optional XPath used to select the nodes that are visited
   # initial::
   #   optional starting accumulator.  When it is nil, the first Element
   #   visited becomes the accumulator and is not yielded.
   # Returns:: the final accumulator
   #
   # The block receives (accumulator, element) and its result becomes the
   # next accumulator.
   def inject( xpath=nil, initial=nil, &block )
     first = true
     XPath::each( @element, xpath ) {|e|
       next unless e.kind_of? Element
       if first and initial.nil?
         initial = e
       else
         initial = yield( initial, e )
       end
       # Only the very first Element may seed the accumulator.  Clearing the
       # flag unconditionally keeps a later nil block result from causing
       # the following Element to be adopted instead of being yielded.
       first = false
     }
     initial
   end

   # Returns the number of +Element+ children of the parent object.
   #  doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
   #  doc.root.size            #-> 6, 3 element and 3 text nodes
   #  doc.root.elements.size   #-> 3
   def size
     count = 0
     @element.each {|child| count+=1 if child.kind_of? Element }
     count
   end

   # Returns an Array of Element children.  An XPath may be supplied to
   # filter the children.  Only Element children are returned, even if the
   # supplied XPath matches non-Element children.
   #  doc = Document.new '<a>sean<b/>elliott<c/></a>'
   #  doc.root.elements.to_a                  #-> [ <b/>, <c/> ]
   #  doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
   #  XPath.match(doc.root, "child::node()")  #-> [ sean, <b/>, elliott, <c/> ]
   def to_a( xpath=nil )
     rv = XPath.match( @element, xpath )
     # The Element filter is applied only when an explicit xpath was given.
     return rv.find_all{|e| e.kind_of? Element} if xpath
     rv
   end

   private
   # Private helper method.  Removes the surrounding quotes from a quoted
   # string; any other string is returned unchanged.
   def literalize name
     name = name[1..-2] if name[0] == ?' or name[0] == ?"               #'
     name
   end
 end
 947 
 948    ########################################################################
 949    # ATTRIBUTES                                                           #
 950    ########################################################################
 951 
 952    # A class that defines the set of Attributes of an Element and provides 
 953    # operations for accessing elements in that set.
 class Attributes < Hash
   # Storage layout, as used by the methods below: each key is an attribute
   # name (see Attribute#name).  The value is either a single Attribute or,
   # when several prefixes share that name, a Hash of prefix => Attribute.

   # Constructor
   # element:: the Element of which this is an Attribute
   def initialize element
     @element = element
   end

   # Fetches an attribute value.  If you want to get the Attribute itself,
   # use get_attribute()
   # name:: an XPath attribute name.  Namespaces are relevant here.
   # Returns::
   #   the String value of the matching attribute, or +nil+ if no
   #   matching attribute was found.  This is the unnormalized value
   #   (with entities expanded).
   #
   #  doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
   #  doc.root.attributes['att']         #-> '<'
   #  doc.root.attributes['bar:att']     #-> '2'
   def [](name)
     attr = get_attribute(name)
     return attr.value unless attr.nil?
     return nil
   end

   # Returns an Array of every Attribute node in this set.
   #
   # Attributes that share a name under different prefixes are stored
   # together in a nested Hash.  Array#flatten does not expand Hashes, so
   # the nodes are gathered through each_attribute, which does.
   def to_a
     attrs = []
     each_attribute { |attr| attrs << attr }
     attrs
   end

   # Returns the number of attributes the owning Element contains.
   #  doc = Document.new "<a x='1' y='2' foo:x='3'/>"
   #  doc.root.attributes.length        #-> 3
   def length
     c = 0
     each_attribute { c+=1 }
     c
   end
   alias :size :length

   # Iterates over the attributes of an Element.  Yields actual Attribute
   # nodes, not String values.
   #
   #  doc = Document.new '<a x="1" y="2"/>'
   #  doc.root.attributes.each_attribute {|attr|
   #    p attr.expanded_name+" => "+attr.value
   #  }
   def each_attribute # :yields: attribute
     each_value do |val|
       if val.kind_of? Attribute
         yield val
       else
         # Several prefixes share this name: val is a prefix => Attribute Hash
         val.each_value { |atr| yield atr }
       end
     end
   end

   # Iterates over each attribute of an Element, yielding the expanded name
   # and value as a pair of Strings.
   #
   #  doc = Document.new '<a x="1" y="2"/>'
   #  doc.root.attributes.each {|name, value| p name+" => "+value }
   def each
     each_attribute do |attr|
       yield attr.expanded_name, attr.value
     end
   end

   # Fetches an attribute
   # name::
   #   the name by which to search for the attribute.  Can be a
   #   <tt>prefix:name</tt> namespace name.
   # Returns:: The first matching attribute, or nil if there was none.  This
   # value is an Attribute node, not the String value of the attribute.
   #  doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
   #  doc.root.attributes.get_attribute("foo").value    #-> "2"
   #  doc.root.attributes.get_attribute("x:foo").value  #-> "1"
   def get_attribute( name )
     attr = fetch( name, nil )
     if attr.nil?
       return nil if name.nil?
       # Look for prefix
       name =~ Namespace::NAMESPLIT
       prefix, n = $1, $2
       if prefix
         attr = fetch( n, nil )
         # check prefix
         if attr.kind_of? Attribute
           return attr if prefix == attr.prefix
         elsif !attr.nil?
           # Several prefixes share this name; pick the requested one
           return attr[ prefix ]
         end
       end
       # Nothing stored under that name: fall back to a value supplied by
       # the document's doctype for this element, if there is one.
       element_document = @element.document
       if element_document and element_document.doctype
         expn = @element.expanded_name
         expn = element_document.doctype.name if expn.size == 0
         attr_val = element_document.doctype.attribute_of(expn, name)
         return Attribute.new( name, attr_val ) if attr_val
       end
       return nil
     end
     if attr.kind_of? Hash
       # Ambiguous unprefixed lookup: use the owning element's own prefix
       attr = attr[ @element.prefix ]
     end
     return attr
   end

   # Sets an attribute, overwriting any existing attribute value by the
   # same name.  Namespace is significant.
   # name:: the name of the attribute
   # value::
   #   (optional) If supplied, the value of the attribute.  If
   #   nil, any existing matching attribute is deleted.
   # Returns::
   #   Owning element (nil when the call deleted an attribute)
   # Raises::
   #   ParseException if the new attribute and an existing one of the same
   #   name use different prefixes that resolve to the same namespace
   #  doc = Document.new "<a x:foo='1' foo='3'/>"
   #  doc.root.attributes['y:foo'] = '2'
   #  doc.root.attributes['foo'] = '4'
   #  doc.root.attributes['x:foo'] = nil
   def []=( name, value )
     if value.nil?   # Delete the named attribute
       attr = get_attribute(name)
       delete attr
       return
     end
     unless value.kind_of? Attribute
       # Normalize the raw value against the document's doctype, if any
       element_document = @element.document
       doctype = element_document ? element_document.doctype : nil
       value = Text::normalize( value, doctype )
       value = Attribute.new(name, value)
     end
     value.element = @element
     old_attr = fetch(value.name, nil)
     if old_attr.nil?
       store(value.name, value)
     elsif old_attr.kind_of? Hash
       old_attr[value.prefix] = value
     elsif old_attr.prefix != value.prefix
       # Check for conflicting namespaces
       if value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
          @element.namespace( old_attr.prefix ) ==
            @element.namespace( value.prefix )
         raise ParseException.new(
           "Namespace conflict in adding attribute \"#{value.name}\": "+
           "Prefix \"#{old_attr.prefix}\" = "+
           "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
           "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"")
       end
       # Same name under two prefixes: switch to the nested-Hash layout
       store value.name, { old_attr.prefix => old_attr,
         value.prefix    => value }
     else
       store value.name, value
     end
     return @element
   end

   # Returns an array of Strings containing all of the prefixes declared
   # by this set of attributes.  The array does not include the default
   # namespace declaration, if one exists.
   #  doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
   #        "z='glorp' p:k='gru'/>")
   #  prefixes = doc.root.attributes.prefixes    #-> ['x', 'y']
   def prefixes
     ns = []
     each_attribute do |attribute|
       ns << attribute.name if attribute.prefix == 'xmlns'
     end
     # Also include declarations the doctype supplies for this element
     if @element.document and @element.document.doctype
       expn = @element.expanded_name
       expn = @element.document.doctype.name if expn.size == 0
       @element.document.doctype.attributes_of(expn).each {
         |attribute|
         ns << attribute.name if attribute.prefix == 'xmlns'
       }
     end
     ns
   end

   # Returns a Hash of attribute name => value for every attribute whose
   # prefix is 'xmlns' or whose name is 'xmlns', including those that the
   # document's doctype supplies for this element.
   def namespaces
     namespaces = {}
     each_attribute do |attribute|
       namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
     end
     if @element.document and @element.document.doctype
       expn = @element.expanded_name
       expn = @element.document.doctype.name if expn.size == 0
       @element.document.doctype.attributes_of(expn).each {
         |attribute|
         namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
       }
     end
     namespaces
   end

   # Removes an attribute
   # attribute::
   #   either a String, which is the name of the attribute to remove --
   #   namespaces are significant here -- or the attribute to remove.
   # Returns:: the owning element
   #  doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
   #  doc.root.attributes.delete 'foo'   #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
   #  doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
   #  attr = doc.root.attributes.get_attribute('y:foo')
   #  doc.root.attributes.delete attr    #-> <a z:foo='4'/>"
   def delete( attribute )
     name = nil
     prefix = nil
     if attribute.kind_of? Attribute
       name = attribute.name
       prefix = attribute.prefix
     else
       attribute =~ Namespace::NAMESPLIT
       prefix, name = $1, $2
       prefix = '' unless prefix
     end
     old = fetch(name, nil)
     if old.kind_of? Hash # the supplied attribute is one of many
       old.delete(prefix)
       # Collapse back to a single Attribute once only one prefix remains
       store name, old.values.first if old.size == 1
     elsif old.nil?
       return @element
     else # the supplied attribute is a top-level one
       super(name)
     end
     @element
   end

   # Adds an attribute, overriding any existing attribute by the
   # same name.  Namespaces are significant.
   # attribute:: An Attribute
   def add( attribute )
     self[attribute.name] = attribute
   end

   alias :<< :add

   # Deletes all attributes matching a name.  Namespaces are significant.
   # name::
   #   A String; all attributes whose expanded name equals it are removed
   # Returns:: an Array of the Attributes that were removed
   def delete_all( name )
     rv = []
     # Collect first, remove afterwards, so the set is not modified while
     # it is being iterated.
     each_attribute { |attribute|
       rv << attribute if attribute.expanded_name == name
     }
     rv.each{ |attr| attr.remove }
     return rv
   end

   # The +get_attribute_ns+ method retrieves an attribute by its namespace
   # and name. Thus it is possible to reliably identify an attribute
   # even if an XML processor has changed the prefix.
   #
   # Method contributed by Henrik Martensson
   def get_attribute_ns(namespace, name)
     each_attribute() { |attribute|
       if name == attribute.name &&
         namespace == attribute.namespace()
         return attribute
       end
     }
     nil
   end
 end
1227  end