1 require "rexml/parent"
2 require "rexml/namespace"
3 require "rexml/attribute"
4 require "rexml/cdata"
5 require "rexml/xpath"
6 require "rexml/parseexception"
7
8 module REXML
9 # An implementation note about namespaces:
10 # As we parse, when we find namespaces we put them in a hash and assign
11 # them a unique ID. We then convert the namespace prefix for the node
12 # to the unique ID. This makes namespace lookup much faster for the
13 # cost of extra memory use. We save the namespace prefix for the
14 # context node and convert it back when we write it.
15 @@namespaces = {}
16
17 # Represents a tagged XML element. Elements are characterized by
18 # having children, attributes, and names, and can themselves be
19 # children.
20 class Element < Parent
21 include Namespace
22
23 UNDEFINED = "UNDEFINED"; # The default name
24
25 # Mechanisms for accessing attributes and child elements of this
26 # element.
27 attr_reader :attributes, :elements
28 # The context holds information about the processing environment, such as
29 # whitespace handling.
30 attr_accessor :context
31
32 # Constructor
33 # arg::
34 # if not supplied, will be set to the default value.
35 # If a String, the name of this object will be set to the argument.
36 # If an Element, the object will be shallowly cloned; name,
37 # attributes, and namespaces will be copied. Children will +not+ be
38 # copied.
39 # parent::
40 # if supplied, must be a Parent, and will be used as
41 # the parent of this object.
42 # context::
43 # If supplied, must be a hash containing context items. Context items
44 # include:
45 # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
46 # strings being the names of the elements to respect
47 # whitespace for. Defaults to :+all+.
48 # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
49 # strings being the names of the elements to ignore whitespace on.
50 # Overrides :+respect_whitespace+.
51 # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
52 # of strings being the names of the elements in which to ignore
53 # whitespace-only nodes. If this is set, Text nodes which contain only
54 # whitespace will not be added to the document tree.
55 # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
56 # the elements to process in raw mode. In raw mode, special
# characters in text are not converted to or from entities.
def initialize( arg = UNDEFINED, parent=nil, context=nil )
  # The parent is handed to the superclass constructor unchanged.
  super(parent)

  @elements   = Elements.new(self)
  @attributes = Attributes.new(self)
  @context    = context

  case arg
  when String
    # A String is simply the name of the new element.
    self.name = arg
  when Element
    # Shallow copy: take the name, a fresh copy of every attribute, and
    # the source element's context. Children are not copied.
    self.name = arg.expanded_name
    arg.attributes.each_attribute do |source_attr|
      @attributes << Attribute.new( source_attr )
    end
    @context = arg.context
  end
end
75
# Builds a compact, single-line summary of this element: the expanded name,
# every attribute (each attribute writes itself into the buffer), and either
# "> ... </>" when the element has children or "/>" when it has none.
def inspect
  summary = "<#@expanded_name"

  @attributes.each_attribute do |attr|
    summary << " "
    attr.write( summary, 0 )
  end

  summary << (children.size > 0 ? "> ... </>" : "/>")
end
90
91
92 # Creates a shallow copy of self.
93 # d = Document.new "<a><b/><b/><c><d/></c></a>"
94 # new_a = d.root.clone
95 # puts new_a # => "<a/>"
def clone
  # Go through self.class so that the copy has the receiver's own class;
  # the receiver is passed as the constructor's first argument.
  copy = self.class.new( self )
  copy
end
99
100 # Evaluates to the root node of the document that this element
101 # belongs to. If this element doesn't belong to a document, but does
102 # belong to another Element, the parent's root will be returned, until the
103 # earliest ancestor is found.
104 #
105 # Note that this is not the same as the document element.
106 # In the following example, <a> is the document element, and the root
107 # node is the parent node of the document element. You may ask yourself
108 # why the root node is useful: consider the doctype and XML declaration,
109 # and any processing instructions before the document element... they
110 # are children of the root node, or siblings of the document element.
111 # The only time this isn't true is when an Element is created that is
112 # not part of any Document. In this case, the ancestor that has no
113 # parent acts as the root node.
114 # d = Document.new '<a><b><c/></b></a>'
115 # a = d[1] ; c = a[1][1]
116 # d.root_node == d # TRUE
117 # a.root_node # namely, d
118 # c.root_node # again, d
def root_node
  # The node without a parent is the root; otherwise keep climbing.
  return self if parent.nil?
  parent.root_node
end
122
# Returns the topmost Element of the tree this node belongs to:
# * for a Document, its first child element (elements[1]);
# * for an element whose parent is a Document or nil, the element itself;
# * otherwise whatever the parent reports as its root.
def root
  if kind_of?( Document )
    elements[1]
  elsif parent.nil? || parent.kind_of?( Document )
    self
  else
    parent.root
  end
end
128
129 # Evaluates to the document to which this element belongs, or nil if this
130 # element doesn't belong to a document.
def document
  # The document, when there is one, is the parent of the root element.
  top = root
  top ? top.parent : nil
end
135
136 # Evaluates to +true+ if whitespace is respected for this element. This
137 # is the case if:
138 # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
139 # 2. The context has :+respect_whitespace+ set to :+all+ or
140 # an array containing the name of this element, and
141 # :+compress_whitespace+ isn't set to :+all+ or an array containing the
142 # name of this element.
143 # The evaluation is tested against +expanded_name+, and so is namespace
144 # sensitive.
def whitespace
  # Recomputed on every call; the result is also stored in @whitespace.
  @whitespace = nil
  if @context
    respect  = @context[:respect_whitespace]
    compress = @context[:compress_whitespace]
    if respect
      @whitespace = (respect == :all || respect.include?( expanded_name ))
    end
    # A matching :compress_whitespace entry always wins over :respect_whitespace.
    if compress && (compress == :all || compress.include?( expanded_name ))
      @whitespace = false
    end
  end
  # Anything that was not explicitly switched off is respected.
  @whitespace = true unless @whitespace == false
  @whitespace
end
160
# Evaluates to +true+ if whitespace-only Text nodes are to be ignored for
# this element, i.e. the context has :+ignore_whitespace_nodes+ set to
# :+all+ or to a collection that includes this element's +expanded_name+.
# Evaluates to +false+ otherwise (including when there is no context).
# The result is also stored in @ignore_whitespace_nodes.
def ignore_whitespace_nodes
  setting = @context && @context[:ignore_whitespace_nodes]
  # Always return the computed flag. Previously the method fell out of a
  # bare +if+ and returned nil whenever no setting was present.
  @ignore_whitespace_nodes =
    if setting
      setting == :all || setting.include?( expanded_name )
    else
      false
    end
end
171
172 # Evaluates to +true+ if raw mode is set for this element. This
173 # is the case if the context has :+raw+ set to :+all+ or
174 # an array containing the name of this element.
175 #
176 # The evaluation is tested against +expanded_name+, and so is namespace
177 # sensitive.
def raw
  # A missing context or a missing :raw entry short-circuits to that falsy
  # value; otherwise the entry must be :all or include this element's name.
  setting = @context && @context[:raw]
  @raw = setting && (setting == :all || setting.include?( expanded_name ))
end
184
185 #once :whitespace, :raw, :ignore_whitespace_nodes
186
187 #################################################
188 # Namespaces #
189 #################################################
190
191 # Evaluates to an +Array+ containing the prefixes (names) of all defined
192 # namespaces at this context node.
193 # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
194 # doc.elements['//b'].prefixes # -> ['x', 'y']
def prefixes
  # Start from what the parent chain declares, then union in this
  # element's own declarations (duplicates are dropped by Array#|).
  inherited = parent ? parent.prefixes : []
  inherited | attributes.prefixes
end
201
# Returns a Hash of the namespaces in scope for this element: the parent's
# namespaces merged with those declared in this element's own attributes.
# On a key clash the element's own declaration wins.
def namespaces
  inherited = parent ? parent.namespaces : {}
  inherited.merge( attributes.namespaces )
end
208
# Evaluates to the URI for a prefix, or the empty string if no such
210 # namespace is declared for this element. Evaluates recursively for
211 # ancestors. Returns the default namespace, if there is one.
212 # prefix::
213 # the prefix to search for. If not supplied, returns the default
214 # namespace if one exists
215 # Returns::
216 # the namespace URI as a String, or nil if no such namespace
217 # exists. If the namespace is undefined, returns an empty string
218 # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
219 # b = doc.elements['//b']
220 # b.namespace # -> '1'
221 # b.namespace("y") # -> '2'
def namespace(prefix=nil)
  # Without an argument, fall back to this element's own prefix (the
  # +prefix+ method; the parentheses bypass the local variable).
  prefix = prefix() if prefix.nil?

  # Translate the prefix into the name of the declaring attribute.
  attr_name =
    if prefix == ''
      "xmlns"
    elsif prefix[0,5] == 'xmlns'
      prefix
    else
      "xmlns:#{prefix}"
    end

  uri = attributes[ attr_name ]
  # Not declared here: ask the ancestors.
  uri = parent.namespace( attr_name ) if uri.nil? && parent
  # An undeclared default namespace is reported as the empty string.
  uri = '' if uri.nil? && attr_name == 'xmlns'
  uri
end
236
237 # Adds a namespace to this element.
238 # prefix::
239 # the prefix string, or the namespace URI if +uri+ is not
240 # supplied
241 # uri::
# the namespace URI. May be nil, in which case +prefix+ is used as
# the URI
244 # Evaluates to: this Element
245 # a = Element.new("a")
246 # a.add_namespace("xmlns:foo", "bar" )
247 # a.add_namespace("foo", "bar") # shorthand for previous line
248 # a.add_namespace("twiddle")
249 # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/>
def add_namespace( prefix, uri=nil )
  if uri
    # Accept both "foo" and "xmlns:foo" as the prefix.
    attr_name = (prefix =~ /^xmlns:/) ? prefix : "xmlns:#{prefix}"
    @attributes[ attr_name ] = uri
  else
    # Single-argument form: the argument is the default namespace URI.
    @attributes["xmlns"] = prefix
  end
  self
end
259
260 # Removes a namespace from this node. This only works if the namespace is
261 # actually declared in this node. If no argument is passed, deletes the
262 # default namespace.
263 #
264 # Evaluates to: this element
265 # doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
266 # doc.root.delete_namespace
267 # puts doc # -> <a xmlns:foo='bar'/>
268 # doc.root.delete_namespace 'foo'
269 # puts doc # -> <a/>
def delete_namespace namespace="xmlns"
  # "xmlns" names the default namespace; anything else is a prefix.
  attr_name = (namespace == 'xmlns') ? namespace : "xmlns:#{namespace}"
  declaration = attributes.get_attribute( attr_name )
  # Only a declaration made on this very element can be removed.
  declaration.remove unless declaration.nil?
  self
end
276
277 #################################################
278 # Elements #
279 #################################################
280
281 # Adds a child to this element, optionally setting attributes in
282 # the element.
283 # element::
284 # optional. If Element, the element is added.
285 # Otherwise, a new Element is constructed with the argument (see
286 # Element.initialize).
287 # attrs::
288 # If supplied, must be a Hash containing String name,value
289 # pairs, which will be used to set the attributes of the new Element.
290 # Returns:: the Element that was added
291 # el = doc.add_element 'my-tag'
292 # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
293 # el = Element.new 'my-tag'
294 # doc.add_element el
def add_element element, attrs=nil
  raise "First argument must be either an element name, or an Element object" if element.nil?
  el = @elements.add(element)
  # Anything other than a Hash in +attrs+ is silently ignored.
  if attrs.kind_of? Hash
    attrs.each do |key, value|
      # The attribute belongs to the newly added child, so +el+ (not this
      # parent element) is passed as its owner.
      el.attributes[key] = Attribute.new( key, value, el )
    end
  end
  el
end
303
304 # Deletes a child element.
305 # element::
306 # Must be an +Element+, +String+, or +Integer+. If Element,
307 # the element is removed. If String, the element is found (via XPath)
# and removed. <em>This means that any parent can remove any
# descendant.</em> If Integer, the Element indexed by that number will be
310 # removed.
311 # Returns:: the element that was removed.
312 # doc.delete_element "/a/b/c[@id='4']"
313 # doc.delete_element doc.elements["//k"]
314 # doc.delete_element 1
def delete_element element
  # Element, XPath String and Integer arguments are all resolved by the
  # Elements collection; its result is passed straight back.
  removed = @elements.delete( element )
  removed
end
318
319 # Evaluates to +true+ if this element has at least one child Element
320 # doc = Document.new "<a><b/><c>Text</c></a>"
# doc.root.has_elements? # -> true
# doc.elements["/a/b"].has_elements? # -> false
# doc.elements["/a/c"].has_elements? # -> false
def has_elements?
  # Text and other non-Element children do not count; the Elements
  # collection only considers Element children.
  @elements.empty? ? false : true
end
327
328 # Iterates through the child elements, yielding for each Element that
329 # has a particular attribute set.
330 # key::
331 # the name of the attribute to search for
332 # value::
333 # the value of the attribute
334 # max::
335 # (optional) causes this method to return after yielding
336 # for this number of matching children
337 # name::
338 # (optional) if supplied, this is an XPath that filters
339 # the children to check.
340 #
# doc = Document.new "<a><b id='1'/><c id='2'/><d id='1'/><e/></a>"
342 # # Yields b, c, d
343 # doc.root.each_element_with_attribute( 'id' ) {|e| p e}
344 # # Yields b, d
345 # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
346 # # Yields b
347 # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
348 # # Yields d
349 # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
  # Choose the predicate once: "attribute is present" when no value was
  # requested, "attribute equals value" otherwise.
  matcher =
    if value.nil?
      lambda { |child| child.attributes[key] != nil }
    else
      lambda { |child| child.attributes[key] == value }
    end
  each_with_something( matcher, max, name, &block )
end
359
360 # Iterates through the children, yielding for each Element that
361 # has a particular text set.
362 # text::
363 # the text to search for. If nil, or not supplied, will iterate
364 # over all +Element+ children that contain at least one +Text+ node.
365 # max::
366 # (optional) causes this method to return after yielding
367 # for this number of matching children
368 # name::
369 # (optional) if supplied, this is an XPath that filters
370 # the children to check.
371 #
372 # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
373 # # Yields b, c, d
374 # doc.each_element_with_text {|e|p e}
375 # # Yields b, c
376 # doc.each_element_with_text('b'){|e|p e}
377 # # Yields b
378 # doc.each_element_with_text('b', 1){|e|p e}
379 # # Yields d
380 # doc.each_element_with_text(nil, 0, 'd'){|e|p e}
def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
  # Choose the predicate once: "has any text" when no text was requested,
  # "first text equals the argument" otherwise.
  matcher =
    if text.nil?
      lambda { |child| child.has_text? }
    else
      lambda { |child| child.text == text }
    end
  each_with_something( matcher, max, name, &block )
end
390
391 # Synonym for Element.elements.each
def each_element( xpath=nil, &block ) # :yields: Element
  # Pure delegation: the block and the optional XPath filter are handed to
  # the Elements collection unchanged.
  child_elements = @elements
  child_elements.each( xpath, &block )
end
395
# Synonym for Element.elements.to_a
397 # This is a little slower than calling elements.each directly.
398 # xpath:: any XPath by which to search for elements in the tree
399 # Returns:: an array of Elements that match the supplied path
def get_elements( xpath )
  # Pure delegation to the Elements collection's array conversion.
  matches = @elements.to_a( xpath )
  matches
end
403
404 # Returns the next sibling that is an element, or nil if there is
405 # no Element sibling after this one
406 # doc = Document.new '<a><b/>text<c/></a>'
407 # doc.root.elements['b'].next_element #-> <c/>
408 # doc.root.elements['c'].next_element #-> nil
def next_element
  # Step forward through the siblings, skipping every non-Element node,
  # until an Element is found or the siblings run out (nil).
  candidate = next_sibling
  until candidate.nil? || candidate.kind_of?( Element )
    candidate = candidate.next_sibling
  end
  candidate
end
414
415 # Returns the previous sibling that is an element, or nil if there is
416 # no Element sibling prior to this one
417 # doc = Document.new '<a><b/>text<c/></a>'
418 # doc.root.elements['c'].previous_element #-> <b/>
419 # doc.root.elements['b'].previous_element #-> nil
def previous_element
  # Step backward through the siblings, skipping every non-Element node,
  # until an Element is found or the siblings run out (nil).
  candidate = previous_sibling
  until candidate.nil? || candidate.kind_of?( Element )
    candidate = candidate.previous_sibling
  end
  candidate
end
425
426
427 #################################################
428 # Text #
429 #################################################
430
431 # Evaluates to +true+ if this element has at least one Text child
def has_text?
  # Relies on text() returning nil when there is no Text child.
  text().nil? ? false : true
end
435
436 # A convenience method which returns the String value of the _first_
437 # child text element, if one exists, and +nil+ otherwise.
438 #
439 # <em>Note that an element may have multiple Text elements, perhaps
440 # separated by other children</em>. Be aware that this method only returns
441 # the first Text node.
442 #
443 # This method returns the +value+ of the first text child node, which
444 # ignores the +raw+ setting, so always returns normalized text. See
445 # the Text::value documentation.
446 #
447 # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
448 # # The element 'p' has two text elements, "some text " and " more text".
449 # doc.root.text #-> "some text "
def text( path = nil )
  # get_text does the lookup; only the node's +value+ is exposed here.
  node = get_text( path )
  node.nil? ? nil : node.value
end
455
456 # Returns the first child Text node, if any, or +nil+ otherwise.
457 # This method returns the actual +Text+ node, rather than the String content.
458 # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
459 # # The element 'p' has two text elements, "some text " and " more text".
460 # doc.root.get_text.value #-> "some text "
def get_text path = nil
  # Without a path: the first direct child that is a Text node.
  return @children.find { |node| node.kind_of? Text } unless path

  # With a path: locate the element first, then ask it for its own text.
  target = @elements[ path ]
  target.nil? ? nil : target.get_text
end
471
472 # Sets the first Text child of this object. See text() for a
473 # discussion about Text children.
474 #
475 # If a Text child already exists, the child is replaced by this
476 # content. This means that Text content can be deleted by calling
477 # this method with a nil argument. In this case, the next Text
478 # child becomes the first Text child. In no case is the order of
479 # any siblings disturbed.
480 # text::
481 # If a String, a new Text child is created and added to
482 # this Element as the first Text child. If Text, the text is set
483 # as the first Child element. If nil, then any existing first Text
484 # child is removed.
485 # Returns:: this Element.
486 # doc = Document.new '<a><b/></a>'
487 # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>'
488 # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>'
489 # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>'
490 # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>'
491 # doc.root.text = nil #-> '<a><b/><c/></a>'
def text=( text )
  # Normalise the argument: Strings (and any other non-Text object, via
  # to_s) become Text nodes built with this element's whitespace and raw
  # settings; nil and Text instances are used as they are.
  replacement =
    if text.kind_of?( String )
      Text.new( text, whitespace(), nil, raw() )
    elsif text.nil? || text.kind_of?( Text )
      text
    else
      Text.new( text.to_s, whitespace(), nil, raw() )
    end

  current = get_text
  if replacement.nil?
    # nil means "delete the first Text child", if there is one.
    current.remove unless current.nil?
  elsif current.nil?
    self << replacement
  else
    # Swap in place so sibling order is untouched.
    current.replace_with( replacement )
  end
  self
end
510
511 # A helper method to add a Text child. Actual Text instances can
512 # be added with regular Parent methods, such as add() and <<()
513 # text::
514 # if a String, a new Text instance is created and added
515 # to the parent. If Text, the object is added directly.
516 # Returns:: this Element
# e = Element.new('a') #-> <a/>
# e.add_text 'foo' #-> <a>foo</a>
# e.add_text Text.new(' bar') #-> <a>foo bar</a>
# Note that at the end of this example, the branch has <b>3</b> nodes; the 'a'
# element and <b>2</b> Text node children.
def add_text( text )
  if text.kind_of? String
    last_child = @children[-1]
    if last_child.kind_of? Text
      # Merge into the trailing Text node instead of creating a sibling.
      # This path used to end in a bare +return+ (nil); it now returns the
      # element like every other path.
      last_child << text
      return self
    end
    text = Text.new( text, whitespace(), nil, raw() )
  end
  # nil is accepted and ignored.
  self << text unless text.nil?
  self
end
533
# Identifies this node as an element; always evaluates to :element.
def node_type
  return :element
end
537
# Builds a path for this element by joining, with "/", one step for each
# node from the topmost ancestor down to this element. Each step is
# produced by __to_xpath_helper.
def xpath
  steps = [ __to_xpath_helper( self ) ]
  ancestor = parent
  while ancestor
    # Ancestors are visited bottom-up, so each step goes to the front.
    steps.unshift( __to_xpath_helper( ancestor ) )
    ancestor = ancestor.parent
  end
  steps.join( "/" )
end
548
549 #################################################
550 # Attributes #
551 #################################################
552
# Fetches an Attribute object (not its value) by name.
# name:: the attribute name, without a prefix
# namespace::
#   optional namespace URI. When given, it is looked up in +namespaces+
#   to find the matching prefix, which is then prepended to +name+. A URI
#   that maps to 'xmlns' or is not found at all results in an unprefixed
#   lookup.
# Returns:: whatever attributes.get_attribute returns for the resulting name
def attribute( name, namespace=nil )
  prefix = nil
  if namespace
    declared = namespaces
    # Hash#index was removed in Ruby 3.0; Hash#key is its replacement. The
    # old call is kept as a fallback for interpreters without Hash#key.
    prefix = declared.respond_to?( :key ) ? declared.key( namespace ) : declared.index( namespace )
  end
  prefix = nil if prefix == 'xmlns'
  attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
end
559
560 # Evaluates to +true+ if this element has any attributes set, false
561 # otherwise.
def has_attributes?
  # The attribute collection itself knows whether it holds anything.
  @attributes.empty? ? false : true
end
565
566 # Adds an attribute to this element, overwriting any existing attribute
567 # by the same name.
568 # key::
569 # can be either an Attribute or a String. If an Attribute,
570 # the attribute is added to the list of Element attributes. If String,
571 # the argument is used as the name of the new attribute, and the value
572 # parameter must be supplied.
573 # value::
574 # Required if +key+ is a String, and ignored if the first argument is
575 # an Attribute. This is a String, and is used as the value
576 # of the new Attribute. This should be the unnormalized value of the
577 # attribute (without entities).
578 # Returns:: the Attribute added
579 # e = Element.new 'e'
580 # e.add_attribute( 'a', 'b' ) #-> <e a='b'/>
581 # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/>
582 # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/>
def add_attribute( key, value=nil )
  # A ready-made Attribute is appended as is; +value+ is ignored.
  return @attributes << key if key.kind_of? Attribute

  # Otherwise +key+ is the attribute name and +value+ its value.
  @attributes[key] = value
end
590
591 # Add multiple attributes to this element.
592 # hash:: is either a hash, or array of arrays
593 # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
# el.add_attributes( [ ["name1","value1"], ["name2","value2"] ] )
def add_attributes hash
  # Arguments that are neither a Hash nor an Array are ignored.
  case hash
  when Hash
    hash.each_pair { |attr_name, attr_value| @attributes[attr_name] = attr_value }
  when Array
    # Each entry is expected to be a [name, value] pair.
    hash.each { |pair| @attributes[ pair[0] ] = pair[1] }
  end
end
602
603 # Removes an attribute
604 # key::
605 # either an Attribute or a String. In either case, the
606 # attribute is found by matching the attribute name to the argument,
607 # and then removed. If no attribute is found, no action is taken.
608 # Returns::
609 # the attribute removed, or nil if this Element did not contain
610 # a matching attribute
611 # e = Element.new('E')
612 # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/>
613 # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/>
614 # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/>
615 # e.delete_attribute( r ) #-> <E/>
def delete_attribute(key)
  # Look the attribute up first; a miss leaves the element untouched and
  # evaluates to nil.
  found = @attributes.get_attribute(key)
  found.nil? ? nil : found.remove
end
620
621 #################################################
622 # Other Utilities #
623 #################################################
624
625 # Get an array of all CData children.
626 # IMMUTABLE
def cdatas
  # A fresh, frozen Array; the element's own child list is not exposed.
  matching = find_all { |node| node.kind_of?( CData ) }
  matching.freeze
end
630
631 # Get an array of all Comment children.
632 # IMMUTABLE
def comments
  # A fresh, frozen Array; the element's own child list is not exposed.
  matching = find_all { |node| node.kind_of?( Comment ) }
  matching.freeze
end
636
637 # Get an array of all Instruction children.
638 # IMMUTABLE
def instructions
  # A fresh, frozen Array; the element's own child list is not exposed.
  matching = find_all { |node| node.kind_of?( Instruction ) }
  matching.freeze
end
642
643 # Get an array of all Text children.
644 # IMMUTABLE
def texts
  # A fresh, frozen Array; the element's own child list is not exposed.
  matching = find_all { |node| node.kind_of?( Text ) }
  matching.freeze
end
648
649 # == DEPRECATED
650 # See REXML::Formatters
651 #
652 # Writes out this element, and recursively, all children.
653 # output::
654 # output an object which supports '<< string'; this is where the
655 # document will be written.
656 # indent::
657 # An integer. If -1, no indenting will be used; otherwise, the
658 # indentation will be this number of spaces, and children will be
659 # indented an additional amount. Defaults to -1
660 # transitive::
661 # If transitive is true and indent is >= 0, then the output will be
662 # pretty-printed in such a way that the added whitespace does not affect
663 # the parse tree of the document
664 # ie_hack::
665 # Internet Explorer is the worst piece of crap to have ever been
666 # written, with the possible exception of Windows itself. Since IE is
667 # unable to parse proper XML, we have to provide a hack to generate XML
668 # that IE's limited abilities can handle. This hack inserts a space
669 # before the /> on empty tags. Defaults to false
670 #
671 # out = ''
672 # doc.write( out ) #-> doc is written to the string 'out'
673 # doc.write( $stdout ) #-> doc written to the console
def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
  Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
  # indent > -1 selects a pretty-printing formatter (Transitive when
  # +transitive+ is set, Pretty otherwise); -1 selects the Default one.
  formatter =
    if indent > -1
      if transitive
        REXML::Formatters::Transitive.new( indent, ie_hack )
      else
        REXML::Formatters::Pretty.new( indent, ie_hack )
      end
    else
      REXML::Formatters::Default.new( ie_hack )
    end
  # The destination is the +writer+ parameter. The body used to reference an
  # undefined name (+output+), which raised NameError on every call.
  formatter.write( self, writer )
end
687
688
689 private
# Produces one path step for +node+: its expanded name, followed by a
# 1-based "[n]" position when the node's parent holds more than one
# REXML::Element child with that same expanded name.
def __to_xpath_helper node
  # Work on a copy so the node's own name string is never mutated.
  step = node.expanded_name.clone
  container = node.parent
  return step unless container

  twins = container.find_all do |sibling|
    sibling.kind_of?(REXML::Element) && sibling.expanded_name == node.expanded_name
  end
  step << "[#{twins.index( node ) + 1}]" if twins.length > 1
  step
end
703
704 # A private helper method
def each_with_something( test, max=0, name=nil )
  # +test+ is a callable deciding whether a child is yielded; +name+ is
  # passed to @elements.each as its filter; +max+ > 0 stops the iteration
  # after that many children have been yielded.
  yielded = 0
  @elements.each( name ) do |candidate|
    if test.call( candidate )
      yielded += 1
      yield candidate
    end
    return if max > 0 && yielded == max
  end
end
713 end
714
715 ########################################################################
716 # ELEMENTS #
717 ########################################################################
718
719 # A class which provides filtering of children for Elements, and
720 # XPath search support. You are expected to only encounter this class as
721 # the <tt>element.elements</tt> object. Therefore, you are
722 # _not_ expected to instantiate this yourself.
723 class Elements
724 include Enumerable
725 # Constructor
726 # parent:: the parent Element
def initialize parent
  # Every lookup made through this collection is answered relative to
  # this element.
  @element = parent
end
730
731 # Fetches a child element. Filters only Element children, regardless of
732 # the XPath match.
733 # index::
734 # the search parameter. This is either an Integer, which
735 # will be used to find the index'th child Element, or an XPath,
736 # which will be used to search for the Element. <em>Because
737 # of the nature of XPath searches, any element in the connected XML
738 # document can be fetched through any other element.</em> <b>The
739 # Integer index is 1-based, not 0-based.</b> This means that the first
740 # child element is at index 1, not 0, and the +n+th element is at index
741 # +n+, not <tt>n-1</tt>. This is because XPath indexes element children
742 # starting from 1, not 0, and the indexes should be the same.
743 # name::
# optional, and only used if the first argument is an
745 # Integer. In that case, the index'th child Element that has the
746 # supplied name will be returned. Note again that the indexes start at 1.
747 # Returns:: the first matching Element, or nil if no child matched
748 # doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
749 # doc.root.elements[1] #-> <b/>
750 # doc.root.elements['c'] #-> <c id="1"/>
751 # doc.root.elements[2,'c'] #-> <c id="2"/>
def []( index, name=nil)
  # Anything that is not an Integer is treated as an XPath.
  return XPath::first( @element, index ) unless index.kind_of? Integer

  raise "index (#{index}) must be >= 1" if index < 1
  # Strip surrounding quotes from the optional name filter.
  wanted = name ? literalize(name) : name
  seen = 0
  @element.find do |node|
    next false unless node.kind_of? Element
    next false unless wanted.nil? || node.has_name?( wanted )
    # Count only the children that passed both filters (1-based).
    (seen += 1) == index
  end
end
771
# Sets an element, replacing any previous matching element. If no
# existing element is found, the element is added.
# index:: Used to find a matching element to replace. See []().
# element::
# The element to replace the existing element with
# Returns:: the previous element, or nil if no previous element was found.
779 #
780 # doc = Document.new '<a/>'
781 # doc.root.elements[10] = Element.new('b') #-> <a><b/></a>
782 # doc.root.elements[1] #-> <b/>
783 # doc.root.elements[1] = Element.new('c') #-> <a><c/></a>
784 # doc.root.elements['c'] = Element.new('d') #-> <a><d/></a>
def []=( index, element )
  # Reuse [] for the lookup, so Integer and XPath indexes both work.
  displaced = self[index]
  if displaced.nil?
    # Nothing to replace: append to the parent element instead.
    @element.add( element )
  else
    displaced.replace_with( element )
  end
  displaced
end
794
795 # Returns +true+ if there are no +Element+ children, +false+ otherwise
def empty?
  # Searching stops at the first Element child; finding none means empty.
  first_element = @element.find { |node| node.kind_of? Element }
  first_element.nil?
end
799
800 # Returns the index of the supplied child (starting at 1), or -1 if
801 # the element is not a child
802 # element:: an +Element+ child
def index element
  position = 0
  # Count Element children only; stop at the first one equal to +element+.
  found = @element.find do |child|
    child.kind_of?( Element ) && (position += 1) && child == element
  end
  # Decide on whether the search hit anything. Comparing the result with
  # +element+ made index(nil) return the number of element children
  # (nil == nil) instead of -1.
  found.nil? ? -1 : position
end
813
814 # Deletes a child Element
815 # element::
816 # Either an Element, which is removed directly; an
817 # xpath, where the first matching child is removed; or an Integer,
818 # where the n'th Element is removed.
819 # Returns:: the removed child
820 # doc = Document.new '<a><b/><c/><c id="1"/></a>'
821 # b = doc.root.elements[1]
822 # doc.root.elements.delete b #-> <a><c/><c id="1"/></a>
823 # doc.elements.delete("a/c[@id='1']") #-> <a><c/></a>
824 # doc.root.elements.delete 1 #-> <a/>
def delete element
  # An Element is removed from the parent directly.
  return @element.delete( element ) if element.kind_of? Element

  # An XPath or Integer is resolved through [] first; a miss yields nil.
  target = self[element]
  target.remove if target
end
833
834 # Removes multiple elements. Filters for Element children, regardless of
835 # XPath matching.
836 # xpath:: all elements matching this String path are removed.
837 # Returns:: an Array of Elements that have been removed
838 # doc = Document.new '<a><c/><c/><c/><c/></a>'
839 # deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
def delete_all( xpath )
  # Collect first, delete afterwards, so the tree is not modified while
  # the XPath search is still walking it.
  doomed = []
  XPath::each( @element, xpath ) do |node|
    doomed << node if node.kind_of? Element
  end
  doomed.each do |node|
    @element.delete node
    node.remove
  end
  doomed
end
851
852 # Adds an element
853 # element::
854 # if supplied, is either an Element, String, or
855 # Source (see Element.initialize). If not supplied or nil, a
856 # new, default Element will be constructed
857 # Returns:: the added Element
858 # a = Element.new('a')
859 # a.elements.add(Element.new('b')) #-> <a><b/></a>
860 # a.elements.add('c') #-> <a><b/><c/></a>
def add element=nil
  if element.kind_of?( Element )
    # An existing Element is attached to the parent and adopts the
    # parent's context.
    @element << element
    element.context = @element.context
    element
  else
    # nil builds an element with an empty name; anything else is passed
    # to Element.new as its first argument. This collection is handed
    # over as the second (parent) argument.
    Element.new( element.nil? ? "" : element, self, @element.context )
  end
end
873
874 alias :<< :add
875
876 # Iterates through all of the child Elements, optionally filtering
877 # them by a given XPath
878 # xpath::
879 # optional. If supplied, this is a String XPath, and is used to
880 # filter the children, so that only matching children are yielded. Note
881 # that XPaths are automatically filtered for Elements, so that
882 # non-Element children will not be yielded
883 # doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
# doc.root.elements.each {|e|p e} #-> Yields b, c, d, b, c, d elements
# doc.root.elements.each('b') {|e|p e} #-> Yields b, b elements
# doc.root.elements.each('child::node()') {|e|p e}
# #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
888 # XPath.each(doc.root, 'child::node()', &block)
889 # #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
def each( xpath=nil, &block)
  XPath::each( @element, xpath ) do |node|
    # The XPath may match text, comments, etc.; only Elements are yielded.
    next unless node.kind_of? Element
    yield node
  end
end
893
# Maps the block over the Element nodes matched by +xpath+ (non-Element
# matches are skipped) and returns the block results as an Array.
def collect( xpath=nil, &block )
  results = []
  XPath::each( @element, xpath ) do |node|
    next unless node.kind_of?(Element)
    results << yield(node)
  end
  results
end
901
# Folds the block over the Element nodes matched by +xpath+ (non-Element
# matches are skipped).
# xpath:: optional XPath selecting the nodes to fold over
# initial::
#   optional starting accumulator. When nil, the first matching Element
#   becomes the accumulator and the block is first called for the second.
# Returns:: the final accumulator (nil when nothing matched and no initial
#           value was given)
def inject( xpath=nil, initial=nil, &block )
  # Remember whether the accumulator has been seeded. Re-checking
  # "initial == nil" on every step made a nil block result look like a
  # missing seed, so the next element silently replaced the accumulator.
  seeded = !initial.nil?
  XPath::each( @element, xpath ) do |node|
    next unless node.kind_of? Element
    if seeded
      initial = yield( initial, node )
    else
      initial = node
      seeded = true
    end
  end
  initial
end
916
917 # Returns the number of +Element+ children of the parent object.
918 # doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
919 # doc.root.size #-> 6, 3 element and 3 text nodes
920 # doc.root.elements.size #-> 3
def size
  # Walk every child of the parent but count the Element ones only.
  total = 0
  @element.each do |node|
    next unless node.kind_of? Element
    total += 1
  end
  total
end
926
927 # Returns an Array of Element children. An XPath may be supplied to
928 # filter the children. Only Element children are returned, even if the
929 # supplied XPath matches non-Element children.
930 # doc = Document.new '<a>sean<b/>elliott<c/></a>'
931 # doc.root.elements.to_a #-> [ <b/>, <c/> ]
932 # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
933 # XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ]
def to_a(xpath = nil)
  matches = XPath.match(@element, xpath)
  # Without an explicit XPath the match result is handed back untouched;
  # with one, non-Element matches are filtered out.
  xpath ? matches.find_all { |node| node.kind_of?(Element) } : matches
end
939
940 private
# Private helper method. Removes quotes from quoted strings
def literalize(name)
  # Only the leading character decides whether the string counts as quoted.
  opening = name[0]
  return name unless opening == ?' || opening == ?" #'
  # Drop the first and last characters (the surrounding quotes).
  name[1..-2]
end
946 end
947
948 ########################################################################
949 # ATTRIBUTES #
950 ########################################################################
951
952 # A class that defines the set of Attributes of an Element and provides
953 # operations for accessing elements in that set.
class Attributes < Hash
  # Storage layout: the Hash is keyed by the attribute's local name. A value
  # is either a single Attribute, or -- when several attributes share a
  # local name under different prefixes -- a nested Hash mapping
  # prefix => Attribute.

  # Constructor
  # element:: the Element of which this is an Attribute
  def initialize(element)
    @element = element
  end

  # Fetches an attribute value.  If you want to get the Attribute itself,
  # use get_attribute()
  # name:: an XPath attribute name.  Namespaces are relevant here.
  # Returns::
  #   the String value of the matching attribute, or +nil+ if no
  #   matching attribute was found.  This is the unnormalized value
  #   (with entities expanded).
  #
  #  doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
  #  doc.root.attributes['att']         #-> '<'
  #  doc.root.attributes['bar:att']     #-> '2'
  def [](name)
    attr = get_attribute(name)
    attr.nil? ? nil : attr.value
  end

  # Returns an Array of all the Attribute nodes of the owning Element.
  # Attributes that share a local name under different prefixes are
  # listed individually (the internal prefix Hash is never exposed).
  def to_a
    each_attribute.to_a
  end

  # Returns the number of attributes the owning Element contains.
  #  doc = Document.new "<a x='1' y='2' foo:x='3'/>"
  #  doc.root.attributes.length        #-> 3
  def length
    count = 0
    each_attribute { count += 1 }
    count
  end
  alias :size :length

  # Iterates over the attributes of an Element.  Yields actual Attribute
  # nodes, not String values.  Without a block, an Enumerator over the
  # same nodes is returned.
  #
  #  doc = Document.new '<a x="1" y="2"/>'
  #  doc.root.attributes.each_attribute {|attr|
  #    p attr.expanded_name+" => "+attr.value
  #  }
  def each_attribute # :yields: attribute
    return to_enum(:each_attribute) unless block_given?
    each_value do |val|
      if val.kind_of? Attribute
        yield val
      else
        # Nested prefix => Attribute Hash for same-named attributes.
        val.each_value { |atr| yield atr }
      end
    end
  end

  # Iterates over each attribute of an Element, yielding the expanded name
  # and value as a pair of Strings.  Without a block, an Enumerator over
  # the same pairs is returned.
  #
  #  doc = Document.new '<a x="1" y="2"/>'
  #  doc.root.attributes.each {|name, value| p name+" => "+value }
  def each
    return to_enum(:each) unless block_given?
    each_attribute do |attr|
      yield attr.expanded_name, attr.value
    end
  end

  # Fetches an attribute
  # name::
  #   the name by which to search for the attribute.  Can be a
  #   <tt>prefix:name</tt> namespace name.
  # Returns:: The first matching attribute, or nil if there was none.  This
  # value is an Attribute node, not the String value of the attribute.
  #  doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
  #  doc.root.attributes.get_attribute("foo").value    #-> "2"
  #  doc.root.attributes.get_attribute("x:foo").value  #-> "1"
  def get_attribute( name )
    attr = fetch( name, nil )
    unless attr.nil?
      # Several prefixes share this local name: pick the one carrying the
      # owning element's own prefix.
      attr = attr[ @element.prefix ] if attr.kind_of? Hash
      return attr
    end
    return nil if name.nil?

    # Nothing stored under the full name; split off a prefix and retry
    # with the local part.
    name =~ Namespace::NAMESPLIT
    prefix, n = $1, $2
    if prefix
      attr = fetch( n, nil )
      if attr.kind_of? Attribute
        return attr if prefix == attr.prefix
      elsif !attr.nil?
        return attr[ prefix ]
      end
    end

    # Fall back to a value the DOCTYPE supplies for this element/attribute.
    doctype = owning_doctype
    if doctype
      attr_val = doctype.attribute_of( doctype_element_name( doctype ), name )
      return Attribute.new( name, attr_val ) if attr_val
    end
    nil
  end

  # Sets an attribute, overwriting any existing attribute value by the
  # same name.  Namespace is significant.
  # name:: the name of the attribute
  # value::
  #   (optional) If supplied, the value of the attribute.  If
  #   nil, any existing matching attribute is deleted.
  # Returns::
  #   Owning element when an attribute is stored; +nil+ when +value+ is
  #   nil (deletion).
  # Raises::
  #   ParseException if an attribute with the same local name but a
  #   different prefix already exists and both prefixes (neither being
  #   "xmlns") resolve to the same namespace on the owning element.
  #  doc = Document.new "<a x:foo='1' foo='3'/>"
  #  doc.root.attributes['y:foo'] = '2'
  #  doc.root.attributes['foo'] = '4'
  #  doc.root.attributes['x:foo'] = nil
  def []=( name, value )
    if value.nil?             # Delete the named attribute
      attr = get_attribute(name)
      delete attr unless attr.nil?
      return
    end
    unless value.kind_of? Attribute
      # Plain values are normalized against the document's DOCTYPE (nil
      # when there is none) and wrapped in an Attribute.
      value = Text::normalize( value, owning_doctype )
      value = Attribute.new(name, value)
    end
    value.element = @element
    old_attr = fetch(value.name, nil)
    if old_attr.nil?
      store(value.name, value)
    elsif old_attr.kind_of? Hash
      old_attr[value.prefix] = value
    elsif old_attr.prefix != value.prefix
      # Check for conflicting namespaces
      if value.prefix != "xmlns" && old_attr.prefix != "xmlns" &&
         @element.namespace( old_attr.prefix ) == @element.namespace( value.prefix )
        raise ParseException.new(
          "Namespace conflict in adding attribute \"#{value.name}\": "+
          "Prefix \"#{old_attr.prefix}\" = "+
          "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
          "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"")
      end
      # Same local name, different prefix: switch to the nested layout.
      store value.name, { old_attr.prefix => old_attr,
                          value.prefix    => value }
    else
      store value.name, value
    end
    return @element
  end

  # Returns an array of Strings containing all of the prefixes declared
  # by this set of attributes.  The array does not include the default
  # namespace declaration, if one exists.
  #  doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
  #        "z='glorp' p:k='gru'/>")
  #  prefixes = doc.root.attributes.prefixes    #-> ['x', 'y']
  def prefixes
    ns = []
    each_attribute do |attribute|
      ns << attribute.name if attribute.prefix == 'xmlns'
    end
    doctype_attributes.each do |attribute|
      ns << attribute.name if attribute.prefix == 'xmlns'
    end
    ns
  end

  # Returns a Hash of the namespace declarations found in this set of
  # attributes and in the attributes the DOCTYPE lists for the owning
  # element.  Keys are the attribute names (the declared prefix, or
  # 'xmlns' for the default declaration); values are the attribute values.
  def namespaces
    namespaces = {}
    each_attribute do |attribute|
      namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
    end
    doctype_attributes.each do |attribute|
      namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
    end
    namespaces
  end

  # Removes an attribute
  # attribute::
  #   either a String, which is the name of the attribute to remove --
  #   namespaces are significant here -- or the attribute to remove.
  # Returns:: the owning element
  #  doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
  #  doc.root.attributes.delete 'foo'   #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
  #  doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
  #  attr = doc.root.attributes.get_attribute('y:foo')
  #  doc.root.attributes.delete attr    #-> <a z:foo='4'/>"
  def delete( attribute )
    if attribute.kind_of? Attribute
      name = attribute.name
      prefix = attribute.prefix
    else
      attribute =~ Namespace::NAMESPLIT
      prefix, name = $1, $2
      prefix = '' unless prefix
    end
    old = fetch(name, nil)
    if old.kind_of? Hash # the supplied attribute is one of many
      old.delete(prefix)
      # A single survivor goes back to the flat (non-nested) layout.
      store name, old.values.first if old.size == 1
    elsif !old.nil?      # the supplied attribute is a top-level one
      super(name)
    end
    @element
  end

  # Adds an attribute, overriding any existing attribute by the
  # same name.  Namespaces are significant.
  # attribute:: An Attribute
  def add( attribute )
    self[attribute.name] = attribute
  end

  alias :<< :add

  # Deletes all attributes matching a name.  Namespaces are significant.
  # name::
  #   A String; all attributes whose expanded name equals this String
  #   will be removed
  # Returns:: an Array of the Attributes that were removed
  def delete_all( name )
    # Collect first, remove afterwards, so the set is not modified while
    # it is being iterated.
    rv = []
    each_attribute { |attribute|
      rv << attribute if attribute.expanded_name == name
    }
    rv.each { |attr| attr.remove }
    rv
  end

  # The +get_attribute_ns+ method retrieves an attribute by its namespace
  # and name. Thus it is possible to reliably identify an attribute
  # even if an XML processor has changed the prefix.
  # Returns:: the first matching Attribute, or nil if there is none.
  #
  # Method contributed by Henrik Martensson
  def get_attribute_ns(namespace, name)
    each_attribute { |attribute|
      return attribute if name == attribute.name &&
                          namespace == attribute.namespace
    }
    nil
  end

  private

  # The doctype of the owning element's document, or nil when the element
  # has no document or the document has no doctype.
  def owning_doctype
    document = @element.document
    (document && document.doctype) || nil
  end

  # Name under which the owning element is looked up in +doctype+: its
  # expanded name, or the doctype's own name when that is empty.
  def doctype_element_name( doctype )
    expn = @element.expanded_name
    expn.size == 0 ? doctype.name : expn
  end

  # Attributes the doctype lists for the owning element; an empty Array
  # when there is no doctype.
  def doctype_attributes
    doctype = owning_doctype
    return [] unless doctype
    doctype.attributes_of( doctype_element_name( doctype ) )
  end
end
1227 end