1 require 'rexml/formatters/default'
2
module REXML
  module Formatters
    # Pretty-prints an XML document.  Whitespace inside text nodes is
    # collapsed in the generated output, and line breaks and indentation are
    # inserted between nodes.
    #
    # TODO: Add an option to print attributes on new lines
    class Pretty < Default

      # If compact is set to true, then the formatter will attempt to use as
      # little space as possible
      attr_accessor :compact
      # The width of a page.  Used for formatting text
      attr_accessor :width

      # Create a new pretty printer.  The destination is not given here; it
      # is handed to the formatter together with each node that is written.
      #
      # indentation::
      #   An integer greater than 0.  The indentation of each level will be
      #   this number of spaces.  If this is < 1, the behavior of this object
      #   is undefined.  Defaults to 2.
      # ie_hack::
      #   If true, the printer will insert whitespace before closing empty
      #   tags, thereby allowing Internet Explorer's feeble XML parser to
      #   function. Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
        @width = 80
        # Explicit default so that reading +compact+ never touches an
        # uninitialised instance variable.
        @compact = false
      end

      protected

      # Writes +node+ (an element) to +output+ at the current indentation
      # level: the start tag with its attributes, then either a
      # self-closing "/>" when there are no children, or the children
      # followed by the end tag.
      def write_element(node, output)
        output << ' '*@level
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          # Prefer the single-line form when compact mode allows it; fall
          # back to one child per line otherwise.
          write_child_lines( node, output ) unless write_inline_text( node, output )
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes the text of +node+ to +output+: every whitespace character
      # becomes a space, runs of spaces are collapsed, and the result is
      # wrapped to the page width that is left at the current indentation
      # level.  Continuation lines are indented to the same level.
      def write_text( node, output )
        # Non-destructive gsub/squeeze: the string returned by to_s may still
        # be owned by the node (or be frozen), so it must not be edited in
        # place.
        s = node.to_s.gsub(/\s/, ' ').squeeze(" ")
        # Honour the configurable page width instead of a fixed 80 columns.
        s = wrap(s, @width - @level)
        s = indent_text(s, @level, " ", true)
        output << (' '*@level + s)
      end

      # Writes the indentation for the current level, then lets the
      # inherited implementation write the comment itself.
      def write_comment( node, output)
        output << ' ' * @level
        super
      end

      # Writes the indentation for the current level, then lets the
      # inherited implementation write the CDATA section itself.
      def write_cdata( node, output)
        output << ' ' * @level
        super
      end

      # Writes the children of the document +node+ to +output+, separating
      # them with newlines.
      def write_document( node, output )
        # Ok, this is a bit odd.  All XML documents have an XML declaration,
        # but it may not write itself if the user didn't specifically add it,
        # either through the API or in the input document.  If it doesn't write
        # itself, then we don't need a carriage return... which makes this
        # logic more complex.
        children = node.children
        first = children[0]
        children.each { |child|
          is_text = child.instance_of?(Text)
          # A text node that compares equal to the last child is dropped.
          next if is_text && child == children[-1]
          # No separator before the first child, before text, or before the
          # second child when the first one produced no output.
          glued = child == first || is_text ||
                  (child == children[1] && suppressed_declaration?(first))
          output << "\n" unless glued
          write( child, output )
        }
      end

      private

      # In compact mode, tries to write the children of +node+ on a single
      # line.  This is only done when every child is a Text node and the
      # formatted text is shorter than the page width.  Returns true when
      # the children were written to +output+, false when nothing was
      # written.
      def write_inline_text( node, output )
        return false unless compact
        return false unless node.children.all? { |child| child.kind_of?(Text) }

        line = ''.dup            # mutable scratch buffer
        saved_level = @level
        @level = 0               # render the probe without indentation
        begin
          node.children.each { |child| write( child, line ) }
        ensure
          # Restore the level even if a child fails to write, so the
          # formatter stays usable afterwards.
          @level = saved_level
        end

        return false unless line.length < @width
        output << line
        true
      end

      # Writes each non-blank child of +node+ on its own line, one
      # indentation level deeper, followed by the indentation that precedes
      # the end tag.
      def write_child_lines( node, output )
        output << "\n"
        @level += @indentation
        begin
          node.children.each { |child|
            # Whitespace-only text carries no content worth a line.
            next if child.kind_of?(Text) && child.to_s.strip.length == 0
            write( child, output )
            output << "\n"
          }
        ensure
          # Always undo the indentation step, even when a child raises.
          @level -= @indentation
        end
        output << ' '*@level
      end

      # True when +node+ answers to +writethis+ and reports that it will not
      # write itself.  Nodes without +writethis+ are treated as written.
      def suppressed_declaration?( node )
        node.respond_to?(:writethis) && !node.writethis
      end

      # Returns a copy of +string+ in which every newline is followed by
      # +level+ repetitions of +style+.  The first line is left as it is
      # (the caller adds its indentation); +indentfirstline+ is accepted for
      # interface compatibility and is not used.  A negative +level+ returns
      # +string+ unchanged.
      def indent_text(string, level=1, style="\t", indentfirstline=true)
        return string if level < 0
        string.gsub(/\n/, "\n#{style*level}")
      end

      # Wraps +string+ so that lines are at most +width+ characters long
      # where possible, breaking only at spaces; the space at each break is
      # replaced by a newline.  A word longer than +width+ is kept whole on
      # its own line and wrapping continues after it.  When +width+ is less
      # than 1 there is no room to wrap into and +string+ is returned
      # unchanged.
      def wrap(string, width)
        return string if width < 1

        lines = []
        rest = string
        # Iterative on purpose: one pass per output line, no recursion depth
        # tied to the length of the text.
        while rest.length > width
          # Last space that keeps the line within width; failing that, the
          # first space after an over-long word.
          place = rest.rindex(' ', width) || rest.index(' ', width)
          break if place.nil?
          lines << rest[0, place]
          rest = rest[place+1..-1]
        end
        lines << rest
        lines.join("\n")
      end

    end
  end
end
138