File: rexml/parsers/baseparser.rb

File: rexml/parsers/baseparser.rb
Prev	Chapter 4. Sources	Next

Overview

Module Structure
Class Hierarchy

Module Structure

module: <Toplevel Module>

module: REXML — #6

module: Parsers — #7

class: BaseParser — #27

inherits from

Object ( Builtin-Module )

has properties

constant:	`NCNAME_STR`	—	#28
constant:	`NAME_STR`	—	#29
constant:	`UNAME_STR`	—	#30
constant:	`NAMECHAR`	—	#32
constant:	`NAME`	—	#33
constant:	`NMTOKEN`	—	#34
constant:	`NMTOKENS`	—	#35
constant:	`REFERENCE`	—	#36
constant:	`REFERENCE_RE`	—	#37
constant:	`DOCTYPE_START`	—	#39
constant:	`DOCTYPE_PATTERN`	—	#40
constant:	`ATTRIBUTE_PATTERN`	—	#41
constant:	`COMMENT_START`	—	#42
constant:	`COMMENT_PATTERN`	—	#43
constant:	`CDATA_START`	—	#44
constant:	`CDATA_END`	—	#45
constant:	`CDATA_PATTERN`	—	#46
constant:	`XMLDECL_START`	—	#47
constant:	`XMLDECL_PATTERN`	—	#48
constant:	`INSTRUCTION_START`	—	#49
constant:	`INSTRUCTION_PATTERN`	—	#50
constant:	`TAG_MATCH`	—	#51
constant:	`CLOSE_MATCH`	—	#52
constant:	`VERSION`	—	#54
constant:	`ENCODING`	—	#55
constant:	`STANDALONE`	—	#56
constant:	`ENTITY_START`	—	#58
constant:	`IDENTITY`	—	#59
constant:	`ELEMENTDECL_START`	—	#60
constant:	`ELEMENTDECL_PATTERN`	—	#61
constant:	`SYSTEMENTITY`	—	#62
constant:	`ENUMERATION`	—	#63
constant:	`NOTATIONTYPE`	—	#64
constant:	`ENUMERATEDTYPE`	—	#65
constant:	`ATTTYPE`	—	#66
constant:	`ATTVALUE`	—	#67
constant:	`DEFAULTDECL`	—	#68
constant:	`ATTDEF`	—	#69
constant:	`ATTDEF_RE`	—	#70
constant:	`ATTLISTDECL_START`	—	#71
constant:	`ATTLISTDECL_PATTERN`	—	#72
constant:	`NOTATIONDECL_START`	—	#73
constant:	`PUBLIC`	—	#74
constant:	`SYSTEM`	—	#75
constant:	`TEXT_PATTERN`	—	#77
constant:	`PUBIDCHAR`	—	#80
constant:	`SYSTEMLITERAL`	—	#81
constant:	`PUBIDLITERAL`	—	#82
constant:	`EXTERNALID`	—	#83
constant:	`NDATADECL`	—	#84
constant:	`PEREFERENCE`	—	#85
constant:	`ENTITYVALUE`	—	#86
constant:	`PEDEF`	—	#87
constant:	`ENTITYDEF`	—	#88
constant:	`PEDECL`	—	#89
constant:	`GEDECL`	—	#90
constant:	`ENTITYDECL`	—	#91
constant:	`EREFERENCE`	—	#93
constant:	`DEFAULT_ENTITIES`	—	#95
constant:	`MISSING_ATTRIBUTE_QUOTES`	—	#107
method:	`initialize` / 1	—	#109
method:	`add_listener` / 1	—	#113
attribute:	`source` [R]	—	#130
method:	`stream=` / 1	—	#132
method:	`position`	—	#142
method:	`empty?`	—	#152
method:	`has_next?`	—	#157
method:	`unshift`	—	#163
method:	`peek`	—	#173
method:	`pull`	—	#188
method:	`entity` / 2	—	#423
method:	`normalize` / 3	—	#434
method:	`unnormalize` / 3	—	#450

Class Hierarchy

Object ( Builtin-Module )

BaseParser ( REXML::Parsers ) — #27

Code

   1  require 'rexml/parseexception'
   2  require 'rexml/undefinednamespaceexception'
   3  require 'rexml/source'
   4  require 'set'
   5 
   6  module REXML
   7    module Parsers
   8      # = Using the Pull Parser
   9      # <em>This API is experimental, and subject to change.</em>
  10      #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
  11      #  while parser.has_next?
  12      #    res = parser.next
  13      #    puts res[1]['att'] if res.start_tag? and res[0] == 'b'
  14      #  end
  15      # See the PullEvent class for information on the content of the results.
  16      # The data is identical to the arguments passed for the various events to
  17      # the StreamListener API.
  18      #
  19      # Notice that:
  20      #  parser = PullParser.new( "<a>BAD DOCUMENT" )
  21      #  while parser.has_next?
  22      #    res = parser.next
  23      #    raise res[1] if res.error?
  24      #  end
  25      #
  26      # Nat Price gave me some good ideas for the API.
  27      class BaseParser
  28        NCNAME_STR= '[\w:][\-\w\d.]*'
  29        NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
  30        UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
  31 
  32        NAMECHAR = '[\-\w\d\.:]'
  33        NAME = "([\\w:]#{NAMECHAR}*)"
  34        NMTOKEN = "(?:#{NAMECHAR})+"
  35        NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
  36        REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
  37        REFERENCE_RE = /#{REFERENCE}/
  38 
  39        DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
  40        DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
  41        ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
  42        COMMENT_START = /\A<!--/u
  43        COMMENT_PATTERN = /<!--(.*?)-->/um
  44        CDATA_START = /\A<!\[CDATA\[/u
  45        CDATA_END = /^\s*\]\s*>/um
  46        CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
  47        XMLDECL_START = /\A<\?xml\s/u;
  48        XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
  49        INSTRUCTION_START = /\A<\?/u
  50        INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
  51        TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
  52        CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
  53 
  54        VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
  55        ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
  56        STANDALONE = /\bstandalone\s*=\s["'](.*?)['"]/um
  57 
  58        ENTITY_START = /^\s*<!ENTITY/
  59        IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
  60        ELEMENTDECL_START = /^\s*<!ELEMENT/um
  61        ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
  62        SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
  63        ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
  64        NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
  65        ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
  66        ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
  67        ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
  68        DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
  69        ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
  70        ATTDEF_RE = /#{ATTDEF}/
  71        ATTLISTDECL_START = /^\s*<!ATTLIST/um
  72        ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
  73        NOTATIONDECL_START = /^\s*<!NOTATION/um
  74        PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
  75        SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
  76 
  77        TEXT_PATTERN = /\A([^<]*)/um
  78 
  79        # Entity constants
  80        PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
  81        SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
  82        PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
  83        EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
  84        NDATADECL = "\\s+NDATA\\s+#{NAME}"
  85        PEREFERENCE = "%#{NAME};"
  86        ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
  87        PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
  88        ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
  89        PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
  90        GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
  91        ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
  92 
  93        EREFERENCE = /&(?!#{NAME};)/
  94 
  95        DEFAULT_ENTITIES = { 
  96          'gt' => [/&gt;/, '&gt;', '>', />/], 
  97          'lt' => [/&lt;/, '&lt;', '<', /</], 
  98          'quot' => [/&quot;/, '&quot;', '"', /"/], 
  99          "apos" => [/&apos;/, "&apos;", "'", /'/] 
 100        }
 101 
 102 
 103        ######################################################################
 104        # These are patterns to identify common markup errors, to make the
 105        # error messages more informative.
 106        ######################################################################
 107        MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
 108 
 109        def initialize( source )
 110          self.stream = source
 111        end
 112 
 113        def add_listener( listener )
 114          if !defined?(@listeners) or !@listeners
 115            @listeners = []
 116            instance_eval <<-EOL
 117              alias :_old_pull :pull
 118              def pull
 119                event = _old_pull
 120                @listeners.each do |listener|
 121                  listener.receive event
 122                end
 123                event
 124              end
 125            EOL
 126          end
 127          @listeners << listener
 128        end
 129 
 130        attr_reader :source
 131 
 132        def stream=( source )
 133          @source = SourceFactory.create_from( source )
 134          @closed = nil
 135          @document_status = nil
 136          @tags = []
 137          @stack = []
 138          @entities = []
 139          @nsstack = []
 140        end
 141 
 142        def position
 143          if @source.respond_to? :position
 144            @source.position
 145          else
 146            # FIXME
 147            0
 148          end
 149        end
 150 
 151        # Returns true if there are no more events
 152        def empty?
 153          return (@source.empty? and @stack.empty?)
 154        end
 155 
 156        # Returns true if there are more events.  Synonymous with !empty?
 157        def has_next?
 158          return !(@source.empty? and @stack.empty?)
 159        end
 160 
 161        # Push an event back on the head of the stream.  This method
 162        # has (theoretically) infinite depth.
 163        def unshift token
 164          @stack.unshift(token)
 165        end
 166 
 167        # Peek at the +depth+ event in the stack.  The first element on the stack
 168        # is at depth 0.  If +depth+ is -1, will parse to the end of the input
 169        # stream and return the last event, which is always :end_document.
 170        # Be aware that this causes the stream to be parsed up to the +depth+ 
 171        # event, so you can effectively pre-parse the entire document (pull the 
 172        # entire thing into memory) using this method.  
 173        def peek depth=0
 174          raise %Q[Illegal argument "#{depth}"] if depth < -1
 175          temp = []
 176          if depth == -1
 177            temp.push(pull()) until empty?
 178          else
 179            while @stack.size+temp.size < depth+1
 180              temp.push(pull())
 181            end
 182          end
 183          @stack += temp if temp.size > 0
 184          @stack[depth]
 185        end
 186 
 187        # Returns the next event.  This is a +PullEvent+ object.
 188        def pull
 189          if @closed
 190            x, @closed = @closed, nil
 191            return [ :end_element, x ]
 192          end
 193          return [ :end_document ] if empty?
 194          return @stack.shift if @stack.size > 0
 195          #STDERR.puts @source.encoding
 196          @source.read if @source.buffer.size<2
 197          #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
 198          if @document_status == nil
 199            #@source.consume( /^\s*/um )
 200            word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
 201            word = word[1] unless word.nil?
 202            #STDERR.puts "WORD = #{word.inspect}"
 203            case word
 204            when COMMENT_START
 205              return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
 206            when XMLDECL_START
 207              #STDERR.puts "XMLDECL"
 208              results = @source.match( XMLDECL_PATTERN, true )[1]
 209              version = VERSION.match( results )
 210              version = version[1] unless version.nil?
 211              encoding = ENCODING.match(results)
 212              encoding = encoding[1] unless encoding.nil?
 213              @source.encoding = encoding
 214              standalone = STANDALONE.match(results)
 215              standalone = standalone[1] unless standalone.nil?
 216              return [ :xmldecl, version, encoding, standalone ]
 217            when INSTRUCTION_START
 218              return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
 219            when DOCTYPE_START
 220              md = @source.match( DOCTYPE_PATTERN, true )
 221              @nsstack.unshift(curr_ns=Set.new)
 222              identity = md[1]
 223              close = md[2]
 224              identity =~ IDENTITY
 225              name = $1
 226              raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
 227              pub_sys = $2.nil? ? nil : $2.strip
 228              long_name = $4.nil? ? nil : $4.strip
 229              uri = $6.nil? ? nil : $6.strip
 230              args = [ :start_doctype, name, pub_sys, long_name, uri ]
 231              if close == ">"
 232                @document_status = :after_doctype
 233                @source.read if @source.buffer.size<2
 234                md = @source.match(/^\s*/um, true)
 235                @stack << [ :end_doctype ]
 236              else
 237                @document_status = :in_doctype
 238              end
 239              return args
 240            when /^\s+/
 241            else
 242              @document_status = :after_doctype
 243              @source.read if @source.buffer.size<2
 244              md = @source.match(/\s*/um, true)
 245            end
 246          end
 247          if @document_status == :in_doctype
 248            md = @source.match(/\s*(.*?>)/um)
 249            case md[1]
 250            when SYSTEMENTITY 
 251              match = @source.match( SYSTEMENTITY, true )[1]
 252              return [ :externalentity, match ]
 253 
 254            when ELEMENTDECL_START
 255              return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
 256 
 257            when ENTITY_START
 258              match = @source.match( ENTITYDECL, true ).to_a.compact
 259              match[0] = :entitydecl
 260              ref = false
 261              if match[1] == '%'
 262                ref = true
 263                match.delete_at 1
 264              end
 265              # Now we have to sort out what kind of entity reference this is
 266              if match[2] == 'SYSTEM'
 267                # External reference
 268                match[3] = match[3][1..-2] # PUBID
 269                match.delete_at(4) if match.size > 4 # Chop out NDATA decl
 270                # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
 271              elsif match[2] == 'PUBLIC'
 272                # External reference
 273                match[3] = match[3][1..-2] # PUBID
 274                match[4] = match[4][1..-2] # HREF
 275                # match is [ :entity, name, PUBLIC, pubid, href ]
 276              else
 277                match[2] = match[2][1..-2]
 278                match.pop if match.size == 4
 279                # match is [ :entity, name, value ]
 280              end
 281              match << '%' if ref
 282              return match
 283            when ATTLISTDECL_START
 284              md = @source.match( ATTLISTDECL_PATTERN, true )
 285              raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
 286              element = md[1]
 287              contents = md[0]
 288 
 289              pairs = {}
 290              values = md[0].scan( ATTDEF_RE )
 291              values.each do |attdef|
 292                unless attdef[3] == "#IMPLIED"
 293                  attdef.compact!
 294                  val = attdef[3]
 295                  val = attdef[4] if val == "#FIXED "
 296                  pairs[attdef[0]] = val
 297                  if attdef[0] =~ /^xmlns:(.*)/
 298                    @nsstack[0] << $1
 299                  end
 300                end
 301              end
 302              return [ :attlistdecl, element, pairs, contents ]
 303            when NOTATIONDECL_START
 304              md = nil
 305              if @source.match( PUBLIC )
 306                md = @source.match( PUBLIC, true )
 307                vals = [md[1],md[2],md[4],md[6]]
 308              elsif @source.match( SYSTEM )
 309                md = @source.match( SYSTEM, true )
 310                vals = [md[1],md[2],nil,md[4]]
 311              else
 312                raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
 313              end
 314              return [ :notationdecl, *vals ]
 315            when CDATA_END
 316              @document_status = :after_doctype
 317              @source.match( CDATA_END, true )
 318              return [ :end_doctype ]
 319            end
 320          end
 321          begin
 322            if @source.buffer[0] == ?<
 323              if @source.buffer[1] == ?/
 324                @nsstack.shift
 325                last_tag = @tags.pop
 326                #md = @source.match_to_consume( '>', CLOSE_MATCH)
 327                md = @source.match( CLOSE_MATCH, true )
 328                raise REXML::ParseException.new( "Missing end tag for "+
 329                  "'#{last_tag}' (got \"#{md[1]}\")", 
 330                  @source) unless last_tag == md[1]
 331                return [ :end_element, last_tag ]
 332              elsif @source.buffer[1] == ?!
 333                md = @source.match(/\A(\s*[^>]*>)/um)
 334                #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
 335                raise REXML::ParseException.new("Malformed node", @source) unless md
 336                if md[0][2] == ?-
 337                  md = @source.match( COMMENT_PATTERN, true )
 338                  return [ :comment, md[1] ] if md
 339                else
 340                  md = @source.match( CDATA_PATTERN, true )
 341                  return [ :cdata, md[1] ] if md
 342                end
 343                raise REXML::ParseException.new( "Declarations can only occur "+
 344                  "in the doctype declaration.", @source)
 345              elsif @source.buffer[1] == ??
 346                md = @source.match( INSTRUCTION_PATTERN, true )
 347                return [ :processing_instruction, md[1], md[2] ] if md
 348                raise REXML::ParseException.new( "Bad instruction declaration",
 349                  @source)
 350              else
 351                # Get the next tag
 352                md = @source.match(TAG_MATCH, true)
 353                unless md
 354                  # Check for missing attribute quotes
 355                  raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
 356                  raise REXML::ParseException.new("malformed XML: missing tag start", @source) 
 357                end
 358                attributes = {}
 359                prefixes = Set.new
 360                prefixes << md[2] if md[2]
 361                @nsstack.unshift(curr_ns=Set.new)
 362                if md[4].size > 0
 363                  attrs = md[4].scan( ATTRIBUTE_PATTERN )
 364                  raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
 365                  attrs.each { |a,b,c,d,e| 
 366                    if b == "xmlns"
 367                      if c == "xml"
 368                        if d != "http://www.w3.org/XML/1998/namespace"
 369                          msg = "The 'xml' prefix must not be bound to any other namespace "+
 370                          "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
 371                          raise REXML::ParseException.new( msg, @source, self )
 372                        end
 373                      elsif c == "xmlns"
 374                        msg = "The 'xmlns' prefix must not be declared "+
 375                        "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
 376                        raise REXML::ParseException.new( msg, @source, self)
 377                      end
 378                      curr_ns << c
 379                    elsif b
 380                      prefixes << b unless b == "xml"
 381                    end
 382                    attributes[a] = e 
 383                  }
 384                end
 385          
 386                # Verify that all of the prefixes have been defined
 387                for prefix in prefixes
 388                  unless @nsstack.find{|k| k.member?(prefix)}
 389                    raise UndefinedNamespaceException.new(prefix,@source,self)
 390                  end
 391                end
 392 
 393                if md[6]
 394                  @closed = md[1]
 395                  @nsstack.shift
 396                else
 397                  @tags.push( md[1] )
 398                end
 399                return [ :start_element, md[1], attributes ]
 400              end
 401            else
 402              md = @source.match( TEXT_PATTERN, true )
 403              if md[0].length == 0
 404                @source.match( /(\s+)/, true )
 405              end
 406              #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
 407              #return [ :text, "" ] if md[0].length == 0
 408              # unnormalized = Text::unnormalize( md[1], self )
 409              # return PullEvent.new( :text, md[1], unnormalized )
 410              return [ :text, md[1] ]
 411            end
 412          rescue REXML::UndefinedNamespaceException
 413            raise
 414          rescue REXML::ParseException
 415            raise
 416          rescue Exception, NameError => error
 417            raise REXML::ParseException.new( "Exception parsing",
 418              @source, self, (error ? error : $!) )
 419          end
 420          return [ :dummy ]
 421        end
 422 
 423        def entity( reference, entities )
 424          value = nil
 425          value = entities[ reference ] if entities
 426          if not value
 427            value = DEFAULT_ENTITIES[ reference ]
 428            value = value[2] if value
 429          end
 430          unnormalize( value, entities ) if value
 431        end
 432 
 433        # Escapes all possible entities
 434        def normalize( input, entities=nil, entity_filter=nil )
 435          copy = input.clone
 436          # Doing it like this rather than in a loop improves the speed
 437          copy.gsub!( EREFERENCE, '&amp;' )
 438          entities.each do |key, value|
 439            copy.gsub!( value, "&#{key};" ) unless entity_filter and 
 440                                        entity_filter.include?(entity)
 441          end if entities
 442          copy.gsub!( EREFERENCE, '&amp;' )
 443          DEFAULT_ENTITIES.each do |key, value|
 444            copy.gsub!( value[3], value[1] )
 445          end
 446          copy
 447        end
 448 
 449        # Unescapes all possible entities
 450        def unnormalize( string, entities=nil, filter=nil )
 451          rv = string.clone
 452          rv.gsub!( /\r\n?/, "\n" )
 453          matches = rv.scan( REFERENCE_RE )
 454          return rv if matches.size == 0
 455          rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
 456            m=$1
 457            m = "0#{m}" if m[0] == ?x
 458            [Integer(m)].pack('U*')
 459          }
 460          matches.collect!{|x|x[0]}.compact!
 461          if matches.size > 0
 462            matches.each do |entity_reference|
 463              unless filter and filter.include?(entity_reference)
 464                entity_value = entity( entity_reference, entities )
 465                if entity_value
 466                  re = /&#{entity_reference};/
 467                  rv.gsub!( re, entity_value )
 468                end
 469              end
 470            end
 471            matches.each do |entity_reference|
 472              unless filter and filter.include?(entity_reference)
 473                er = DEFAULT_ENTITIES[entity_reference]
 474                rv.gsub!( er[0], er[2] ) if er
 475              end
 476            end
 477            rv.gsub!( /&amp;/, '&' )
 478          end
 479          rv
 480        end
 481      end
 482    end
 483  end
 484 
 485  =begin
 486    case event[0]
 487    when :start_element
 488    when :text
 489    when :end_element
 490    when :processing_instruction
 491    when :cdata
 492    when :comment
 493    when :xmldecl
 494    when :start_doctype
 495    when :end_doctype
 496    when :externalentity
 497    when :elementdecl
 498    when :entitydecl
 499    when :attlistdecl
 500    when :notationdecl
 501    when :end_doctype
 502    end
 503  =end

Prev	Up	Next
File: rexml/parseexception.rb	Home	File: rexml/parsers/lightparser.rb