File: rexml/parsers/pullparser.rb

Overview
Module Structure
Class Hierarchy
Code

Overview

Module Structure

  module: <Toplevel Module>
  module: REXML#7
  module: Parsers#8
  class: PullParser#28
extends
  Forwardable   
includes
  XMLTokens ( REXML )
inherits from
  Object ( Builtin-Module )
has properties
method: initialize #37
method: add_listener / 1 #44
method: each #49
method: peek #55
method: pull #65
method: unshift #80
  class: PullEvent#90
inherits from
  Object ( Builtin-Module )
has properties
method: initialize / 1 #94
method: [] / 2 #98
method: event_type #112
method: start_element? #117
method: end_element? #122
method: text? #127
method: instruction? #132
method: comment? #137
method: doctype? #142
method: attlistdecl? #147
method: elementdecl? #152
method: entitydecl? #163
method: notationdecl? #168
method: entity? #173
method: cdata? #178
method: xmldecl? #183
method: error? #187
method: inspect #191

Class Hierarchy

Code

   1  require 'forwardable'
   2 
   3  require 'rexml/parseexception'
   4  require 'rexml/parsers/baseparser'
   5  require 'rexml/xmltokens'
   6 
   7  module REXML
   8    module Parsers
   # = Using the Pull Parser
   # <em>This API is experimental, and subject to change.</em>
   #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
   #  while parser.has_next?
   #    res = parser.pull
   #    puts res[1]['att'] if res.start_element? and res[0] == 'b'
   #  end
   # See the PullEvent class for information on the content of the results.
   # The data is identical to the arguments passed for the various events to
   # the StreamListener API.
   #
   # Notice that:
   #  parser = PullParser.new( "<a>BAD DOCUMENT" )
   #  while parser.has_next?
   #    res = parser.pull
   #    raise res[1] if res.error?
   #  end
   # NOTE(review): nothing in this class rescues parse errors or builds an
   # :error event itself; whether one is ever delivered depends on the
   # underlying BaseParser -- confirm before relying on the example above.
   #
   # Nat Price gave me some good ideas for the API.
   class PullParser
     include XMLTokens
     extend Forwardable

     # These are forwarded untouched to the wrapped parser.  Note that
     # has_next? therefore only reflects the underlying parser: events that
     # are buffered locally (see #peek and #unshift) are not counted.
     def_delegators( :@parser, :has_next? )
     def_delegators( :@parser, :entity )
     def_delegators( :@parser, :empty? )
     def_delegators( :@parser, :source )

     # Creates a pull parser reading from +stream+, which is handed as-is to
     # BaseParser.new.
     def initialize(stream)
       @entities = {}     # internal entity name => value, filled while pulling
       @listeners = nil   # created lazily by #add_listener
       @parser = BaseParser.new( stream )
       @my_stack = []     # events that were peeked at or pushed back
     end

     # Registers +listener+.  The listener list is only stored here; no
     # method of this class consults it.
     # Returns the list of registered listeners.
     def add_listener( listener )
       (@listeners ||= []) << listener
     end

     # Yields every remaining event, in order, as a PullEvent.  Events that
     # were buffered by #peek or #unshift are delivered first and are not
     # lost when the underlying parser has already been exhausted.
     # Without a block an Enumerator is returned.
     def each
       return enum_for(:each) unless block_given?

       yield pull until @my_stack.empty? && !has_next?
     end

     # Returns the event +depth+ positions ahead (0 is the next event)
     # without consuming it.  Missing events are read from the parser and
     # buffered; they go through the same processing as in #pull, so a peeked
     # event is identical to the one #pull will later return.
     def peek(depth=0)
       @my_stack.push(next_event) while @my_stack.length <= depth
       @my_stack[depth]
     end

     # Consumes and returns the next event as a PullEvent.  Buffered events
     # (from #peek or #unshift) are returned before new ones are parsed.
     def pull
       return @my_stack.shift unless @my_stack.empty?

       next_event
     end

     # Pushes +token+ back so that it is the next thing #pull returns.
     # The token is stored as given (it is not wrapped in a PullEvent).
     def unshift(token)
       @my_stack.unshift token
     end

     private

     # Reads one raw event from the underlying parser, applies the
     # per-event bookkeeping and wraps the result in a PullEvent.
     def next_event
       event = @parser.pull
       case event[0]
       when :entitydecl
         # Remember the declaration for later text unnormalization, unless
         # its third field mentions PUBLIC or SYSTEM.
         @entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
       when :text
         # Append the unnormalized text after the raw text.
         event << @parser.unnormalize( event[1], @entities )
       end
       PullEvent.new( event )
     end
   end
  84 
  # A parsing event.  The contents of the event are accessed as an +Array+,
  # and the type is given either by the ...? methods, or by calling
  # +event_type+.  The contents of this object vary from event to event,
  # but are identical to the arguments passed to +StreamListener+s for each
  # event.
  class PullEvent
    # +arg+ is the raw event Array: element 0 is the event type (a Symbol),
    # the remaining elements are the event's content.
    def initialize(arg)
      @contents = arg
    end

    # Indexes into the event's content, i.e. everything after the event
    # type, so that <tt>event[0]</tt> is the first content element.
    #
    # start:: an index (Numeric) or a Range of content positions.
    # endd::  optional length, used together with a Numeric +start+.
    #
    # Ranges and negative indices are resolved against the content only, so
    # <tt>event[0..1]</tt> returns the first two content elements and
    # <tt>event[-1]</tt> the last one (never the event type).
    #
    # Raises a RuntimeError if +start+ is neither a Range nor a Numeric.
    def []( start, endd=nil)
      payload = @contents.drop(1)
      case start
      when Range
        payload[start]
      when Numeric
        endd.nil? ? payload[start] : payload[start, endd]
      else
        raise "Illegal argument #{start.inspect} (#{start.class})"
      end
    end

    # The type of this event.  The predicates below test for
    # :start_element, :end_element, :text, :processing_instruction,
    # :comment, :start_doctype, :attlistdecl, :elementdecl, :entitydecl,
    # :notationdecl, :entity, :cdata, :xmldecl and :error.
    def event_type
      @contents[0]
    end

    # Content: [ String tag_name, Hash attributes ]
    def start_element?
      event_type == :start_element
    end

    # Content: [ String tag_name ]
    def end_element?
      event_type == :end_element
    end

    # Content: [ String raw_text, String unnormalized_text ]
    def text?
      event_type == :text
    end

    # Content: [ String text ]
    def instruction?
      event_type == :processing_instruction
    end

    # Content: [ String text ]
    def comment?
      event_type == :comment
    end

    # Content: [ String name, String pub_sys, String long_name, String uri ]
    def doctype?
      event_type == :start_doctype
    end

    # Content: [ String text ]
    def attlistdecl?
      event_type == :attlistdecl
    end

    # Content: [ String text ]
    def elementdecl?
      event_type == :elementdecl
    end

    # Due to the wonders of DTDs, an entity declaration can be just about
    # anything.  There's no way to normalize it; you'll have to interpret the
    # content yourself.  However, the following is true:
    #
    # * If the entity declaration is an internal entity:
    #   [ String name, String value ]
    #
    # Otherwise, content: [ String text ]
    def entitydecl?
      event_type == :entitydecl
    end

    # Content: [ String text ]
    def notationdecl?
      event_type == :notationdecl
    end

    # Content: [ String text ]
    def entity?
      event_type == :entity
    end

    # Content: [ String text ]
    def cdata?
      event_type == :cdata
    end

    # Content: [ String version, String encoding, String standalone ]
    def xmldecl?
      event_type == :xmldecl
    end

    # True when this event is of type :error.
    def error?
      event_type == :error
    end

    # Returns "<event type>: <content array>" for debugging.
    def inspect
      "#{@contents[0]}: #{@contents[1..-1].inspect}"
    end
  end
 195    end
 196  end