File: rexml/parsers/sax2parser.rb

Overview
Module Structure
Class Hierarchy
Code

Overview

Module Structure

  module: <Toplevel Module>
  module: REXML#6
  module: Parsers#7
  class: SAX2Parser#9
inherits from
  Object ( Builtin-Module )
has properties
method: initialize #10
method: source #20
method: add_listener / 1 #24
method: listen / 2 #58
method: deafen / 2 #76
method: parse #85
method: handle / 2 #181
method: get_procs / 2 #194
method: get_listeners / 2 #208
method: add / 1 #222
method: get_namespace / 1 #231

Class Hierarchy

Code

   1  require 'rexml/parsers/baseparser'
   2  require 'rexml/parseexception'
   3  require 'rexml/namespace'
   4  require 'rexml/text'
   5 
   6  module REXML
   7    module Parsers
   8      # SAX2Parser
   9      class SAX2Parser
  10        def initialize source
  11          @parser = BaseParser.new(source)
  12          @listeners = []
  13          @procs = []
  14          @namespace_stack = []
  15          @has_listeners = false
  16          @tag_stack = []
  17          @entities = {}
  18        end
  19 
  20        def source
  21          @parser.source
  22        end
  23 
  24        def add_listener( listener )
  25          @parser.add_listener( listener )
  26        end
  27 
  28        # Listen arguments:
  29        #
  30        # Symbol, Array, Block
  31        #   Listen to Symbol events on Array elements
  32        # Symbol, Block
  33        #   Listen to Symbol events
  34        # Array, Listener
  35        #   Listen to all events on Array elements
  36        # Array, Block
  37        #   Listen to :start_element events on Array elements
  38        # Listener
  39        #   Listen to All events
  40        #
  41        # Symbol can be one of: :start_element, :end_element,
  42        # :start_prefix_mapping, :end_prefix_mapping, :characters,
  43        # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
  44        # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
  45        #
  46        # There is an additional symbol that can be listened for: :progress.
  47        # This will be called for every event generated, passing in the current 
  48        # stream position.
  49        #
  50        # Array contains regular expressions or strings which will be matched
  51        # against fully qualified element names.
  52        #
  53        # Listener must implement the methods in SAX2Listener
  54        #
  55        # Block will be passed the same arguments as a SAX2Listener method would
  56        # be, where the method name is the same as the matched Symbol.
  57        # See the SAX2Listener for more information.
  58        def listen( *args, &blok )
  59          if args[0].kind_of? Symbol
  60            if args.size == 2
  61              args[1].each { |match| @procs << [args[0], match, blok] }
  62            else
  63              add( [args[0], nil, blok] )
  64            end
  65          elsif args[0].kind_of? Array
  66            if args.size == 2
  67              args[0].each { |match| add( [nil, match, args[1]] ) }
  68            else
  69              args[0].each { |match| add( [ :start_element, match, blok ] ) }
  70            end
  71          else
  72            add([nil, nil, args[0]])
  73          end
  74        end
  75 
  76        def deafen( listener=nil, &blok )
  77          if listener
  78            @listeners.delete_if {|item| item[-1] == listener }
  79            @has_listeners = false if @listeners.size == 0
  80          else
  81            @procs.delete_if {|item| item[-1] == blok }
  82          end
  83        end
  84 
  85        def parse
  86          @procs.each { |sym,match,block| block.call if sym == :start_document }
  87          @listeners.each { |sym,match,block| 
  88            block.start_document if sym == :start_document or sym.nil?
  89          }
  90          root = context = []
  91          while true
  92            event = @parser.pull
  93            case event[0]
  94            when :end_document
  95              handle( :end_document )
  96              break
  97            when :start_doctype
  98              handle( :doctype, *event[1..-1])
  99            when :end_doctype
 100              context = context[1]
 101            when :start_element
 102              @tag_stack.push(event[1])
 103              # find the observers for namespaces
 104              procs = get_procs( :start_prefix_mapping, event[1] )
 105              listeners = get_listeners( :start_prefix_mapping, event[1] )
 106              if procs or listeners
 107                # break out the namespace declarations
 108                # The attributes live in event[2]
 109                event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
 110                nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
 111                nsdecl.collect! { |n, value| [ n[6..-1], value ] }
 112                @namespace_stack.push({})
 113                nsdecl.each do |n,v|
 114                  @namespace_stack[-1][n] = v
 115                  # notify observers of namespaces
 116                  procs.each { |ob| ob.call( n, v ) } if procs
 117                  listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
 118                end
 119              end
 120              event[1] =~ Namespace::NAMESPLIT
 121              prefix = $1
 122              local = $2
 123              uri = get_namespace(prefix)
 124              # find the observers for start_element
 125              procs = get_procs( :start_element, event[1] )
 126              listeners = get_listeners( :start_element, event[1] )
 127              # notify observers
 128              procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
 129              listeners.each { |ob| 
 130                ob.start_element( uri, local, event[1], event[2] ) 
 131              } if listeners
 132            when :end_element
 133              @tag_stack.pop
 134              event[1] =~ Namespace::NAMESPLIT
 135              prefix = $1
 136              local = $2
 137              uri = get_namespace(prefix)
 138              # find the observers for start_element
 139              procs = get_procs( :end_element, event[1] )
 140              listeners = get_listeners( :end_element, event[1] )
 141              # notify observers
 142              procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
 143              listeners.each { |ob| 
 144                ob.end_element( uri, local, event[1] ) 
 145              } if listeners
 146 
 147              namespace_mapping = @namespace_stack.pop
 148              # find the observers for namespaces
 149              procs = get_procs( :end_prefix_mapping, event[1] )
 150              listeners = get_listeners( :end_prefix_mapping, event[1] )
 151              if procs or listeners
 152                namespace_mapping.each do |prefix, uri|
 153                  # notify observers of namespaces
 154                  procs.each { |ob| ob.call( prefix ) } if procs
 155                  listeners.each { |ob| ob.end_prefix_mapping(prefix) } if listeners
 156                end
 157              end
 158            when :text
 159              #normalized = @parser.normalize( event[1] )
 160              #handle( :characters, normalized )
 161              copy = event[1].clone
 162              @entities.each { |key, value| copy = copy.gsub("&#{key};", value) }
 163              copy.gsub!( Text::NUMERICENTITY ) {|m|
 164                m=$1
 165                m = "0#{m}" if m[0] == ?x
 166                [Integer(m)].pack('U*')
 167              }
 168              handle( :characters, copy )
 169            when :entitydecl
 170              @entities[ event[1] ] = event[2] if event.size == 3
 171              handle( *event )
 172            when :processing_instruction, :comment, :attlistdecl, 
 173              :elementdecl, :cdata, :notationdecl, :xmldecl
 174              handle( *event )
 175            end
 176            handle( :progress, @parser.position )
 177          end
 178        end
 179 
 180        private
 181        def handle( symbol, *arguments )
 182          tag = @tag_stack[-1]
 183          procs = get_procs( symbol, tag )
 184          listeners = get_listeners( symbol, tag )
 185          # notify observers
 186          procs.each { |ob| ob.call( *arguments ) } if procs
 187          listeners.each { |l| 
 188            l.send( symbol.to_s, *arguments ) 
 189          } if listeners
 190        end
 191 
 192        # The following methods are duplicates, but it is faster than using
 193        # a helper
 194        def get_procs( symbol, name )
 195          return nil if @procs.size == 0
 196          @procs.find_all do |sym, match, block|
 197            #puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s
 198            (
 199              (sym.nil? or symbol == sym) and 
 200              ((name.nil? and match.nil?) or match.nil? or (
 201                (name == match) or
 202                (match.kind_of? Regexp and name =~ match)
 203                )
 204              )
 205            )
 206          end.collect{|x| x[-1]}
 207        end
 208        def get_listeners( symbol, name )
 209          return nil if @listeners.size == 0
 210          @listeners.find_all do |sym, match, block|
 211            (
 212              (sym.nil? or symbol == sym) and 
 213              ((name.nil? and match.nil?) or match.nil? or (
 214                (name == match) or
 215                (match.kind_of? Regexp and name =~ match)
 216                )
 217              )
 218            )
 219          end.collect{|x| x[-1]}
 220        end
 221 
 222        def add( pair )
 223          if pair[-1].respond_to? :call
 224            @procs << pair unless @procs.include? pair
 225          else
 226            @listeners << pair unless @listeners.include? pair
 227            @has_listeners = true
 228          end
 229        end
 230 
 231        def get_namespace( prefix ) 
 232          uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
 233            (@namespace_stack.find { |ns| not ns[nil].nil? })
 234          uris[-1][prefix] unless uris.nil? or 0 == uris.size
 235        end
 236      end
 237    end
 238  end