File: rexml/source.rb

Overview
Module Structure
Class Hierarchy
Code

Overview

Module Structure

  module: <Toplevel Module>
  module: REXML#3
  class: SourceFactory#5
inherits from
  Object ( Builtin-Module )
has properties
class method: create_from / 1 #9
  class: Source#28
includes
  Encoding ( REXML )
inherits from
  Object ( Builtin-Module )
has properties
attribute: buffer [R] #31
attribute: line [R] #33
attribute: encoding [R] #34
method: initialize / 2 #40
method: encoding= / 1 #53
method: scan / 2 #80
method: read #87
method: consume / 1 #90
method: match_to / 2 #94
method: match_to_consume / 2 #98
method: match / 2 #104
method: empty? #111
method: position #115
method: current_line #120
  class: IOSource#130
inherits from
  Source ( REXML )
has properties
method: initialize / 3 #134
method: scan / 2 #163
method: read #190
method: consume / 1 #200
method: match / 2 #204
method: empty? #222
method: position #226
method: current_line #231

Class Hierarchy

Object ( Builtin-Module )
SourceFactory ( REXML ) — #5
Source ( REXML ) — #28
  IOSource    #130

Code

   1  require 'rexml/encoding'
   2 
   3  module REXML
   4    # Generates Source-s.  USE THIS CLASS.
   5    class SourceFactory
   6      # Generates a Source object
   7      # @param arg Either a String, or an IO
   8      # @return a Source, or nil if a bad argument was given
   9      def SourceFactory::create_from(arg)
  10        if arg.kind_of? String
  11          Source.new(arg)
  12        elsif arg.respond_to? :read and
  13              arg.respond_to? :readline and
  14              arg.respond_to? :nil? and
  15              arg.respond_to? :eof?
  16          IOSource.new(arg)
  17        elsif arg.kind_of? Source
  18          arg
  19        else
  20          raise "#{arg.class} is not a valid input stream.  It must walk \n"+
  21            "like either a String, an IO, or a Source."
  22        end
  23      end
  24    end
  25 
  # A Source can be searched for patterns, and wraps buffers and other
  # objects and provides consumption of text
  class Source
    include Encoding
    # The current buffer (what we're going to read next)
    attr_reader :buffer
    # Line counter; set to 0 by the constructor and not updated anywhere in
    # this class.
    attr_reader :line
    attr_reader :encoding

    # Constructor
    # @param arg must be a String, and should be a valid XML document
    # @param encoding if non-null, sets the encoding of the source to this
    # value, overriding all encoding detection
    def initialize(arg, encoding=nil)
      # @orig keeps the untouched input; @buffer shrinks as text is consumed.
      @orig = @buffer = arg
      if encoding
        self.encoding = encoding
      else
        self.encoding = check_encoding( @buffer )
      end
      @line = 0
    end


    # Inherited from Encoding
    # Overridden to support optimized en/decoding
    #
    # Does nothing further when the inherited setter returns a false value.
    # Otherwise records the encoded form of '>' as the line break and, for
    # anything other than UTF_8, decodes the current buffer and remembers
    # (via @to_utf) that later input needs decoding as well.
    def encoding=(enc)
      return unless super
      @line_break = encode( '>' )
      if enc != UTF_8
        @buffer = decode(@buffer)
        @to_utf = true
      else
        @to_utf = false
      end
    end

    # Scans the source for a given pattern.  Note, that this is not your
    # usual scan() method.  For one thing, the pattern argument has some
    # requirements; for another, the source can be consumed.  You can easily
    # confuse this method.  Originally, the patterns were easier
    # to construct and this method more robust, because this method
    # generated search regexes on the fly; however, this was
    # computationally expensive and slowed down the entire REXML package
    # considerably, since this is by far the most commonly called method.
    # @param pattern must be a Regexp, and must be in the form of
    # /^\s*(#{your pattern, with no groups})(.*)/.  The first group
    # will be returned; the second group is used if the consume flag is
    # set.
    # @param cons if true, the pattern returned will be consumed, leaving
    # everything after it in the Source.
    # @return the result of String#scan on the buffer (an empty Array when
    # nothing matched), or nil if the buffer itself is nil.
    def scan(pattern, cons=false)
      return nil if @buffer.nil?
      rv = @buffer.scan(pattern)
      # $' is the text following the last match made by String#scan.
      @buffer = $' if cons and rv.size>0
      rv
    end

    # No-op for an in-memory source; there is nothing more to read.
    def read
    end

    # Drops everything up to and including the first match of pattern.
    # @return the remaining buffer, or nil (buffer untouched) if there was
    # no match
    def consume( pattern )
      md = pattern.match( @buffer )
      @buffer = md.post_match if md
    end

    # @param char unused here
    # @return the MatchData for pattern against the buffer, or nil
    def match_to( char, pattern )
      return pattern.match(@buffer)
    end

    # Matches pattern and, on success, consumes through the end of the match.
    # On failure the buffer is left alone (it used to be overwritten with
    # nil, which made later position/current_line calls blow up).
    # @param char unused here
    # @return the MatchData, or nil if the pattern did not match
    def match_to_consume( char, pattern )
      md = pattern.match(@buffer)
      @buffer = md.post_match if md
      return md
    end

    # @param cons if true, a successful match is consumed from the buffer
    # @return the MatchData, or nil if the pattern did not match
    def match(pattern, cons=false)
      md = pattern.match(@buffer)
      @buffer = md.post_match if cons && md
      return md
    end

    # @return true if the Source is exhausted
    def empty?
      @buffer == ""
    end

    # @return the offset of the unconsumed text within the original input,
    # or nil if the buffer text cannot be found there (the buffer is replaced
    # by its decoded form for non-UTF_8 encodings, while @orig is not).
    def position
      # The buffer is the tail of the original, so search from the end:
      # index() would report an earlier duplicate of the remaining text, and
      # 0 for an exhausted source.
      @orig.rindex( @buffer )
    end

    # @return the current line in the source
    #
    # NOTE(review): split with no argument splits on whitespace rather than
    # on line ends, and grep with a String selects only elements equal to the
    # first 31 characters of the buffer, so this looks unlikely to find
    # anything in practice.  The intended return value is not clear from
    # here (IOSource#current_line returns a three element Array) -- confirm
    # the contract with callers before changing it.
    def current_line
      lines = @orig.split
      res = lines.grep @buffer[0..30]
      res = res[-1] if res.kind_of? Array
      lines.index( res ) if res
    end
  end
 127 
 128    # A Source that wraps an IO.  See the Source class for method
 129    # documentation
 130    class IOSource < Source
 131      #attr_reader :block_size
 132 
 133      # block_size has been deprecated
 134      def initialize(arg, block_size=500, encoding=nil)
 135        @er_source = @source = arg
 136        @to_utf = false
 137 
 138        # Determining the encoding is a deceptively difficult issue to resolve.
 139        # First, we check the first two bytes for UTF-16.  Then we
 140        # assume that the encoding is at least ASCII enough for the '>', and
 141        # we read until we get one of those.  This gives us the XML declaration,
 142        # if there is one.  If there isn't one, the file MUST be UTF-8, as per
 143        # the XML spec.  If there is one, we can determine the encoding from
 144        # it.
 145        @buffer = ""
 146        str = @source.read( 2 )
 147        if encoding
 148          self.encoding = encoding
 149        elsif 0xfe == str[0] && 0xff == str[1]
 150          @line_break = "\000>"
 151        elsif 0xff == str[0] && 0xfe == str[1]
 152          @line_break = ">\000"
 153        elsif 0xef == str[0] && 0xbb == str[1]
 154          str += @source.read(1)
 155          str = '' if (0xbf == str[2])
 156          @line_break = ">"
 157        else
 158          @line_break = ">"
 159        end
 160        super str+@source.readline( @line_break )
 161      end
 162 
 163      def scan(pattern, cons=false)
 164        rv = super
 165        # You'll notice that this next section is very similar to the same
 166        # section in match(), but just a liiittle different.  This is
 167        # because it is a touch faster to do it this way with scan()
 168        # than the way match() does it; enough faster to warrent duplicating
 169        # some code
 170        if rv.size == 0
 171          until @buffer =~ pattern or @source.nil?
 172            begin
 173              # READLINE OPT
 174              #str = @source.read(@block_size)
 175              str = @source.readline(@line_break)
 176              str = decode(str) if @to_utf and str
 177              @buffer << str
 178            rescue Iconv::IllegalSequence
 179              raise
 180            rescue
 181              @source = nil
 182            end
 183          end
 184          rv = super
 185        end
 186        rv.taint
 187        rv
 188      end
 189 
 190      def read
 191        begin
 192          str = @source.readline(@line_break)
 193          str = decode(str) if @to_utf and str 
 194          @buffer << str
 195        rescue Exception, NameError
 196          @source = nil
 197        end
 198      end
 199 
 200      def consume( pattern )
 201        match( pattern, true )
 202      end
 203 
 204      def match( pattern, cons=false )
 205        rv = pattern.match(@buffer)
 206        @buffer = $' if cons and rv
 207        while !rv and @source
 208          begin
 209            str = @source.readline(@line_break)
 210            str = decode(str) if @to_utf and str
 211            @buffer << str
 212            rv = pattern.match(@buffer)
 213            @buffer = $' if cons and rv
 214          rescue
 215            @source = nil
 216          end
 217        end
 218        rv.taint
 219        rv
 220      end
 221      
 222      def empty?
 223        super and ( @source.nil? || @source.eof? )
 224      end
 225 
 226      def position
 227        @er_source.stat.pipe? ? 0 : @er_source.pos
 228      end
 229 
 230      # @return the current line in the source
 231      def current_line
 232        begin
 233          pos = @er_source.pos        # The byte position in the source
 234          lineno = @er_source.lineno  # The XML < position in the source
 235          @er_source.rewind
 236          line = 0                    # The \r\n position in the source
 237          begin
 238            while @er_source.pos < pos
 239              @er_source.readline
 240              line += 1
 241            end
 242          rescue
 243          end
 244        rescue IOError
 245          pos = -1
 246          line = -1
 247        end
 248        [pos, lineno, line]
 249      end
 250    end
 251  end