File: rexml/parsers/xpathparser.rb

Overview
Module Structure
Class Hierarchy
Code

Overview

Module Structure

  module: <Toplevel Module>
  module: REXML#4
  module: Parsers#5
  class: XPathParser#10
includes
  XMLTokens ( REXML )
inherits from
  Object ( Builtin-Module )
has properties
constant: LITERAL #12
method: namespaces= / 1 #14
method: parse #19
method: predicate #27
method: abbreviate / 1 #33
method: expand / 1 #91
method: predicate_to_string / 2 #130
method: LocationPath #185
constant: AXIS #210
method: RelativeLocationPath #211
constant: NCNAMETEST #279
constant: QNAME #280
constant: NODE_TYPE #281
constant: PI #282
method: NodeTest #283
method: Predicate #326
method: OrExpr #352
method: AndExpr #373
method: EqualityExpr #396
method: RelationalExpr #421
method: AdditiveExpr #448
method: MultiplicativeExpr #473
method: UnaryExpr #500
method: UnionExpr #520
method: PathExpr #541
method: FilterExpr #561
constant: VARIABLE_REFERENCE #577
constant: NUMBER #578
constant: NT #579
method: PrimaryExpr #580
method: FunctionCall #622
method: get_group #635
method: parse_args / 1 #653

Class Hierarchy

Code

   1  require 'rexml/namespace'
   2  require 'rexml/xmltokens'
   3 
   4  module REXML
   5    module Parsers
   6      # You don't want to use this class.  Really.  Use XPath, which is a wrapper
   7      # for this class.  Believe me.  You don't want to poke around in here.
   8      # There is strange, dark magic at work in this code.  Beware.  Go back!  Go
   9      # back while you still can!
  10      class XPathParser
  11        include XMLTokens
  12        LITERAL    = /^'([^']*)'|^"([^"]*)"/u # quoted string literal: group 1 is the body of a '...' literal, group 2 the body of a "..." literal
  13 
  # Installs the namespace mapping used by this parser.
  #
  # The object is first handed to Functions.namespace_context= and is then
  # remembered in @namespaces.
  #
  # namespaces:: the mapping to install; its shape is not checked here
  def namespaces=(namespaces)
    Functions.namespace_context = namespaces
    @namespaces = namespaces
  end
  18 
  # Parses an XPath expression into the nested token Array built by OrExpr.
  #
  # path:: the XPath expression as a String
  #
  # Returns the Array of tokens.
  #
  # Whitespace directly after '(' or '[' and directly before ')' or ']' is
  # removed before parsing. The clean-up works on a copy, so the caller's
  # string is never modified and frozen strings are accepted.
  def parse path
    cleaned = path.gsub(/([\(\[])\s+/, '\1') # Strip ignorable spaces
    cleaned = cleaned.gsub( /\s+([\]\)])/, '\1' )
    parsed = []
    OrExpr(cleaned, parsed)
    parsed
  end
  26 
  # Parses a bare predicate expression (given without its brackets).
  #
  # path:: the predicate text; it is wrapped in '[' and ']' and passed to
  #        Predicate
  #
  # Returns the Array of tokens that Predicate appended.
  def predicate path
    tokens = []
    bracketed = "[#{path}]"
    Predicate( bracketed, tokens )
    tokens
  end
  32 
  # Renders an XPath in abbreviated syntax.
  #
  # path:: an XPath String (parsed first with #parse) or an already parsed
  #        token Array. A token Array is consumed: tokens are shifted off it
  #        until it is empty.
  #
  # Returns the abbreviated path as a String. Tokens without a rendering
  # here are emitted as UNKNOWN(<token.inspect>).
  def abbreviate( path )
    path = parse( path ) if path.kind_of?(String)
    # Tokens that always print the same text.
    fixed = {
      :descendant_or_self => "/",
      :self => ".",
      :parent => "..",
      :any => "*",
      :text => "text()"
    }
    # Axes that have no short form and are spelled out as 'name::'.
    long_axes = [
      :following, :following_sibling, :ancestor, :ancestor_or_self,
      :descendant, :namespace, :preceding, :preceding_sibling
    ]
    out = ""
    rooted = false
    until path.empty?
      token = path.shift
      case token
      when :node
        # nothing is emitted for :node
      when :document
        rooted = true
      when :child
        out << "/" unless out.empty?
      when :attribute
        out << "/" unless out.empty?
        out << "@"
      when *fixed.keys
        out << fixed[token]
      when *long_axes
        out << "/" unless out.empty?
        out << token.to_s.tr("_", "-")
        out << "::"
      when :qname
        prefix = path.shift
        name = path.shift
        out << prefix << ":" if prefix.size > 0
        out << name
      when :predicate
        out << "["
        out << predicate_to_string( path.shift ) { |x| abbreviate( x ) }
        out << "]"
      when :function
        out << path.shift
        out << "( "
        out << predicate_to_string( path.shift[0] ) { |x| abbreviate( x ) }
        out << " )"
      when :literal
        out << " \"#{path.shift}\" "
      else
        out << "/" unless out.empty?
        out << "UNKNOWN("
        out << token.inspect
        out << ")"
      end
    end
    # A :document token anywhere in the list makes the result absolute.
    rooted ? "/" + out : out
  end
  90 
  # Renders an XPath in unabbreviated syntax, spelling every axis as
  # 'axis-name::'.
  #
  # path:: an XPath String (parsed first with #parse) or an already parsed
  #        token Array. A token Array is consumed: tokens are shifted off it
  #        until it is empty.
  #
  # Returns the expanded path as a String. Tokens without a rendering here
  # are emitted as UNKNOWN(<token.inspect>).
  def expand( path )
    path = parse( path ) if path.kind_of?(String)
    axes = [
      :attribute, :child, :following, :following_sibling, :ancestor,
      :ancestor_or_self, :descendant, :descendant_or_self, :namespace,
      :preceding, :preceding_sibling, :self, :parent
    ]
    out = ""
    rooted = false
    until path.empty?
      token = path.shift
      case token
      when :document
        rooted = true
      when :node
        out << "node()"
      when :any
        out << "*"
      when *axes
        out << "/" unless out.empty?
        out << token.to_s.tr("_", "-")
        out << "::"
      when :qname
        prefix = path.shift
        name = path.shift
        out << prefix << ":" if prefix.size > 0
        out << name
      when :predicate
        out << "["
        out << predicate_to_string( path.shift ) { |x| expand(x) }
        out << "]"
      else
        out << "/" unless out.empty?
        out << "UNKNOWN("
        out << token.inspect
        out << ")"
      end
    end
    # A :document token anywhere in the list makes the result absolute.
    rooted ? "/" + out : out
  end
 129 
 # Renders a parsed predicate expression as text.
 #
 # path::  the token Array of the expression; it is consumed (tokens are
 #         shifted off) while rendering
 # block:: called with any sub-array that is not an operator, :function or
 #         :literal node; must return that part rendered as a String
 #
 # Returns the rendered String with runs of spaces squeezed to one space.
 #
 # Binary operators are written with their XPath spelling, so :mult, :plus
 # and :minus come out as '*', '+' and '-' (not as the token names); :and,
 # :or, :div and :mod are written by name.
 def predicate_to_string( path, &block )
   string = ""
   case path[0]
   when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
     spellings = {
       :eq => "=", :neq => "!=", :lt => "<", :gt => ">",
       :lteq => "<=", :gteq => ">=", :union => "|",
       :mult => "*", :plus => "+", :minus => "-"
     }
     op = path.shift
     left = predicate_to_string( path.shift, &block )
     right = predicate_to_string( path.shift, &block )
     string << " " << left << " "
     string << (spellings[op] || op.to_s)
     string << " " << right << " "
   when :function
     path.shift
     name = path.shift
     arguments = predicate_to_string( path.shift, &block )
     string << name << "( " << arguments << " )"
   when :literal
     path.shift
     string << " " << path.shift.inspect << " "
   else
     rendered = yield( path )
     string << " " << rendered << " "
   end
   string.squeeze(" ")
 end
 179 
 180        private
 181        #LocationPath
 182        #  | RelativeLocationPath
 183        #  | '/' RelativeLocationPath?
 184        #  | '//' RelativeLocationPath
 # Parses a location path that may be absolute.
 #
 # path::   the text to parse; surrounding whitespace is stripped first
 # parsed:: Array that receives the tokens. A leading '/' adds :document;
 #          a leading '//' adds :document, :descendant_or_self, :node.
 #
 # Returns whatever RelativeLocationPath returns for the text after the
 # leading slashes, or nil when nothing is left after them.
 def LocationPath(path, parsed)
   path = path.strip
   if path.start_with?("//")
     parsed << :document << :descendant_or_self << :node
     path = path[2..-1]
   elsif path.start_with?("/")
     parsed << :document
     path = path[1..-1]
   end
   path.empty? ? nil : RelativeLocationPath( path, parsed )
 end
 201 
 202        #RelativeLocationPath
 203        #  |                                                    Step
 204        #    | (AXIS_NAME '::' | '@' | '')                     AxisSpecifier
 205        #      NodeTest
 206        #        Predicate
 207        #    | '.' | '..'                                      AbbreviatedStep
 208        #  |  RelativeLocationPath '/' Step
 209        #  | RelativeLocationPath '//' Step
 # Matches an explicit axis specifier ('axis-name::') at the start of a step;
 # group 1 is the axis name.
 AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
 # Parses a sequence of location steps separated by '/' or '//'.
 #
 # path::   the text to parse
 # parsed:: Array that receives the tokens of every step
 #
 # Each step is either '.' (:self, :node), '..' (:parent, :node), or an axis
 # ('@', 'axis::', or the implied :child) followed by a node test and any
 # predicates. A '//' separator adds :descendant_or_self, :node.
 #
 # Returns the unparsed remainder: the text starting at the first character
 # after a step that is not a '/', or an empty string.
 def RelativeLocationPath(path, parsed)
   until path.empty?
     if path.start_with?("..")
       parsed << :parent << :node
       path = path[2..-1]
     elsif path.start_with?(".")
       parsed << :self << :node
       path = path[1..-1]
     else
       # Axis specifier first; the node test follows in every case.
       if path.start_with?("@")
         parsed << :attribute
         path = path[1..-1]
       elsif path =~ AXIS
         parsed << $1.tr('-', '_').intern
         path = $'
       else
         parsed << :child
       end

       step = []
       path = NodeTest( path, step )
       path = Predicate( path, step ) if path.start_with?("[")
       parsed.concat( step )
     end

     break if path.empty?
     # Anything other than a step separator ends the location path.
     return path unless path.start_with?("/")

     if path.start_with?("//")
       parsed << :descendant_or_self << :node
       path = path[2..-1]
     else
       path = path[1..-1]
     end
   end
   path
 end
 269 
 270        # Consumes one node test from the front of the path string, appends
 271        # its tokens to the parsed array, and returns the rest of the string.
 272        #   '*' becomes :any; text() becomes :text
 273        #   a (possibly prefixed) name becomes :qname, prefix, name
 274        #NodeTest
 275        #  | ('*' | NCNAME ':' '*' | QNAME)                NameTest
 276        #  | NODE_TYPE '(' ')'                              NodeType
 277        #  | PI '(' LITERAL ')'                            PI
 278        #    | '[' expr ']'                                Predicate
 # 'prefix:*' name test; group 1 is the prefix.
 NCNAMETEST= /^(#{NCNAME_STR}):\*/u
 # Qualified name; groups 1 and 2 are read below as prefix and local name.
 QNAME     = Namespace::NAMESPLIT
 # Node-type test with an empty argument list; group 1 is the type name.
 NODE_TYPE  = /^(comment|text|node)\(\s*\)/m
 # Start of a processing-instruction test, up to and including '('.
 PI        = /^processing-instruction\(/
 # Parses one node test at the start of +path+.
 #
 # path::   the text to parse
 # parsed:: Array that receives the tokens:
 #          '*'                          -> :any
 #          comment() / text() / node()  -> :comment / :text / :node
 #          processing-instruction(...)  -> :processing_instruction, target
 #                                          ('' when no literal is given)
 #          prefix:*                     -> :namespace, prefix
 #          [prefix:]name                -> :qname, prefix ('' if none), name
 #
 # Returns the text after the node test, or +path+ unchanged when nothing
 # matched.
 #
 # Raises ParseException when a processing-instruction test has neither a
 # quoted literal nor ')' after '(', or lacks the ')' after its literal.
 def NodeTest path, parsed
   case path
   when /^\*/
     path = $'
     parsed << :any
   when NODE_TYPE
     type = $1
     path = $'
     parsed << type.tr('-', '_').intern
   when PI
     path = $'
     literal = nil
     if path =~ /\A\s*\)/
       # Empty argument list: consume the ')' so the rest of the path is
       # still parsed by the caller.
       path = $'
     else
       unless path =~ LITERAL
         raise ParseException.new("Missing literal or ')' in processing instruction")
       end
       # LITERAL captures single-quoted text in group 1 and double-quoted
       # text in group 2.
       literal = $1 || $2
       path = $'
       raise ParseException.new("Missing ')' after processing instruction") if path[0, 1] != ")"
       path = path[1..-1]
     end
     parsed << :processing_instruction
     parsed << (literal || '')
   when NCNAMETEST
     prefix = $1
     path = $'
     parsed << :namespace
     parsed << prefix
   when QNAME
     prefix = $1
     name = $2
     path = $'
     prefix = "" unless prefix
     parsed << :qname
     parsed << prefix
     parsed << name
   end
   return path
 end
 324 
 325        # Parses the run of '[...]' predicates at the front of the path
 # Parses every consecutive '[...]' group at the start of +path+.
 #
 # path::   the text to parse
 # parsed:: Array that receives, for each group, the token :predicate
 #          followed by an Array holding the tokens OrExpr produced for the
 #          text between the brackets
 #
 # Returns the text after the last group, or nil when +path+ does not start
 # with '['.
 #
 # Raises ParseException when a '[' has no matching ']' (get_group returns
 # nil in that case).
 def Predicate path, parsed
   return nil unless path[0, 1] == "["
   predicates = []
   while path[0, 1] == "["
     group = get_group(path)
     raise ParseException.new("Unbalanced '[' in predicate: #{path}") if group.nil?
     path, expr = group
     predicates << expr[1..-2] # drop the enclosing brackets
   end
   predicates.each do |expr|
     preds = []
     parsed << :predicate
     parsed << preds
     OrExpr(expr, preds)
   end
   path
 end
 345 
 346        # Each of the following methods parses one level of the expression
 347        # grammar: it appends tokens for the text it consumes to the parsed
 348        # array and returns the remainder of the path string
 349 
 350        #| OrExpr S 'or' S AndExpr
 351        #| AndExpr
 # Parses AndExpr operands joined by ' or '.
 #
 # path::   the text to parse
 # parsed:: Array that receives the result. Each ' or ' wraps the tree built
 #          so far as [:or, left, right]. If +parsed+ is empty it is replaced
 #          by the tree's contents; otherwise the tree is appended as one
 #          nested element. Nothing is added when no tokens were produced.
 #
 # Returns the unparsed remainder of +path+.
 def OrExpr(path, parsed)
   tree = []
   rest = AndExpr( path, tree )
   # Only look for an operator if the first operand consumed something.
   unless rest == path
     while rest =~ /^\s*( or )/
       remainder = $'
       tree = [ :or, tree, [] ]
       rest = AndExpr( remainder, tree.last )
     end
   end
   unless tree.empty?
     if parsed.empty?
       parsed.replace( tree )
     else
       parsed << tree
     end
   end
   rest
 end
 370 
 371        #| AndExpr S 'and' S EqualityExpr
 372        #| EqualityExpr
 # Parses EqualityExpr operands joined by ' and '.
 #
 # path::   the text to parse
 # parsed:: Array that receives the result. Each ' and ' wraps the tree built
 #          so far as [:and, left, right]. If +parsed+ is empty it is
 #          replaced by the tree's contents; otherwise the tree is appended
 #          as one nested element. Nothing is added when no tokens were
 #          produced.
 #
 # Returns the unparsed remainder of +path+.
 def AndExpr(path, parsed)
   tree = []
   rest = EqualityExpr( path, tree )
   # Only look for an operator if the first operand consumed something.
   unless rest == path
     while rest =~ /^\s*( and )/
       remainder = $'
       tree = [ :and, tree, [] ]
       rest = EqualityExpr( remainder, tree.last )
     end
   end
   unless tree.empty?
     if parsed.empty?
       parsed.replace( tree )
     else
       parsed << tree
     end
   end
   rest
 end
 393 
 394        #| EqualityExpr ('=' | '!=')  RelationalExpr
 395        #| RelationalExpr
 # Parses RelationalExpr operands joined by '=' or '!='.
 #
 # path::   the text to parse
 # parsed:: Array that receives the result. Each operator wraps the tree
 #          built so far as [:eq, left, right] or [:neq, left, right]. If
 #          +parsed+ is empty it is replaced by the tree's contents;
 #          otherwise the tree is appended as one nested element. Nothing is
 #          added when no tokens were produced.
 #
 # Returns the unparsed remainder of +path+.
 def EqualityExpr(path, parsed)
   tree = []
   rest = RelationalExpr( path, tree )
   # Only look for an operator if the first operand consumed something.
   unless rest == path
     while rest =~ /^\s*(!?=)\s*/
       remainder = $'
       operator = ($1 == "!=") ? :neq : :eq
       tree = [ operator, tree, [] ]
       rest = RelationalExpr( remainder, tree.last )
     end
   end
   unless tree.empty?
     if parsed.empty?
       parsed.replace( tree )
     else
       parsed << tree
     end
   end
   rest
 end
 418 
 419        #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
 420        #| AdditiveExpr
 # Parses AdditiveExpr operands joined by '<', '<=', '>' or '>='.
 #
 # path::   the text to parse
 # parsed:: Array that receives the result. Each operator wraps the tree
 #          built so far as [:lt | :lteq | :gt | :gteq, left, right]. If
 #          +parsed+ is empty it is replaced by the tree's contents;
 #          otherwise the tree is appended as one nested element. Nothing is
 #          added when no tokens were produced.
 #
 # Returns the unparsed remainder of +path+.
 def RelationalExpr(path, parsed)
   operators = { "<" => :lt, "<=" => :lteq, ">" => :gt, ">=" => :gteq }
   tree = []
   rest = AdditiveExpr( path, tree )
   # Only look for an operator if the first operand consumed something.
   unless rest == path
     while rest =~ /^\s*([<>]=?)\s*/
       remainder = $'
       tree = [ operators[$1], tree, [] ]
       rest = AdditiveExpr( remainder, tree.last )
     end
   end
   unless tree.empty?
     if parsed.empty?
       parsed.replace( tree )
     else
       parsed << tree
     end
   end
   rest
 end
 445 
 446        #| AdditiveExpr ('+' | S '-') MultiplicativeExpr
 447        #| MultiplicativeExpr
 # Parses MultiplicativeExpr operands joined by '+' or ' -'.
 #
 # A minus sign only counts as the operator when a space precedes it.
 #
 # path::   the text to parse
 # parsed:: Array that receives the result. Each operator wraps the tree
 #          built so far as [:plus, left, right] or [:minus, left, right].
 #          If +parsed+ is empty it is replaced by the tree's contents;
 #          otherwise the tree is appended as one nested element. Nothing is
 #          added when no tokens were produced.
 #
 # Returns the unparsed remainder of +path+.
 def AdditiveExpr(path, parsed)
   tree = []
   rest = MultiplicativeExpr( path, tree )
   # Only look for an operator if the first operand consumed something.
   unless rest == path
     while rest =~ /^\s*(\+| -)\s*/
       remainder = $'
       operator = ($1 == "+") ? :plus : :minus
       tree = [ operator, tree, [] ]
       rest = MultiplicativeExpr( remainder, tree.last )
     end
   end
   unless tree.empty?
     if parsed.empty?
       parsed.replace( tree )
     else
       parsed << tree
     end
   end
   rest
 end
 470 
 471        #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
 472        #| UnaryExpr
 # Parses UnaryExpr operands joined by '*', ' div ' or ' mod '.
 #
 # path::   the text to parse
 # parsed:: Array that receives the result. Each operator wraps the tree
 #          built so far as [:mult | :div | :mod, left, right]. If +parsed+
 #          is empty it is replaced by the tree's contents; otherwise the
 #          tree is appended as one nested element. Nothing is added when no
 #          tokens were produced.
 #
 # Returns the unparsed remainder of +path+.
 def MultiplicativeExpr(path, parsed)
   operators = { "*" => :mult, "div" => :div, "mod" => :mod }
   tree = []
   rest = UnaryExpr( path, tree )
   # Only look for an operator if the first operand consumed something.
   unless rest == path
     while rest =~ /^\s*(\*| div | mod )\s*/
       remainder = $'
       # The word operators are captured with their surrounding spaces.
       tree = [ operators[$1.strip], tree, [] ]
       rest = UnaryExpr( remainder, tree.last )
     end
   end
   unless tree.empty?
     if parsed.empty?
       parsed.replace( tree )
     else
       parsed << tree
     end
   end
   rest
 end
 497 
 498        #| '-' UnaryExpr
 499        #| UnionExpr
 # Parses an optional run of leading '-' signs followed by a UnionExpr.
 #
 # path::   the text to parse
 # parsed:: Array that receives :neg when the number of leading '-' signs is
 #          odd, followed by the tokens UnionExpr produced
 #
 # Returns the unparsed remainder as returned by UnionExpr.
 def UnaryExpr(path, parsed)
   path =~ /^(\-*)/
   minus_signs = $1
   path = $'
   # An even number of minus signs cancels out.
   parsed << :neg if minus_signs and minus_signs.size.odd?

   operand = []
   path = UnionExpr( path, operand )
   parsed.concat( operand )
   path
 end
 517 
 518        #| UnionExpr '|' PathExpr
 519        #| PathExpr
 # Parses PathExpr operands joined by '|'.
 #
 # path::   the text to parse
 # parsed:: Array that receives the result. Each '|' wraps the tree built so
 #          far as [:union, left, right]. If +parsed+ is empty it is
 #          replaced by the tree's contents; otherwise the tree is appended
 #          as one nested element. Nothing is added when no tokens were
 #          produced.
 #
 # Returns the unparsed remainder of +path+.
 def UnionExpr(path, parsed)
   tree = []
   rest = PathExpr( path, tree )
   # Only look for an operator if the first operand consumed something.
   unless rest == path
     while rest =~ /^\s*(\|)\s*/
       remainder = $'
       tree = [ :union, tree, [] ]
       rest = PathExpr( remainder, tree.last )
     end
   end
   unless tree.empty?
     if parsed.empty?
       parsed.replace( tree )
     else
       parsed << tree
     end
   end
   rest
 end
 538 
 539        #| LocationPath
 540        #| FilterExpr ('/' | '//') RelativeLocationPath
 # Parses a path expression: either a location path, or a filter expression
 # optionally followed by '/' or '//' and a relative location path.
 #
 # path::   the text to parse; leading whitespace is skipped
 # parsed:: Array that receives all tokens produced here
 #
 # Returns the unparsed remainder (nil when LocationPath returns nil).
 #
 # When a filter expression is followed by a path, the filter's tokens and
 # the path's tokens are both kept: the separator is consumed here ('//'
 # adds :descendant_or_self, :node) and the following steps are appended
 # after the filter tokens.
 def PathExpr path, parsed
   path =~ /^\s*/
   path = $'
   n = []
   rest = FilterExpr( path, n )
   if rest != path && rest && rest[0, 1] == "/"
     # FilterExpr ('/' | '//') RelativeLocationPath
     if rest[1, 1] == "/"
       n << :descendant_or_self
       n << :node
       rest = rest[2..-1]
     else
       rest = rest[1..-1]
     end
     rest = RelativeLocationPath( rest, n )
   elsif rest =~ /\A[\/\.\@\[\w_*]/
     rest = LocationPath( rest, n )
   end
   parsed.concat( n )
   rest
 end
 558 
 559        #| FilterExpr Predicate
 560        #| PrimaryExpr
 # Parses a primary expression and any predicates that directly follow it.
 #
 # path::   the text to parse
 # parsed:: Array that receives the tokens from PrimaryExpr and, when the
 #          remainder starts with '[', from Predicate
 #
 # Returns the unparsed remainder (nil if PrimaryExpr returned nil).
 def FilterExpr(path, parsed)
   tokens = []
   path = PrimaryExpr( path, tokens )
   if path and path.start_with?("[")
     path = Predicate( path, tokens )
   end
   parsed.concat( tokens )
   path
 end
 571 
 572        #| VARIABLE_REFERENCE
 573        #| '(' expr ')'
 574        #| LITERAL
 575        #| NUMBER
 576        #| FunctionCall
 # '$name' variable reference; group 1 is the name.
 VARIABLE_REFERENCE  = /^\$(#{NAME_STR})/u
 # Unsigned number, optionally with a fractional part; group 1 is the text.
 NUMBER              = /^(\d*\.?\d+)/
 # Exactly the node-type names. A name followed by '(' that matches is left
 # unparsed by PrimaryExpr instead of being treated as a function call. The
 # alternation is grouped and anchored so that only whole names match.
 NT        = /\A(?:comment|text|processing-instruction|node)\z/
 # Parses one primary expression at the start of +path+.
 #
 # path::   the text to parse
 # parsed:: Array that receives the tokens:
 #          $name        -> :variable, name
 #          name(...)    -> :function, name, then whatever FunctionCall adds
 #          number       -> :literal, Float (if it contains '.') or Integer
 #          quoted text  -> :literal, String
 #          ( expr )     -> the tokens OrExpr produces for expr
 #
 # Returns the text after the expression, or +path+ unchanged when nothing
 # matched or when the name before '(' is a node type.
 #
 # Raises ParseException when a '(' group has no matching ')'.
 def PrimaryExpr path, parsed
   case path
   when VARIABLE_REFERENCE
     varname = $1
     path = $'
     parsed << :variable
     parsed << varname
   when /^(\w[-\w]*)(?:\()/
     fname = $1
     tmp = $'
     return path if fname =~ NT
     path = tmp
     parsed << :function
     parsed << fname
     path = FunctionCall(path, parsed)
   when NUMBER
     number = $1
     path = $'
     parsed << :literal
     parsed << (number.include?('.') ? number.to_f : number.to_i)
   when LITERAL
     # Group 1 holds single-quoted text, group 2 double-quoted text.
     text = $1.nil? ? $2 : $1
     path = $'
     parsed << :literal
     parsed << text
   when /^\(/
     group = get_group(path)
     raise ParseException.new("Unbalanced '(' in expression: #{path}") if group.nil?
     path, contents = group
     contents = contents[1..-2] # drop the enclosing parentheses
     n = []
     OrExpr( contents, n )
     parsed.concat(n)
   end
   path
 end
 620 
 621        #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
 # Parses the argument list of a function call.
 #
 # rest::   the text that follows the opening '(' of the call
 # parsed:: Array that receives one element: an Array with, for every
 #          argument, the Array of tokens OrExpr produced for it
 #
 # Returns the text after the closing ')'.
 #
 # Raises ParseException when the argument list is not closed (parse_args
 # returns nil in that case).
 def FunctionCall rest, parsed
   result = parse_args(rest)
   raise ParseException.new("Unterminated argument list in function call: #{rest}") if result.nil?
   path, arguments = result
   argset = arguments.map do |argument|
     args = []
     OrExpr( argument, args )
     args
   end
   parsed << argset
   path
 end
 633 
 634        # get_group( '[foo]bar' ) -> ['bar', '[foo]']
 # Splits off the balanced bracket group at the start of +string+.
 #
 # string:: text whose first character opens the group; '(' is closed by
 #          ')', any other first character is closed by ']'
 #
 # Returns [rest, group], where group includes both delimiters, or nil when
 # the group is never closed.
 #
 # Delimiters inside a '...' or "..." literal are not counted, so a
 # predicate such as [a="]"] is returned whole.
 def get_group string
   opener = string[0, 1]
   closer = (opener == "(" ? ")" : "]")
   depth = 0
   quote = nil # the quote character of the literal being skipped, if any
   ind = 0
   loop do
     char = string[ind, 1]
     if quote
       quote = nil if char == quote
     elsif char == opener
       depth += 1
     elsif char == closer
       depth -= 1
     elsif char == '"' or char == "'"
       quote = char
     end
     ind += 1
     break unless depth > 0 and ind < string.length
   end
   return nil unless depth == 0
   [string[ind..-1], string[0..ind-1]]
 end
 652        
 # Splits a function argument list into its top-level arguments.
 #
 # string:: the text that follows the opening '(' of a function call
 #
 # Returns [rest, arguments]: +rest+ is the text after the matching ')',
 # +arguments+ the stripped, non-empty argument strings. Returns nil when
 # the closing ')' is never found.
 #
 # Commas and parentheses inside '...' or "..." literals are ignored, and
 # commas inside nested parentheses do not split arguments.
 def parse_args( string )
   arguments = []
   ind = 0
   inquot = false
   inapos = false
   depth = 1 # the caller has already consumed the opening '('
   loop do
     char = string[ind, 1]
     if char == '"'
       inquot = !inquot unless inapos
     elsif char == "'"
       inapos = !inapos unless inquot
     elsif !(inquot or inapos)
       case char
       when "("
         depth += 1
       when ")"
         depth -= 1
         if depth == 0
           s = string[0, ind].strip
           arguments << s unless s == ""
           string = string[ind+1..-1]
         end
       when ","
         if depth == 1
           s = string[0, ind].strip
           arguments << s unless s == ""
           # Continue scanning from the start of the text after the comma.
           string = string[ind+1..-1]
           ind = -1
         end
       end
     end
     ind += 1
     break unless depth > 0 and ind < string.length
   end
   return nil unless depth == 0
   [string, arguments]
 end
 696      end
 697    end
 698  end