File: lexical/lexer/token.rb

Overview
Module Structure
Class Hierarchy
Code

Overview

Module Structure

  module: <Toplevel Module>
  module: Umu#6
  module: Lexical#8
  module: Lexer#10
  class: Token#12
inherits from
  Abstract ( Umu::Lexical::Lexer )
has properties
constant: IDENT_WORD #14
constant: MODULE_DIRECTORY_PATTERN #17
constant: IDENT_PATTERN #18
constant: RESERVED_WORDS #20
constant: RESERVED_SYMBOLS #77
constant: IDENTIFIER_SYMBOLS #104
constant: BRAKET_PAIRS #113
constant: BRAKET_MAP_OF_BEGIN_TO_END #142
constant: BEGIN_BRAKET_SYMBOLS #149
constant: END_BRAKET_SYMBOLS #156
constant: SYMBOL_PATTERNS #161
method: lex / 1 #177

Class Hierarchy

Code

   1  # coding: utf-8
   2  # frozen_string_literal: true
   3 
   4 
   5 
   6  module Umu
   7 
   8  module Lexical
   9 
  10  module Lexer
  11 
  12  class Token < Abstract
  13 
  14  IDENT_WORD = '(_*[[:alpha:]][[:alnum:]]*(\-[[:alnum:]]+)*_*[\?!]?\'*)'
  15  # See -> https://qiita.com/Takayuki_Nakano/items/8d38beaddb84b488d683
  16 
  17  MODULE_DIRECTORY_PATTERN    = Regexp.new IDENT_WORD + '::'
  18  IDENT_PATTERN               = Regexp.new '([@$\.])?' + IDENT_WORD + '(:)?'
  19 
  20  RESERVED_WORDS = [
  21      '__FILE__',     '__LINE__',
  22      'and',          'assert',
  23      'case',         'cond',
  24      'delay',        'do',
  25      'else',         'elsif',
  26      'fun',
  27      'if',           'import',           'in',
  28      'kind-of?',
  29      'let',
  30      'mod',
  31      'of',
  32      'pow',
  33      'rec',
  34      'struct',       'structure',
  35      'then',
  36      'val',
  37      'where',
  38 
  39      # Not used, but reserved for future
  40 
  41      # For pattern matching
  42      'as',
  43 
  44      # For pragma
  45      'export', 'pragma', 'use',
  46 
  47      # For module language
  48      'functor', 'signat', 'signature',
  49 
  50      # For data type declaration
  51      'data', 'datum', 'type',
  52 
  53      # For class declaration
  54      'abstract', 'alias', 'class', 'def',
  55      'has', 'is-a', 'protocol', 'self', 'super', 'with',
  56 
  57      # For infix operator declaration
  58      'infixl', 'infixr',
  59 
  60      # For continuation
  61      'callcc', 'throw',
  62 
  63      # For exception
  64      'begin', 'ensure', 'raise', 'rescue',
  65 
  66      # For lazy evaluation
  67      'lazy',
  68 
  69      # For non-determinism
  70      'none', 'or'
  71  ].inject({}) { |hash, word|
  72      hash.merge(word => true) { |key, _, _|
  73          ASSERT.abort format("Duplicated reserved-word: '%s'", key)
  74      }
  75  }
  76 
  77  RESERVED_SYMBOLS = [
  78      '=',    '$',    '!',    '_',    ',',
  79      '&',    '|',
  80      '&&',   '||',
  81      '.',    ':',    ';',
  82      '..',   '::',   ';;',
  83      '->',   '<-',
  84      '<<',   '>>',   '<|',   '|>',
  85 
  86      # Redefinable symbols
  87      '+',    '-',    '*',    '/',    '^',
  88      '==',   '<>',   '<',    '>',    '<=',   '>=',   '<=>',
  89      '++',
  90      ':=',
  91 
  92      # Not used, but reserved for future
  93      '?',    # Propagating errors in DO-expression
  94      '...',  # Range (exclude last value)
  95      ':/:',  # Junction for component oriented design
  96      ']|['   # Guard separator for concurrency
  97  ].inject({}) { |hash, x|
  98      hash.merge(x => true) { |key, _, _|
  99          ASSERT.abort format("Duplicated reserved-symbol: '%s'", key)
 100      }
 101  }
 102 
 103 
 104  IDENTIFIER_SYMBOLS = [
 105      '!!'    # Peek operator for reference type
 106  ].inject({}) { |hash, x|
 107      hash.merge(x => true) { |key, _, _|
 108          ASSERT.abort format("Duplicated identifier-symbol: '%s'", key)
 109      }
 110  }
 111 
 112 
 113  BRAKET_PAIRS = [
 114      ['(',   ')'],   # Tuple, etc
 115      ['[',   ']'],   # List
 116      ['{',   '}'],   # Lambda, etc
 117      ['.(',  ')'],   # Message
 118      ['.[',  ']'],   # Apply Message
 119      ['%[',  ']'],   # Polymorphic pattern
 120      ['%S(', ')'],   # S-Expression
 121      ['%{',  '}'],   # Embeded-expression in S-expression (and Map)
 122      ['&(',  ')'],   # Instance message
 123      ['&[',  ']'],   # Cell stream
 124      ['&{',  '}'],   # Memorized and Suspended stream
 125      ['$(',  ')'],   # Named tuple modifier
 126 
 127 
 128      # Not used, but reserved for future
 129 
 130      ['%q[', ']'],   # Queue
 131      ['%v[', ']'],   # Vector
 132      ['%a[', ']'],   # Array
 133      ['%J[', ']'],   # JSON
 134      ['%X[', ']'],   # XML
 135      ['%(',  ')'],   # Set
 136      ['@[',  ']'],   # Assoc -- Key-Value list
 137      ['@(',  ')'],   # Dict  -- Key-Value set
 138      ['$[',  ']']    # Exchange -- Communication channel for concurrency
 139  ]
 140 
 141 
 142  BRAKET_MAP_OF_BEGIN_TO_END = BRAKET_PAIRS.inject({}) { |hash, (bb, eb)|
 143      hash.merge(bb => eb) { |key, _, _|
 144          ASSERT.abort format("Duplicated begin-braket: '%s'", key)
 145      }
 146  }
 147 
 148 
 149  BEGIN_BRAKET_SYMBOLS = BRAKET_PAIRS.inject({}) { |hash, (bb, _eb)|
 150      hash.merge(bb => true) { |key, _, _|
 151          ASSERT.abort format("Duplicated begin-braket: '%s'", key)
 152      }
 153  }
 154 
 155 
 156  END_BRAKET_SYMBOLS = BRAKET_PAIRS.inject({}) { |hash, (_bb, eb)|
 157      hash.merge(eb => true)
 158  }
 159 
 160 
 161  SYMBOL_PATTERNS = [
 162      RESERVED_SYMBOLS,
 163      IDENTIFIER_SYMBOLS,
 164      BEGIN_BRAKET_SYMBOLS,
 165      END_BRAKET_SYMBOLS
 166  ].inject({}) { |acc_hash, elem_hash|
 167      acc_hash.merge(elem_hash) { |key, _, _|
 168          ASSERT.abort format("Duplicated symbol: '%s'", key)
 169      }
 170  }.keys.sort { |x, y|
 171      y.length <=> x.length   # For longest-match
 172  }.map { |s|
 173      Regexp.new Regexp.escape(s)
 174  }
 175 
 176 
 177      def lex(scanner)
 178          ASSERT.kind_of scanner, ::StringScanner
 179 
 180          case
 181          # Float or Int
 182          when scanner.scan(/[+-]?\d+(\.\d+)?/)
 183              [
 184                  :Number,
 185 
 186                  scanner.matched,
 187 
 188                  [
 189                      if scanner[1]
 190                          LT.make_float self.loc, scanner.matched.to_f
 191                      else
 192                          LT.make_integer self.loc, scanner.matched.to_i
 193                      end
 194                  ],
 195 
 196                  __make_separator__
 197              ]
 198 
 199          # Number-Selector
 200          when scanner.scan(/\$(\d+)/)
 201              [
 202                  :NumberSelector,
 203 
 204                  scanner.matched,
 205 
 206                  [LT.make_number_selector(self.loc, scanner[1].to_i)],
 207 
 208                  __make_separator__
 209              ]
 210 
 211          # Begin-String
 212          when scanner.scan(/(@)?"/)
 213              [
 214                  :BeginString,
 215 
 216                  scanner.matched,
 217 
 218                  [],
 219 
 220                  if scanner[1]
 221                      __make_symbolized_string__('')
 222                  else
 223                      __make_string__('')
 224                  end
 225              ]
 226 
 227 
 228          # Module identifier word
 229          when scanner.scan(MODULE_DIRECTORY_PATTERN)
 230              body_matched = scanner[1]
 231 
 232              [
 233                  :Word,
 234 
 235                  scanner.matched,
 236 
 237                  [LT.make_module_directory(self.loc, body_matched)],
 238 
 239                  __make_separator__
 240              ]
 241 
 242 
 243          # Symbol, Message, Reserved-word or Identifier-word
 244          when scanner.scan(IDENT_PATTERN)
 245              head_matched = scanner[1]
 246              body_matched = scanner[2]
 247              tail_matched = scanner[4]
 248 
 249              [
 250                  :Word,
 251 
 252                  scanner.matched,
 253 
 254                  [
 255                      if head_matched
 256                          if tail_matched
 257                              raise X::LexicalError.new(
 258                                  self.loc,
 259                                  "Invalid character: ':' in word: '%s'",
 260                                      scanner.matched
 261                              )
 262                          end
 263 
 264                          case head_matched
 265                          when '@'
 266                              LT.make_symbol         self.loc, body_matched
 267                          when '$'
 268                              LT.make_label_selector self.loc, body_matched
 269                          when '.'
 270                              LT.make_message        self.loc, body_matched
 271                          else
 272                              ASSERT.abort head_matched
 273                          end
 274                      else
 275                          if tail_matched
 276                              LT.make_label self.loc, body_matched
 277                          else
 278                              if RESERVED_WORDS[body_matched]
 279                                  LT.make_reserved_word self.loc, body_matched
 280                              else
 281                                  LT.make_identifier self.loc, body_matched
 282                              end
 283                          end
 284                      end
 285                  ],
 286 
 287                  __make_separator__
 288              ]
 289 
 290 
 291          # Reserved-symbol or Identifier-symbol
 292          when SYMBOL_PATTERNS.any? { |pat| scanner.scan pat }
 293              matched = scanner.matched
 294 
 295              if RESERVED_SYMBOLS[matched]
 296                  [
 297                      :ReservedSymbol,
 298 
 299                      scanner.matched,
 300 
 301                      [LT.make_reserved_symbol(self.loc, matched)],
 302 
 303                      __make_separator__
 304                  ]
 305              elsif IDENTIFIER_SYMBOLS[matched]
 306                  [
 307                      :IdentifierSymbol,
 308 
 309                      scanner.matched,
 310 
 311                      [LT.make_identifier(self.loc, matched)],
 312 
 313                      __make_separator__
 314                  ]
 315              elsif BEGIN_BRAKET_SYMBOLS[matched]
 316                  [
 317                      :BeginBraket,
 318 
 319                      scanner.matched,
 320 
 321                      [LT.make_reserved_symbol(self.loc, matched)],
 322 
 323                      __make_separator__(
 324                          self.loc, [matched] + self.braket_stack
 325                      )
 326                  ]
 327              elsif END_BRAKET_SYMBOLS[matched]
 328                  bb, *stack = self.braket_stack
 329                  unless bb   # Is stack empty?
 330                      raise X::LexicalError.new(
 331                          self.loc,
 332                          "Unexpected end-braket: '%s'", matched
 333                      )
 334                  end
 335 
 336                  eb = BRAKET_MAP_OF_BEGIN_TO_END[bb]
 337                  unless eb
 338                      ASSERT.abort self.inspect
 339                  end
 340 
 341                  if matched == eb
 342                      [
 343                          :EndBraket,
 344 
 345                          scanner.matched,
 346                          
 347                          [LT.make_reserved_symbol(self.loc, matched)],
 348 
 349                          __make_separator__(self.loc, stack)
 350                      ]
 351                  else
 352                      raise X::LexicalError.new(
 353                          self.loc,
 354                          "Mismatch of brakets: '%s' .... '%s'",
 355                                                bb,       matched
 356                      )
 357                  end
 358              else
 359                  ASSERT.abort matched
 360              end
 361 
 362          # Unmatched
 363          else
 364              raise X::LexicalError.new(
 365                  self.loc,
 366                  "Can't recognized as token: '%s'", scanner.inspect
 367              )
 368          end
 369      end
 370  end
 371 
 372  end # Umu::Lexical::Lexer
 373 
 374  end # Umu::Lexical
 375 
 376  end # Umu