1 # coding: utf-8 2 # frozen_string_literal: true 3 4 5 require 'strscan' 6 7 require_relative 'lexer/abstract' 8 require_relative 'lexer/separator' 9 require_relative 'lexer/comment' 10 require_relative 'lexer/token' 11 require_relative 'lexer/string' 12 require_relative 'lexer/entry' 13 14 15 16 =begin 17 The lexical analyzer is modeled as a finite state machine with a stack. 18 Here we show the definition of a model using a fictitious domain 19 specific language (DSL). 20 This domain specific language was inspired by Estelle, 21 a formal specification description language for communication protocols. 22 23 24 字句解析器は、スタック付き有限状態機械としてモデル化される。 25 ここでは、架空のドメイン固有言語(DSL)によるモデルの定義を示す。 26 このドメイン記述言語は、通信プロトコルの 27 形式的仕様記述言語であるEstelleから発想を得て考案した。 28 29 Estelleに関する文献を以下に示す。 30 * 通信システムの形式記述技法の標準化: 31 Estelle言語の特質と処理系の現状と動向 32 情報処理, Vol.31 No.1, Jan. 1990, 岡田康治, IPSJ-MGN310109 33 * プロトコル言語 34 1994年, 監修 水野忠則, 発行 (株)カットシステム, ISBN 4-906391-08-7 35 36 37 ####################################### 38 ##### State Machine Specification #### 39 ####################################### 40 41 %STRUCTURE Lexer { 42 %DATA Pattern { 43 | PatBeginComment String 44 | PatEndComment String 45 | PatNewline String 46 | PatSpace String 47 | PatBeginString String 48 | PatEndString String 49 | PatBeginBraket String 50 | PatEndBraket String 51 | PatNumber String 52 | PatWord String 53 | PatAny String 54 } 55 56 %DATA Token { 57 | TokNewline (String, Int) /* NL(String) */ 58 | TokSpace (String, Int) /* SP(String) */ 59 | TokComment (String, Int) /* COMMENT(String) */ 60 61 | TokBeginBraket (String, Int) 62 | TokEndBraket (String, Int) 63 64 | TokReserved (Symbol, Int) /* IF, THEN, ELSE, ...
etc */ 65 66 | TokIdentifier (Symbol, Int) /* ID(Symbol) or */ 67 /* MSG(Symbol) or */ 68 /* LABEL(Symbol) or */ 69 /* DIR(Symbol) or */ 70 /* NSEL(Int) or */ 71 /* LSEL(Symbol) */ 72 73 | TokNumber (Number, Int) /* INT(Int) or */ 74 /* FLOAT(Float) */ 75 76 | TokSymbol (Symbol, Int) /* SYM(Symbol) */ 77 | TokString (String, Int) /* STRING(String) */ 78 } 79 80 %INPUT %EVENT = Pattern 81 %OUTPUT %EVENT = Token 82 83 84 %STRUCTURE Abstraction { 85 %ABSTRACT %STATE Abstract %HAS { 86 line-num: Int, 87 braket-stack: String List 88 } 89 90 %ABSTRACT %STATE String %IS-A Abstract %HAS { 91 buf: String 92 } 93 } 94 95 %STATE Separator %IS-A Abstraction::Abstract 96 97 %STATE Comment %IS-A Abstraction::Abstract %HAS { 98 buf: String, 99 saved-line-num: Int, 100 comment-depth: Int 101 } 102 103 %STATE Token %IS-A Abstraction::Abstract 104 105 %STATE BasicString %IS-A Abstraction::String 106 %STATE SymbolizedString %IS-A Abstraction::String 107 108 %INITIAL %STATE = %NEW Separator {line-num: 1 braket-stack: []} 109 110 %VAL braket-map = %{"(" -> ")", "[" -> "]", "{" -> "}", ....} 111 112 113 %TRANSITION { 114 %FROM Separator {line-num:} 115 %WHEN PatBeginComment _ 116 %TO Comment { 117 buf: "" saved-line-num: line-num comment-depth: 1 118 } 119 %WHEN PatNewline matched 120 %OUTPUT TokNewline (matched, line-num) 121 %TO %SAME {line-num: line-num + 1} 122 %WHEN PatSpace matched 123 %OUTPUT TokSpace (matched, line-num) 124 %WHEN %ANY 125 %TO Token 126 | %FROM Comment { 127 line-num: buf: 128 saved-line-num: saved-line-num 129 comment-depth: depth 130 } 131 %WHEN PatBeginComment matched 132 %TO %SAME {buf: buf ^ matched comment-depth: depth + 1} 133 %WHEN PatEndComment matched 134 !
%IF depth <= 1 %THEN 135 %OUTPUT TokComment (buf, saved-line-num) 136 %TO Separator 137 %ELSE 138 %TO %SAME {buf: buf ^ matched comment-depth: depth - 1} 139 %WHEN PatNewline matched 140 %TO %SAME {line-num: line-num + 1 buf: buf ^ matched} 141 %WHEN PatAny matched 142 %TO %SAME {buf: buf ^ matched} 143 %WHEN %ANY 144 %ABORT "No case" 145 | %FROM Token {line-num: braket-stack: stack} 146 %WHEN PatBeginString 147 ! %IF symbol? %THEN 148 %TO SymbolizedString {buf: ""} 149 %ELSE 150 %TO BasicString {buf: ""} 151 %WHEN PatBeginBraket matched 152 %OUTPUT TokBeginBraket (matched, line-num) 153 %TO Separator {braket-stack: [matched|stack]} 154 %WHEN PatEndBraket matched-eb 155 ! %CASE stack %OF { 156 [] -> %ERROR "Unexpected end-braket" 157 | [bb|stack'] -> 158 %CASE braket-map.(lookup bb) %OF { 159 | None -> %ABORT ("Unknown bracket: " ^ bb) 160 | Some found-eb -> 161 %IF matched-eb == found-eb %THEN 162 %OUTPUT TokEndBraket (matched-eb, line-num) 163 %TO Separator {braket-stack: stack'} 164 %ELSE 165 %ERROR "Mismatched brakets" 166 } 167 } 168 %WHEN PatNumber matched 169 %OUTPUT TokNumber (to-number matched, line-num) 170 %TO Separator 171 %WHEN PatWord matched 172 ! %IF symbol? %THEN 173 %OUTPUT TokSymbol (to-symbol matched, line-num) 174 %ELSE 175 %IF reserved? %THEN 176 %OUTPUT TokReserved (to-symbol matched, line-num) 177 %ELSE 178 %OUTPUT TokIdentifier (to-symbol matched, line-num) 179 %TO Separator 180 %WHEN %ANY 181 %ERROR "Can't be recognized as a token" 182 | %FROM Abstraction::String {buf: ..} 183 %WHEN PatNewline 184 %ERROR "Unexpected end-of-string" 185 %WHEN PatAny matched 186 %TO %SAME {buf: buf ^ matched} 187 | %FROM BasicString {line-num:, buf: ..} 188 %WHEN PatEndString 189 %OUTPUT TokString (buf, line-num) 190 %TO Separator 191 %WHEN %ANY 192 %ABORT 193 | %FROM SymbolizedString {line-num:, buf: ..} 194 %WHEN PatEndString 195 %OUTPUT TokSymbol (to-symbol buf, line-num) 196 %TO Separator 197 %WHEN %ANY 198 %ABORT 199 } 200 } 201 =end