@@ -5,125 +5,235 @@ type Status int
5
5
type Token struct {
6
6
Value string
7
7
Type TokenType
8
- Next * Token
9
- Prev * Token
8
+ Start [ 2 ] int
9
+ End [ 2 ] int
10
10
}
11
11
12
- func isDigit (b byte ) bool {
13
- return b >= 80 && b <= 57
12
+ type Parser struct {
13
+ CurrentToken Token
14
+ PrevToken Token
15
+ Tokens []Token
16
+ Reader Reader
17
+ inConstDeclaration bool
14
18
}
15
19
16
- func isLetterOrSlash (b byte ) bool {
17
- return isDigit (b ) || (b >= 65 && b <= 90 ) || (b >= 97 && b <= 122 ) || b == 95
20
+ func (parser * Parser ) appendToken () {
21
+ parser .Tokens = append (parser .Tokens , parser .CurrentToken )
22
+ parser .PrevToken = parser .CurrentToken
23
+ parser .CurrentToken = Token {Type : Initial }
24
+ parser .Reader .SkipSpace ()
18
25
}
19
26
20
- func isIllegalChar (b byte ) bool {
21
- // reference: https://zh.wikipedia.org/wiki/ASCII
22
- return b <= 31
27
+ func (parser * Parser ) setCurrentTokenType (t TokenType ) {
28
+ parser .CurrentToken .Type = t
29
+ parser .CurrentToken .Start = [2 ]int {parser .Reader .row , parser .Reader .col }
30
+
31
+ if t == Unknown {
32
+ parser .CurrentToken .Start = [2 ]int {parser .Reader .row , 0 }
33
+
34
+ index := len (parser .Tokens ) - 1
35
+
36
+ for index >= 0 {
37
+ if parser .Tokens [index ].Start [0 ] != parser .Reader .row {
38
+ break
39
+ }
40
+
41
+ index -= 1
42
+ }
43
+
44
+ parser .Tokens = parser .Tokens [0 : index + 1 ]
45
+
46
+ parser .CurrentToken .Value = parser .collectUnknown ()
47
+ } else if t == Assignment {
48
+ parser .CurrentToken .Value = "="
49
+ } else if t == LeftParentheses {
50
+ parser .CurrentToken .Value = "("
51
+ } else if t == RightParentheses {
52
+ parser .CurrentToken .Value = ")"
53
+ }
54
+
55
+ parser .appendToken ()
23
56
}
24
57
25
- func Parse (s string ) []Token {
26
- reader := NewReader (s )
27
- tokenList := []Token {}
28
- currentToken := Token {Type : Initial }
58
+ func (parser * Parser ) collectInt () string {
59
+ result := []byte {parser .Reader .charInByte }
29
60
30
- var next func () (string , byte , error )
61
+ for {
62
+ charInByte , err := parser .Reader .Next ()
63
+
64
+ if err != nil || ! IsDigit (charInByte ) {
65
+ parser .Reader .Back ()
66
+ break
67
+ }
31
68
32
- appendToken := func () {
33
- prevToken := & currentToken
34
- tokenList = append (tokenList , currentToken )
35
- currentToken = Token {Type : Initial , Prev : prevToken }
36
- prevToken .Next = & currentToken
69
+ result = append (result , parser .Reader .charInByte )
37
70
}
38
71
39
- maybeComment := func (char * string ) {
40
- nextChar , _ , _ := next ()
72
+ return string (result )
73
+ }
74
+
75
+ func (parser * Parser ) collectIdentifier () string {
76
+ result := []byte {parser .Reader .charInByte }
77
+
78
+ for {
79
+ charInByte , err := parser .Reader .Next ()
41
80
42
- if nextChar == "/" {
43
- currentToken .Type = LineComment
44
- } else if nextChar == "*" {
45
- currentToken .Type = BlockCommentStart
46
- } else {
47
- currentToken .Type = Unknown
81
+ if err != nil || ! IsLetterOrSlash (charInByte ) {
82
+ parser .Reader .Back ()
83
+ break
48
84
}
49
85
50
- * char += nextChar
86
+ result = append ( result , parser . Reader . charInByte )
51
87
}
52
88
89
+ return string (result )
90
+ }
91
+
92
+ func (parser * Parser ) collectString () string {
93
+ result := []byte {parser .Reader .charInByte }
94
+
53
95
for {
54
- _ , err := reader .Next ()
96
+ charInByte , err := parser . Reader .Next ()
55
97
56
- char := reader .char
57
- charByte := reader .charInByte
98
+ if parser .Reader .char == "\n " {
99
+ parser .Reader .ReportLineError ()
100
+ }
58
101
59
- if err != nil {
102
+ if err != nil || string (charInByte ) != "\" " {
103
+ parser .Reader .Back ()
60
104
break
61
105
}
62
106
63
- switch char {
64
- case "/" :
65
- if char == "/" && currentToken .Type != StringValue && currentToken .Type != LineComment || currentToken .Type != BlockCommentStart {
66
- maybeComment (& char )
67
- continue
68
- }
107
+ result = append (result , parser .Reader .charInByte )
108
+ }
109
+
110
+ return string (result )
111
+ }
112
+
113
+ func (parser * Parser ) collectLineComment () string {
114
+ row := parser .Reader .lines [parser .Reader .row ]
115
+ result := string (row [parser .Reader .col + 1 :])
116
+
117
+ parser .Reader .SkipLine ()
118
+
119
+ return result
120
+ }
121
+
122
+ func (parser * Parser ) collectUnknown () string {
123
+ parser .Reader .col = - 1
124
+ result := []byte {}
125
+ firstFlag := true
126
+
127
+ for {
128
+ _ , err := parser .Reader .Next ()
129
+
130
+ if err != nil || (! firstFlag && IsLetterOrSlash (parser .Reader .charInByte ) && parser .Reader .col == 0 ) {
131
+ parser .Reader .Back ()
132
+ break
69
133
}
70
134
71
- switch currentToken .Type {
72
- case Initial :
73
- if isLetterOrSlash (charByte ) {
74
- currentToken .Type = Indetifier
75
- } else if isDigit (charByte ) {
76
- currentToken .Type = IntValue
77
- }
135
+ firstFlag = false
78
136
79
- currentToken .Value = char
80
- case IntValue :
81
- if isIllegalChar (charByte ) {
82
- appendToken ()
83
- // skipSpace()
84
- break
85
- }
137
+ result = append (result , parser .Reader .charInByte )
138
+ }
86
139
87
- if isLetterOrSlash (charByte ) {
88
- currentToken .Type = Indetifier
89
- } else {
90
- // error()
91
- }
140
+ return string (result )
141
+ }
92
142
93
- currentToken .Value += char
94
- case StringValue :
95
- if char == "\" " {
96
- tokenList = append (tokenList , currentToken )
97
- // skipSpace()
98
- break
99
- }
143
+ func (parser * Parser ) getIdentifierTokenType (id string ) TokenType {
144
+ switch id {
145
+ case "const" :
146
+ parser .Reader .SkipSpace ()
147
+ _ , err := parser .Reader .Next ()
148
+
149
+ if err != nil {
150
+ parser .Reader .ReportLineError ()
151
+ }
152
+
153
+ if parser .Reader .char != "(" {
154
+ return Unknown
155
+ }
156
+
157
+ parser .Reader .Back ()
158
+ parser .inConstDeclaration = true
159
+ return Const
160
+ case "type" :
161
+ return Type
162
+ case "string" :
163
+ return StringType
164
+ case "int" :
165
+ return IntType
166
+ case "iota" :
167
+ return IOTA
168
+ default :
169
+ return Indetifier
170
+ }
171
+ }
172
+
173
+ func NewParser (s string ) Parser {
174
+ reader := NewReader (s )
175
+
176
+ return Parser {
177
+ Reader : * reader ,
178
+ CurrentToken : Token {Type : Initial },
179
+ Tokens : []Token {},
180
+ }
181
+ }
182
+
183
+ func (parser * Parser ) Parse () []Token {
184
+ for {
185
+ charInByte , err := parser .Reader .Next ()
186
+
187
+ if err != nil {
188
+ break
189
+ }
100
190
101
- if isIllegalChar (charByte ) {
102
- // error()
191
+ switch string (charInByte ) {
192
+ case "=" :
193
+ if parser .inConstDeclaration {
194
+ parser .setCurrentTokenType (Assignment )
195
+ } else {
196
+ parser .setCurrentTokenType (Unknown )
103
197
}
104
- case Indetifier :
105
- if isIllegalChar (charByte ) || char == " " {
106
- switch currentToken .Value {
107
- case "type" :
108
- currentToken .Type = Type
109
- case "const" :
110
- currentToken .Type = Const
111
- case "package" :
112
- currentToken .Type = Package
113
- }
114
-
115
- appendToken ()
116
- break
198
+ case "(" :
199
+ if parser .PrevToken .Type == Const {
200
+ parser .setCurrentTokenType (LeftParentheses )
201
+ } else {
202
+ parser .setCurrentTokenType (Unknown )
117
203
}
204
+ case ")" :
205
+ parser .setCurrentTokenType (RightParentheses )
206
+ parser .inConstDeclaration = false
207
+ case "/" :
208
+ nextCharInByte , err := parser .Reader .Next ()
118
209
119
- if isLetterOrSlash (charByte ) {
120
- currentToken .Value += char
121
- break
210
+ if err != nil {
211
+ parser .Reader .ReportLineError ()
122
212
}
123
213
124
- // error()
214
+ if string (nextCharInByte ) == "/" {
215
+ parser .CurrentToken .Value = parser .collectLineComment ()
216
+ parser .setCurrentTokenType (LineComment )
217
+ } else if string (nextCharInByte ) == "*" {
218
+ parser .setCurrentTokenType (LeftParentheses )
219
+ } else {
220
+ parser .setCurrentTokenType (Unknown )
221
+ }
222
+ case "\" " :
223
+ parser .setCurrentTokenType (StringValue )
224
+ parser .CurrentToken .Value = parser .collectString ()
225
+ default :
226
+ if IsDigit (charInByte ) {
227
+ parser .CurrentToken .Value = parser .collectInt ()
228
+ parser .setCurrentTokenType (IntValue )
229
+ } else if IsLetterOrSlash (charInByte ) {
230
+ parser .CurrentToken .Value = parser .collectIdentifier ()
231
+ parser .setCurrentTokenType (parser .getIdentifierTokenType (parser .CurrentToken .Value ))
232
+ } else {
233
+ parser .setCurrentTokenType (Unknown )
234
+ }
125
235
}
126
236
}
127
237
128
- return tokenList
238
+ return parser . Tokens
129
239
}
0 commit comments