Skip to content

Commit b683e9d

Browse files
committed
chore: tokenize almost ready
1 parent 03559b2 commit b683e9d

File tree

5 files changed

+245
-107
lines changed

5 files changed

+245
-107
lines changed

main.go

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,9 @@ import (
77
)
88

99
func main() {
10-
// file, err := os.Open("./main.go")
11-
12-
// if err != nil {
13-
// panic(err)
14-
// }
15-
16-
// defer file.Close()
17-
// content, _ := ioutil.ReadAll(file)
18-
19-
// fmt.Print((string(content)))
20-
21-
a := token.Parse("1=a")
10+
parser := token.NewParser("func() {\n hell\n xxx\n dsdasdsa\n \n} \na=1\ntype C string //hello\nconst ( A C = 1 \n B \n D")
2211

12+
a := parser.Parse()
2313
for _, v := range a {
2414
fmt.Printf("[type: %d, value: %s]\n", int(v.Type), v.Value)
2515
}

src/token/parse.go

Lines changed: 194 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -5,125 +5,235 @@ type Status int
55
type Token struct {
66
Value string
77
Type TokenType
8-
Next *Token
9-
Prev *Token
8+
Start [2]int
9+
End [2]int
1010
}
1111

12-
func isDigit(b byte) bool {
13-
return b >= 80 && b <= 57
12+
type Parser struct {
13+
CurrentToken Token
14+
PrevToken Token
15+
Tokens []Token
16+
Reader Reader
17+
inConstDeclaration bool
1418
}
1519

16-
func isLetterOrSlash(b byte) bool {
17-
return isDigit(b) || (b >= 65 && b <= 90) || (b >= 97 && b <= 122) || b == 95
20+
func (parser *Parser) appendToken() {
21+
parser.Tokens = append(parser.Tokens, parser.CurrentToken)
22+
parser.PrevToken = parser.CurrentToken
23+
parser.CurrentToken = Token{Type: Initial}
24+
parser.Reader.SkipSpace()
1825
}
1926

20-
func isIllegalChar(b byte) bool {
21-
// reference: https://zh.wikipedia.org/wiki/ASCII
22-
return b <= 31
27+
func (parser *Parser) setCurrentTokenType(t TokenType) {
28+
parser.CurrentToken.Type = t
29+
parser.CurrentToken.Start = [2]int{parser.Reader.row, parser.Reader.col}
30+
31+
if t == Unknown {
32+
parser.CurrentToken.Start = [2]int{parser.Reader.row, 0}
33+
34+
index := len(parser.Tokens) - 1
35+
36+
for index >= 0 {
37+
if parser.Tokens[index].Start[0] != parser.Reader.row {
38+
break
39+
}
40+
41+
index -= 1
42+
}
43+
44+
parser.Tokens = parser.Tokens[0 : index+1]
45+
46+
parser.CurrentToken.Value = parser.collectUnknown()
47+
} else if t == Assignment {
48+
parser.CurrentToken.Value = "="
49+
} else if t == LeftParentheses {
50+
parser.CurrentToken.Value = "("
51+
} else if t == RightParentheses {
52+
parser.CurrentToken.Value = ")"
53+
}
54+
55+
parser.appendToken()
2356
}
2457

25-
func Parse(s string) []Token {
26-
reader := NewReader(s)
27-
tokenList := []Token{}
28-
currentToken := Token{Type: Initial}
58+
func (parser *Parser) collectInt() string {
59+
result := []byte{parser.Reader.charInByte}
2960

30-
var next func() (string, byte, error)
61+
for {
62+
charInByte, err := parser.Reader.Next()
63+
64+
if err != nil || !IsDigit(charInByte) {
65+
parser.Reader.Back()
66+
break
67+
}
3168

32-
appendToken := func() {
33-
prevToken := &currentToken
34-
tokenList = append(tokenList, currentToken)
35-
currentToken = Token{Type: Initial, Prev: prevToken}
36-
prevToken.Next = &currentToken
69+
result = append(result, parser.Reader.charInByte)
3770
}
3871

39-
maybeComment := func(char *string) {
40-
nextChar, _, _ := next()
72+
return string(result)
73+
}
74+
75+
func (parser *Parser) collectIdentifier() string {
76+
result := []byte{parser.Reader.charInByte}
77+
78+
for {
79+
charInByte, err := parser.Reader.Next()
4180

42-
if nextChar == "/" {
43-
currentToken.Type = LineComment
44-
} else if nextChar == "*" {
45-
currentToken.Type = BlockCommentStart
46-
} else {
47-
currentToken.Type = Unknown
81+
if err != nil || !IsLetterOrSlash(charInByte) {
82+
parser.Reader.Back()
83+
break
4884
}
4985

50-
*char += nextChar
86+
result = append(result, parser.Reader.charInByte)
5187
}
5288

89+
return string(result)
90+
}
91+
92+
func (parser *Parser) collectString() string {
93+
result := []byte{parser.Reader.charInByte}
94+
5395
for {
54-
_, err := reader.Next()
96+
charInByte, err := parser.Reader.Next()
5597

56-
char := reader.char
57-
charByte := reader.charInByte
98+
if parser.Reader.char == "\n" {
99+
parser.Reader.ReportLineError()
100+
}
58101

59-
if err != nil {
102+
if err != nil || string(charInByte) != "\"" {
103+
parser.Reader.Back()
60104
break
61105
}
62106

63-
switch char {
64-
case "/":
65-
if char == "/" && currentToken.Type != StringValue && currentToken.Type != LineComment || currentToken.Type != BlockCommentStart {
66-
maybeComment(&char)
67-
continue
68-
}
107+
result = append(result, parser.Reader.charInByte)
108+
}
109+
110+
return string(result)
111+
}
112+
113+
func (parser *Parser) collectLineComment() string {
114+
row := parser.Reader.lines[parser.Reader.row]
115+
result := string(row[parser.Reader.col+1:])
116+
117+
parser.Reader.SkipLine()
118+
119+
return result
120+
}
121+
122+
func (parser *Parser) collectUnknown() string {
123+
parser.Reader.col = -1
124+
result := []byte{}
125+
firstFlag := true
126+
127+
for {
128+
_, err := parser.Reader.Next()
129+
130+
if err != nil || (!firstFlag && IsLetterOrSlash(parser.Reader.charInByte) && parser.Reader.col == 0) {
131+
parser.Reader.Back()
132+
break
69133
}
70134

71-
switch currentToken.Type {
72-
case Initial:
73-
if isLetterOrSlash(charByte) {
74-
currentToken.Type = Indetifier
75-
} else if isDigit(charByte) {
76-
currentToken.Type = IntValue
77-
}
135+
firstFlag = false
78136

79-
currentToken.Value = char
80-
case IntValue:
81-
if isIllegalChar(charByte) {
82-
appendToken()
83-
// skipSpace()
84-
break
85-
}
137+
result = append(result, parser.Reader.charInByte)
138+
}
86139

87-
if isLetterOrSlash(charByte) {
88-
currentToken.Type = Indetifier
89-
} else {
90-
// error()
91-
}
140+
return string(result)
141+
}
92142

93-
currentToken.Value += char
94-
case StringValue:
95-
if char == "\"" {
96-
tokenList = append(tokenList, currentToken)
97-
// skipSpace()
98-
break
99-
}
143+
func (parser *Parser) getIdentifierTokenType(id string) TokenType {
144+
switch id {
145+
case "const":
146+
parser.Reader.SkipSpace()
147+
_, err := parser.Reader.Next()
148+
149+
if err != nil {
150+
parser.Reader.ReportLineError()
151+
}
152+
153+
if parser.Reader.char != "(" {
154+
return Unknown
155+
}
156+
157+
parser.Reader.Back()
158+
parser.inConstDeclaration = true
159+
return Const
160+
case "type":
161+
return Type
162+
case "string":
163+
return StringType
164+
case "int":
165+
return IntType
166+
case "iota":
167+
return IOTA
168+
default:
169+
return Indetifier
170+
}
171+
}
172+
173+
func NewParser(s string) Parser {
174+
reader := NewReader(s)
175+
176+
return Parser{
177+
Reader: *reader,
178+
CurrentToken: Token{Type: Initial},
179+
Tokens: []Token{},
180+
}
181+
}
182+
183+
func (parser *Parser) Parse() []Token {
184+
for {
185+
charInByte, err := parser.Reader.Next()
186+
187+
if err != nil {
188+
break
189+
}
100190

101-
if isIllegalChar(charByte) {
102-
// error()
191+
switch string(charInByte) {
192+
case "=":
193+
if parser.inConstDeclaration {
194+
parser.setCurrentTokenType(Assignment)
195+
} else {
196+
parser.setCurrentTokenType(Unknown)
103197
}
104-
case Indetifier:
105-
if isIllegalChar(charByte) || char == " " {
106-
switch currentToken.Value {
107-
case "type":
108-
currentToken.Type = Type
109-
case "const":
110-
currentToken.Type = Const
111-
case "package":
112-
currentToken.Type = Package
113-
}
114-
115-
appendToken()
116-
break
198+
case "(":
199+
if parser.PrevToken.Type == Const {
200+
parser.setCurrentTokenType(LeftParentheses)
201+
} else {
202+
parser.setCurrentTokenType(Unknown)
117203
}
204+
case ")":
205+
parser.setCurrentTokenType(RightParentheses)
206+
parser.inConstDeclaration = false
207+
case "/":
208+
nextCharInByte, err := parser.Reader.Next()
118209

119-
if isLetterOrSlash(charByte) {
120-
currentToken.Value += char
121-
break
210+
if err != nil {
211+
parser.Reader.ReportLineError()
122212
}
123213

124-
// error()
214+
if string(nextCharInByte) == "/" {
215+
parser.CurrentToken.Value = parser.collectLineComment()
216+
parser.setCurrentTokenType(LineComment)
217+
} else if string(nextCharInByte) == "*" {
218+
parser.setCurrentTokenType(LeftParentheses)
219+
} else {
220+
parser.setCurrentTokenType(Unknown)
221+
}
222+
case "\"":
223+
parser.setCurrentTokenType(StringValue)
224+
parser.CurrentToken.Value = parser.collectString()
225+
default:
226+
if IsDigit(charInByte) {
227+
parser.CurrentToken.Value = parser.collectInt()
228+
parser.setCurrentTokenType(IntValue)
229+
} else if IsLetterOrSlash(charInByte) {
230+
parser.CurrentToken.Value = parser.collectIdentifier()
231+
parser.setCurrentTokenType(parser.getIdentifierTokenType(parser.CurrentToken.Value))
232+
} else {
233+
parser.setCurrentTokenType(Unknown)
234+
}
125235
}
126236
}
127237

128-
return tokenList
238+
return parser.Tokens
129239
}

0 commit comments

Comments
 (0)