anc95
diff --git a/‎main.go
Lines changed: 2 additions & 12 deletions b/‎main.go
Lines changed: 2 additions & 12 deletions
diff --git a/‎src/token/parse.go
Lines changed: 194 additions & 84 deletions b/‎src/token/parse.go
Lines changed: 194 additions & 84 deletions
@@ -7,19 +7,9 @@ import (
 )
 
 func main() {
-	// file, err := os.Open("./main.go")
-
-	// if err != nil {
-	// 	panic(err)
-	// }
-
-	// defer file.Close()
-	// content, _ := ioutil.ReadAll(file)
-
-	// fmt.Print((string(content)))
-
-	a := token.Parse("1=a")
+	parser := token.NewParser("func() {\n hell\n xxx\n dsdasdsa\n \n} \na=1\ntype C string //hello\nconst ( A C = 1 \n B \n D")
 
+	a := parser.Parse()
 	for _, v := range a {
 		fmt.Printf("[type: %d, value: %s]\n", int(v.Type), v.Value)
 	}
 
@@ -5,125 +5,235 @@ type Status int
+// Token is one lexical unit produced by Parser.
+//
+// Value holds the token text and Type its classification. Start is the
+// {row, col} reader position stored by Parser.setCurrentTokenType ({row, 0}
+// for Unknown tokens). End is not assigned anywhere in this diff.
+// NOTE(review): End is presumably the {row, col} where the token stops — confirm.
 type Token struct {
 	Value string
 	Type  TokenType
-	Next  *Token
-	Prev  *Token
+	Start [2]int
+	End   [2]int
 }
 
-func isDigit(b byte) bool {
-	return b >= 80 && b <= 57
+// Parser turns the text held by Reader into a flat slice of Tokens.
+//
+// CurrentToken is the token under construction; appendToken moves it into
+// Tokens, remembers it as PrevToken and resets CurrentToken to Initial.
+// inConstDeclaration is set by getIdentifierTokenType when "const" is followed
+// by "(" and cleared by Parse on ")"; Parse emits Assignment tokens only while
+// it is set.
+type Parser struct {
+	CurrentToken       Token
+	PrevToken          Token
+	Tokens             []Token
+	Reader             Reader
+	inConstDeclaration bool
 }
 
-func isLetterOrSlash(b byte) bool {
-	return isDigit(b) || (b >= 65 && b <= 90) || (b >= 97 && b <= 122) || b == 95
+// appendToken finalizes the token being built: it appends CurrentToken to
+// Tokens, records it as PrevToken, resets CurrentToken to a fresh Initial
+// token and then calls Reader.SkipSpace.
+func (parser *Parser) appendToken() {
+	parser.Tokens = append(parser.Tokens, parser.CurrentToken)
+	parser.PrevToken = parser.CurrentToken
+	parser.CurrentToken = Token{Type: Initial}
+	parser.Reader.SkipSpace()
 }
 
-func isIllegalChar(b byte) bool {
-	// reference: https://zh.wikipedia.org/wiki/ASCII
-	return b <= 31
+// setCurrentTokenType stamps CurrentToken with type t and the reader's
+// current {row, col}, fills in Value for the token kinds whose text is fixed
+// or must be re-collected, and hands the token to appendToken.
+//
+// For Unknown, every already-emitted token at the tail of Tokens that started
+// on the reader's current row is discarded, Start becomes {row, 0}, and Value
+// is whatever collectUnknown returns. Assignment and the two parentheses get
+// their literal text. All other types keep the Value the caller already set.
+func (parser *Parser) setCurrentTokenType(t TokenType) {
+	row := parser.Reader.row
+	parser.CurrentToken.Type = t
+	parser.CurrentToken.Start = [2]int{row, parser.Reader.col}
+
+	switch t {
+	case Unknown:
+		parser.CurrentToken.Start = [2]int{row, 0}
+
+		// Walk back over the trailing tokens that belong to this row; they
+		// are replaced by the single Unknown token.
+		keep := len(parser.Tokens)
+		for keep > 0 && parser.Tokens[keep-1].Start[0] == row {
+			keep--
+		}
+		parser.Tokens = parser.Tokens[:keep]
+
+		parser.CurrentToken.Value = parser.collectUnknown()
+	case Assignment:
+		parser.CurrentToken.Value = "="
+	case LeftParentheses:
+		parser.CurrentToken.Value = "("
+	case RightParentheses:
+		parser.CurrentToken.Value = ")"
+	}
+
+	parser.appendToken()
 }
 
-func Parse(s string) []Token {
-	reader := NewReader(s)
-	tokenList := []Token{}
-	currentToken := Token{Type: Initial}
+// collectInt gathers a run of digit bytes into a string.
+//
+// The result is seeded with the reader's current byte (Parse calls this only
+// after IsDigit accepted that byte). Reader.Next is then called repeatedly and
+// each byte is appended until Next returns an error or IsDigit rejects the
+// byte; in both cases Reader.Back is called before returning.
+// NOTE(review): Back presumably un-reads the rejected byte so Parse sees it
+// again — confirm against Reader.
+func (parser *Parser) collectInt() string {
+	result := []byte{parser.Reader.charInByte}
 
-	var next func() (string, byte, error)
+	for {
+		charInByte, err := parser.Reader.Next()
+
+		if err != nil || !IsDigit(charInByte) {
+			parser.Reader.Back()
+			break
+		}
 
-	appendToken := func() {
-		prevToken := &currentToken
-		tokenList = append(tokenList, currentToken)
-		currentToken = Token{Type: Initial, Prev: prevToken}
-		prevToken.Next = &currentToken
+		result = append(result, parser.Reader.charInByte)
 	}
 
-	maybeComment := func(char *string) {
-		nextChar, _, _ := next()
+	return string(result)
+}
+
+// collectIdentifier gathers an identifier-like run of bytes into a string.
+//
+// The result is seeded with the reader's current byte. Reader.Next is then
+// called repeatedly and each byte is appended until Next returns an error or
+// IsLetterOrSlash rejects the byte; in both cases Reader.Back is called
+// before returning.
+// NOTE(review): the removed isLetterOrSlash accepted digits, ASCII letters
+// and byte 95 ('_'); IsLetterOrSlash presumably does the same — confirm.
+func (parser *Parser) collectIdentifier() string {
+	result := []byte{parser.Reader.charInByte}
+
+	for {
+		charInByte, err := parser.Reader.Next()
 
-		if nextChar == "/" {
-			currentToken.Type = LineComment
-		} else if nextChar == "*" {
-			currentToken.Type = BlockCommentStart
-		} else {
-			currentToken.Type = Unknown
+		if err != nil || !IsLetterOrSlash(charInByte) {
+			parser.Reader.Back()
+			break
 		}
 
-		*char += nextChar
+		result = append(result, parser.Reader.charInByte)
 	}
 
+	return string(result)
+}
+
+// collectString gathers a double-quoted string literal, quotes included.
+//
+// The result is seeded with the reader's current byte (the opening quote).
+// Bytes are then read and appended until the closing quote has been consumed,
+// which leaves the reader on the last byte of the token, the same position
+// collectInt and collectIdentifier leave it in. A newline inside the literal
+// is reported through Reader.ReportLineError. If the input ends before the
+// closing quote, Reader.Back is called and the partial text is returned.
+//
+// The previous condition stopped at the first byte that was NOT a quote, so
+// every literal collapsed to its opening quote.
+// NOTE(review): an unterminated literal at end of input is returned silently;
+// consider reporting it once Reader's error API is confirmed.
+func (parser *Parser) collectString() string {
+	result := []byte{parser.Reader.charInByte}
+
 	for {
-		_, err := reader.Next()
+		charInByte, err := parser.Reader.Next()
 
-		char := reader.char
-		charByte := reader.charInByte
+		// Input exhausted before the closing quote.
+		if err != nil {
+			parser.Reader.Back()
+			break
+		}
 
-		if err != nil {
+		if parser.Reader.char == "\n" {
+			parser.Reader.ReportLineError()
+		}
+
+		result = append(result, parser.Reader.charInByte)
+
+		// The closing quote is part of the token; stop right after it.
+		if string(charInByte) == "\"" {
 			break
 		}
-
-		switch char {
-		case "/":
-			if char == "/" && currentToken.Type != StringValue && currentToken.Type != LineComment || currentToken.Type != BlockCommentStart {
-				maybeComment(&char)
-				continue
-			}
+	}
+
+	return string(result)
+}
+
+// collectLineComment returns the remainder of the reader's current row,
+// starting one position after Reader.col, and then calls Reader.SkipLine.
+// Parse invokes it right after reading the second '/' of a "//" comment.
+func (parser *Parser) collectLineComment() string {
+	reader := &parser.Reader
+	comment := string(reader.lines[reader.row][reader.col+1:])
+
+	reader.SkipLine()
+
+	return comment
+}
+
+// collectUnknown re-reads the input as one opaque chunk and returns it.
+//
+// It first sets Reader.col to -1 (presumably so the next Reader.Next yields
+// column 0 of the current row — confirm against Reader). Bytes are then read
+// and appended until Next returns an error, or until a byte other than the
+// first one read satisfies IsLetterOrSlash while Reader.col is 0. In both
+// cases Reader.Back is called before returning.
+// NOTE(review): the col == 0 test looks like "a later line starts with an
+// identifier character", i.e. the chunk runs up to the next such line — confirm.
+func (parser *Parser) collectUnknown() string {
+	parser.Reader.col = -1
+	result := []byte{}
+	firstFlag := true
+
+	for {
+		_, err := parser.Reader.Next()
+
+		if err != nil || (!firstFlag && IsLetterOrSlash(parser.Reader.charInByte) && parser.Reader.col == 0) {
+			parser.Reader.Back()
+			break
 		}
 
-		switch currentToken.Type {
-		case Initial:
-			if isLetterOrSlash(charByte) {
-				currentToken.Type = Indetifier
-			} else if isDigit(charByte) {
-				currentToken.Type = IntValue
-			}
+		firstFlag = false
 
-			currentToken.Value = char
-		case IntValue:
-			if isIllegalChar(charByte) {
-				appendToken()
-				// skipSpace()
-				break
-			}
+		result = append(result, parser.Reader.charInByte)
+	}
 
-			if isLetterOrSlash(charByte) {
-				currentToken.Type = Indetifier
-			} else {
-				// error()
-			}
+	return string(result)
+}
 
-			currentToken.Value += char
-		case StringValue:
-			if char == "\"" {
-				tokenList = append(tokenList, currentToken)
-				// skipSpace()
-				break
-			}
+// getIdentifierTokenType maps the identifier text id to its TokenType.
+//
+// "type", "string", "int" and "iota" map to Type, StringType, IntType and
+// IOTA; any other text except "const" is an Indetifier. "const" needs one
+// byte of look-ahead: after Reader.SkipSpace the next byte is read (a read
+// error is passed to Reader.ReportLineError). Unless that byte is "(", the
+// result is Unknown. Otherwise the byte is handed back with Reader.Back,
+// inConstDeclaration is set and the result is Const.
+func (parser *Parser) getIdentifierTokenType(id string) TokenType {
+	switch id {
+	case "type":
+		return Type
+	case "string":
+		return StringType
+	case "int":
+		return IntType
+	case "iota":
+		return IOTA
+	case "const":
+		// Needs look-ahead; handled below.
+	default:
+		return Indetifier
+	}
+
+	reader := &parser.Reader
+	reader.SkipSpace()
+
+	if _, err := reader.Next(); err != nil {
+		reader.ReportLineError()
+	}
+
+	if reader.char != "(" {
+		return Unknown
+	}
+
+	reader.Back()
+	parser.inConstDeclaration = true
+
+	return Const
+}
+
+// NewParser returns a Parser over the source text s. The Parser holds a copy
+// of the Reader built by NewReader(s), an Initial CurrentToken and an empty,
+// non-nil Tokens slice.
+func NewParser(s string) Parser {
+	var parser Parser
+
+	parser.Reader = *NewReader(s)
+	parser.CurrentToken = Token{Type: Initial}
+	parser.Tokens = []Token{}
+
+	return parser
+}
+
+// Parse reads the input one byte at a time until Reader.Next fails and
+// returns the accumulated Tokens.
+//
+// Dispatch on the byte just read:
+//   - "=" is an Assignment inside a const declaration, otherwise Unknown.
+//   - "(" is LeftParentheses right after a Const token, otherwise Unknown.
+//   - ")" is RightParentheses and ends the const declaration.
+//   - "/" looks at the following byte: "//" yields a LineComment carrying the
+//     rest of the line, "/*" yields BlockCommentStart, anything else Unknown.
+//   - `"` yields a StringValue whose text comes from collectString.
+//   - digits yield IntValue, identifier bytes go through
+//     getIdentifierTokenType, everything else is Unknown.
+//
+// Two defects are fixed here: "/*" used to be tagged LeftParentheses, and the
+// StringValue token used to be appended before its text was collected, so the
+// text ended up on the following token.
+func (parser *Parser) Parse() []Token {
+	for {
+		charInByte, err := parser.Reader.Next()
+
+		if err != nil {
+			break
+		}
 
-			if isIllegalChar(charByte) {
-				// error()
+		switch string(charInByte) {
+		case "=":
+			if parser.inConstDeclaration {
+				parser.setCurrentTokenType(Assignment)
+			} else {
+				parser.setCurrentTokenType(Unknown)
 			}
-		case Indetifier:
-			if isIllegalChar(charByte) || char == " " {
-				switch currentToken.Value {
-				case "type":
-					currentToken.Type = Type
-				case "const":
-					currentToken.Type = Const
-				case "package":
-					currentToken.Type = Package
-				}
-
-				appendToken()
-				break
+		case "(":
+			if parser.PrevToken.Type == Const {
+				parser.setCurrentTokenType(LeftParentheses)
+			} else {
+				parser.setCurrentTokenType(Unknown)
 			}
+		case ")":
+			parser.setCurrentTokenType(RightParentheses)
+			parser.inConstDeclaration = false
+		case "/":
+			nextCharInByte, err := parser.Reader.Next()
 
-			if isLetterOrSlash(charByte) {
-				currentToken.Value += char
-				break
+			if err != nil {
+				parser.Reader.ReportLineError()
 			}
 
-			// error()
+			if string(nextCharInByte) == "/" {
+				parser.CurrentToken.Value = parser.collectLineComment()
+				parser.setCurrentTokenType(LineComment)
+			} else if string(nextCharInByte) == "*" {
+				// "/*" opens a block comment, not a parenthesis.
+				parser.setCurrentTokenType(BlockCommentStart)
+			} else {
+				parser.setCurrentTokenType(Unknown)
+			}
+		case "\"":
+			// Collect the text first: setCurrentTokenType appends the token
+			// and resets CurrentToken, so Value must already be in place.
+			parser.CurrentToken.Value = parser.collectString()
+			parser.setCurrentTokenType(StringValue)
+		default:
+			if IsDigit(charInByte) {
+				parser.CurrentToken.Value = parser.collectInt()
+				parser.setCurrentTokenType(IntValue)
+			} else if IsLetterOrSlash(charInByte) {
+				parser.CurrentToken.Value = parser.collectIdentifier()
+				parser.setCurrentTokenType(parser.getIdentifierTokenType(parser.CurrentToken.Value))
+			} else {
+				parser.setCurrentTokenType(Unknown)
+			}
 		}
 	}
 
-	return tokenList
+	return parser.Tokens
 }