36 changes: 27 additions & 9 deletions main.go
@@ -1,22 +1,40 @@
 package main

 import (
-	"fmt"
+	. "miniSQL/src/Interpreter/lexer"
 	"strings"
-
-	"miniSQL/src/parser"
 )

+// // The parsed result
+// type LexerResult struct {
+// 	Token int
+// 	Literal string
+// }
+
 func main() {
 	s := strings.NewReader("creat table tabel_name where key <= 1")
-	l := parser.NewScanner(s)
+	l := NewScanner(s)
+	p := NewTokenizer()
+	result := &LexerResult{}
+	lastToken := 0
 	for {
 		tok, str := l.Scan()
-		fmt.Print(tok)
-		fmt.Print(" ")
-		fmt.Println(str)
-		if tok == 1 {
-			break
+		// fmt.Print(tok)
+		// fmt.Print(" ")
+		// fmt.Println(str)
+		// if tok == 1 {
+		// 	break
+		// }
+		switch tok {
+		case T_EOF:
+			// Stop lex
+		case T_IDENT, T_INTEGER, T_FLOAT, T_STRING, T_LEFT_PARENTHESIS, T_RIGHT_PARENTHESIS, T_COMMA, T_SEMICOLON, T_EQUAL, T_ANGLE_LEFT, T_ANGLE_RIGHT, T_ANGLE_LEFT_EQUAL, T_ANGLE_RIGHT_EQUAL, T_NOT_EQUAL, T_ASTERISK, T_POINT:
+			result.Literal = str
+		// default:
+		// 	log.Printf("UnexpectedToken: tok is %d, lit is %s\n", tok, lit)
+		// 	return nil, UnexpectedTokenErr
 		}
+
+		result.Token = p.FromStrLit(str, tok, lastToken)
 	}
 }
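As merged, this loop can never exit: the T_EOF case falls through without stopping, and lastToken is never updated after FromStrLit, so the tokenizer's lookahead always sees 0. A minimal corrected loop might read as follows; the return and the lastToken update are editor suggestions, not part of the PR (the per-kind literal handling is elided for brevity):

	for {
		tok, str := l.Scan()
		if tok == T_EOF {
			return // stop lexing at end of input
		}
		result.Literal = str
		result.Token = p.FromStrLit(str, tok, lastToken)
		lastToken = result.Token // feed the mapped token back in as lookahead context
	}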
60 changes: 60 additions & 0 deletions src/Interpreter/lexer/lexer.go
@@ -0,0 +1,60 @@
package lexer

import (
"io"
"log"
)

type Token int

// Utility: takes a coarsely processed token from the Scanner and uses lookahead to attach more information to it
type Tokenizer interface {
FromStrLit(lit string, TokenType Token, lastToken int) int
}

// Utility: performs the first pass over the input and produces basic tokens
type Scanner interface {
Scan() (tok Token, lit string)
}

// Module: the overall type that lexes the input into tokens
type LexerImpl struct {
	scanner   Scanner   // first pass over the input
	tokenizer Tokenizer // attaches further information to each token
Result interface{}
}

// Storage: holds the result of lexing one token; Lex() returns its result through this object
type LexerResult struct {
Token int
Literal string
}

// NewLexerImpl creates a new LexerImpl that lexes the given input into tokens
func NewLexerImpl(r io.Reader) *LexerImpl {
return &LexerImpl{
scanner: NewScanner(r),
tokenizer: NewTokenizer(),
}
}

func (li *LexerImpl) Lex(lastToken int) (*LexerResult, error) {
result := &LexerResult{}

tok, lit := li.scanner.Scan() // the scanner has already done the basic tokenization of the input stream

switch tok {
case T_EOF:
// Stop lex
case T_IDENT, T_INTEGER, T_FLOAT, T_STRING, T_LEFT_PARENTHESIS, T_RIGHT_PARENTHESIS, T_COMMA, T_SEMICOLON, T_EQUAL, T_ANGLE_LEFT, T_ANGLE_RIGHT, T_ANGLE_LEFT_EQUAL, T_ANGLE_RIGHT_EQUAL, T_NOT_EQUAL, T_ASTERISK, T_POINT:
result.Literal = lit
default:
log.Printf("UnexpectedToken: tok is %d, lit is %s\n", tok, lit)
// return nil, UnexpectedTokenErr // TODO: the error handling here still needs to be fleshed out
return nil, nil // NOTE: callers must nil-check the result until a real error is returned
}

result.Token = li.tokenizer.FromStrLit(lit, tok, lastToken)

return result, nil
}
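For context, a caller threads lastToken through successive Lex calls, mirroring the test below. A minimal driver sketch by the editor, assuming the import path introduced by this PR and the test's convention that a mapped token value of 0 marks the end of input:

package main

import (
	"fmt"
	"strings"

	lexer "miniSQL/src/Interpreter/lexer"
)

func main() {
	l := lexer.NewLexerImpl(strings.NewReader("select a from t;"))
	last := 0
	for {
		res, err := l.Lex(last)
		if err != nil || res == nil {
			break // unexpected token or lexer error
		}
		fmt.Println(res.Token, res.Literal)
		last = res.Token
		if res.Token == 0 { // the test below treats 0 as end of input
			break
		}
	}
}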
46 changes: 46 additions & 0 deletions src/Interpreter/lexer/lexer_test.go
@@ -0,0 +1,46 @@
package lexer

import (
"log"
"strings"
"testing"
)

var sqlStrings = []string{
// "create table 1_a",
" ",
"create table cxz(" +
"afsdfsad int unique," +
"what char(30) not null," +
"primary key (what)" +
");",
"select a,b,c,d,e,f,g from cxz where a=123 and b=456 or c=234;",
}

// TODO: there is a bug here: we cannot stop reading the input correctly
func TestLexerLex(t *testing.T) {
	for _, str := range sqlStrings {
		lastToken := 0
		in := strings.NewReader(str) // build the input reader
		impl := NewLexerImpl(in)     // build the LexerImpl under test
		for lastToken != int(T_EOF) {
			r, _ := impl.Lex(lastToken)
			if r == nil {
				break // Lex currently returns nil, nil on an unexpected token
			}

			tokVal := r.Token
			literal := r.Literal
			lastToken = tokVal

			// log.Print(tokVal)
			// log.Print(" ")
			log.Print(literal)
			// log.Print(" ")
			// log.Print(lastToken)
			if tokVal == 0 {
				// check whether we have reached the end of the input
				break
			}
		}

	}
	t.Errorf("Don't panic, I just wanted a log\n")
}
226 changes: 226 additions & 0 deletions src/Interpreter/lexer/scanner.go
@@ -0,0 +1,226 @@
package lexer

import (
"bufio"
"bytes"
"io"
)

// type Token int

// Preliminary token kinds from lexical analysis (some of these are later tokenized into tokens with more precise semantics)
const (
	// special markers
	T_ILLEGAL Token = iota
	T_EOF
	WS // whitespace
	// literal kinds
	T_IDENT   // identifier; keywords are not distinguished yet and all fall into this class
	T_INTEGER // integer
	T_FLOAT   // floating-point number
	T_STRING  // string
	// other symbols
	T_ASTERISK          // *
	T_COMMA             // ,
	T_LEFT_PARENTHESIS  // (
	T_RIGHT_PARENTHESIS // )
	T_SEMICOLON         // ;
	T_EQUAL             // =
	T_ANGLE_LEFT        // <
	T_ANGLE_LEFT_EQUAL  // <=
	T_ANGLE_RIGHT_EQUAL // >=
	T_ANGLE_RIGHT       // >
	T_NOT_EQUAL         // <> or != (only <> is recognized by the scanner so far)
	T_POINT             // .
)

type State int // a state of the scanner's state machine

const (
STATE_INIT State = iota
STATE_INTEGER
STATE_POINT
STATE_FRACTION
STATE_IDENT
STATE_ANGLE_LEFT
STATE_ANGLE_RIGHT
STATE_END
)
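For orientation, the transitions these states encode in Scan below are roughly the following (an editor's summary, not part of the PR):

	STATE_INIT  --digit-->   STATE_INTEGER  --'.'-->  STATE_POINT  --digit-->  STATE_FRACTION
	STATE_INIT  --letter-->  STATE_IDENT    (letters, digits and '_' may follow)
	STATE_INIT  --'<'-->     STATE_ANGLE_LEFT   ('=' yields "<=", '>' yields "<>")
	STATE_INIT  --'>'-->     STATE_ANGLE_RIGHT  ('=' yields ">=")
	STATE_INIT: single-character symbols, whitespace and EOF are handled immediately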

type CharType int // the character class of a single rune

const (
NUM CharType = iota
CHAR
SPECIAL_SYMBOL
ILLEGAL_SYMBOL
SPACE
UNDERLINE
)

// eof represents a marker rune for the end of the reader.
var eof = rune(0)

type InputScanner struct {
	r          *bufio.Reader
	apostrophe bool // true while the scanner is inside a quoted string literal (not used yet)
}

func NewScanner(r io.Reader) *InputScanner {
	return &InputScanner{r: bufio.NewReader(r), apostrophe: false}
}

// Scan keeps reading from the input stream and assembles one preliminary token at a time
func (s *InputScanner) Scan() (tok Token, lit string) {
ch := s.read()
var buf bytes.Buffer
state := STATE_INIT
for state != STATE_END {
if checkCharType(ch) == ILLEGAL_SYMBOL {
return T_ILLEGAL, string(ch)
}
// buf.WriteRune(ch)
switch state {
case STATE_INIT:
switch checkCharType(ch) {
case NUM:
buf.WriteRune(ch)
state = STATE_INTEGER
case CHAR:
buf.WriteRune(ch)
state = STATE_IDENT
case SPECIAL_SYMBOL:
switch ch {
case eof:
return T_EOF, ""
case '.':
return T_POINT, string(ch)
case '*':
return T_ASTERISK, string(ch)
case ',':
return T_COMMA, string(ch)
case '(':
return T_LEFT_PARENTHESIS, string(ch)
case ')':
return T_RIGHT_PARENTHESIS, string(ch)
case ';':
return T_SEMICOLON, string(ch)
case '=':
return T_EQUAL, string(ch)
case '<':
buf.WriteRune(ch)
state = STATE_ANGLE_LEFT
case '>':
buf.WriteRune(ch)
state = STATE_ANGLE_RIGHT
}
case SPACE:
// skip whitespace and stay in STATE_INIT
case UNDERLINE:
// identifiers may not start with an underscore
return T_ILLEGAL, string(ch)
}
case STATE_INTEGER:
switch checkCharType(ch) {
case NUM:
buf.WriteRune(ch)
case CHAR, SPACE, UNDERLINE:
s.unread()
return T_INTEGER, buf.String()
case SPECIAL_SYMBOL:
if ch == '.' {
buf.WriteRune(ch)
state = STATE_POINT
} else {
s.unread()
return T_INTEGER, buf.String()
}
}
case STATE_POINT:
switch checkCharType(ch) {
case NUM:
buf.WriteRune(ch)
state = STATE_FRACTION
case CHAR, SPECIAL_SYMBOL, SPACE, UNDERLINE:
return T_ILLEGAL, string(ch)
}
case STATE_FRACTION:
switch checkCharType(ch) {
case NUM:
buf.WriteRune(ch)
case CHAR, SPECIAL_SYMBOL, SPACE, UNDERLINE:
s.unread()
return T_FLOAT, buf.String()
}
case STATE_IDENT:
switch checkCharType(ch) {
case NUM, CHAR, UNDERLINE:
buf.WriteRune(ch)
case SPECIAL_SYMBOL, SPACE:
s.unread()
return T_IDENT, buf.String()
}
case STATE_ANGLE_LEFT:
switch checkCharType(ch) {
case NUM, CHAR, SPACE, UNDERLINE: // UNDERLINE included so a following '_' is pushed back instead of swallowed
s.unread()
return T_ANGLE_LEFT, buf.String()
case SPECIAL_SYMBOL:
// ch = s.read()
if ch == '=' {
return T_ANGLE_LEFT_EQUAL, "<="
} else if ch == '>' {
return T_NOT_EQUAL, "<>"
} else {
s.unread()
return T_ANGLE_LEFT, buf.String()
}
}
case STATE_ANGLE_RIGHT:
switch checkCharType(ch) {
case NUM, CHAR, SPACE, UNDERLINE: // UNDERLINE included so a following '_' is pushed back instead of swallowed
s.unread()
return T_ANGLE_RIGHT, buf.String()
case SPECIAL_SYMBOL:
// ch = s.read()
if ch == '=' {
return T_ANGLE_RIGHT_EQUAL, ">="
} else {
s.unread()
return T_ANGLE_RIGHT, buf.String()
}
}
}
ch = s.read()
}

return T_ILLEGAL, string(ch)
}

// read reads the next rune from the buffered reader.
// Returns rune(0) if an error occurs (including io.EOF).
func (s *InputScanner) read() rune {
ch, _, err := s.r.ReadRune()
if err != nil {
return eof
}
return ch
}

// unread places the previously read rune back on the reader.
func (s *InputScanner) unread() { _ = s.r.UnreadRune() }

func checkCharType(ch rune) CharType {
if ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' {
return CHAR
} else if ch >= '0' && ch <= '9' {
// fmt.Println("digit detected")
return NUM
} else if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
return SPACE
} else if ch == '.' || ch == '*' || ch == ',' || ch == '(' || ch == ')' || ch == ';' || ch == '=' || ch == '<' || ch == '>' || ch == eof {
return SPECIAL_SYMBOL
} else if ch == '_' {
return UNDERLINE
} else {
return ILLEGAL_SYMBOL
}
}
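To make the scanner concrete, this short driver (an editor sketch, assuming the import path introduced by this PR) prints the token stream for a small input. Tracing the state machine above, "where price <= 1.5;" yields T_IDENT "where", T_IDENT "price", T_ANGLE_LEFT_EQUAL "<=", T_FLOAT "1.5", T_SEMICOLON ";", then T_EOF:

package main

import (
	"fmt"
	"strings"

	lexer "miniSQL/src/Interpreter/lexer"
)

func main() {
	s := lexer.NewScanner(strings.NewReader("where price <= 1.5;"))
	for {
		tok, lit := s.Scan()
		fmt.Printf("%d %q\n", tok, lit)
		if tok == lexer.T_EOF || tok == lexer.T_ILLEGAL {
			break
		}
	}
}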
9 changes: 9 additions & 0 deletions src/Interpreter/lexer/scanner_test.go
@@ -0,0 +1,9 @@
package lexer

import (
"testing"
)

func TestScan(t *testing.T) {
// TODO: too lazy to write this for now
}
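The stub above is empty; a table-driven TestScan consistent with the scanner's behavior could look like the following (an editor sketch, not part of the PR; it also needs "strings" added to the imports):

func TestScan(t *testing.T) {
	cases := []struct {
		in  string
		tok Token
		lit string
	}{
		{"abc ", T_IDENT, "abc"},
		{"42,", T_INTEGER, "42"},
		{"3.14 ", T_FLOAT, "3.14"},
		{"<=", T_ANGLE_LEFT_EQUAL, "<="},
		{"<>", T_NOT_EQUAL, "<>"},
		{";", T_SEMICOLON, ";"},
		{"", T_EOF, ""},
	}
	for _, c := range cases {
		s := NewScanner(strings.NewReader(c.in))
		tok, lit := s.Scan()
		if tok != c.tok || lit != c.lit {
			t.Errorf("Scan(%q) = (%d, %q), want (%d, %q)", c.in, tok, lit, c.tok, c.lit)
		}
	}
}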