Skip to content

Commit 0aa0b22

Browse files
committed
Add lexer support for the "not in" operator
1 parent 70c31ac commit 0aa0b22

File tree

4 files changed

+81
-37
lines changed

4 files changed

+81
-37
lines changed

parser2/lexer/lexer.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ type lexer struct {
2828
input string
2929
state stateFn
3030
tokens []Token
31-
start, end int // Current position in input.
32-
width int // Last rune with.
31+
start, end int // current position in input
32+
width int // last rune width
3333
err *file.Error
3434
}
3535

@@ -91,6 +91,17 @@ func (l *lexer) acceptRun(valid string) {
9191
l.backup()
9292
}
9393

94+
func (l *lexer) acceptWord(word string) bool {
95+
pos := l.end
96+
for _, ch := range word {
97+
if l.next() != ch {
98+
l.end = pos
99+
return false
100+
}
101+
}
102+
return true
103+
}
104+
94105
func (l *lexer) error(format string, args ...interface{}) stateFn {
95106
if l.err == nil { // show first error
96107
l.err = &file.Error{

parser2/lexer/lexer_test.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,15 @@ type lexErrorTest struct {
2020

2121
var lexTests = []lexTest{
2222
{
23-
"1 02 1e3 1.2e-4 1_000_000",
23+
".5 1 02 1e3 1.2e-4 1_000_000 _42",
2424
[]Token{
25+
{Kind: Number, Value: ".5"},
2526
{Kind: Number, Value: "1"},
2627
{Kind: Number, Value: "02"},
2728
{Kind: Number, Value: "1e3"},
2829
{Kind: Number, Value: "1.2e-4"},
2930
{Kind: Number, Value: "1_000_000"},
31+
{Kind: Identifier, Value: "_42"},
3032
{Kind: EOF},
3133
},
3234
},
@@ -44,18 +46,32 @@ var lexTests = []lexTest{
4446
},
4547
},
4648
{
47-
"a and orb",
49+
"a and orb().val",
4850
[]Token{
4951
{Kind: Identifier, Value: "a"},
5052
{Kind: Operator, Value: "and"},
5153
{Kind: Identifier, Value: "orb"},
54+
{Kind: Bracket, Value: "("},
55+
{Kind: Bracket, Value: ")"},
56+
{Kind: Operator, Value: "."},
57+
{Kind: Identifier, Value: "val"},
5258
{Kind: EOF},
5359
},
5460
},
5561
{
56-
`not in`,
62+
`not in not abc not i not(false) not in`,
5763
[]Token{
5864
{Kind: Operator, Value: "not in"},
65+
{Kind: Operator, Value: "not"},
66+
{Kind: Identifier, Value: "abc"},
67+
{Kind: Operator, Value: "not"},
68+
{Kind: Identifier, Value: "i"},
69+
{Kind: Operator, Value: "not"},
70+
{Kind: Bracket, Value: "("},
71+
{Kind: Identifier, Value: "false"},
72+
{Kind: Bracket, Value: ")"},
73+
{Kind: Operator, Value: "not"},
74+
{Kind: Operator, Value: "in"},
5975
{Kind: EOF},
6076
},
6177
},

parser2/lexer/state.go

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -23,48 +23,52 @@ func root(l *lexer) stateFn {
2323
l.emitValue(String, str)
2424
case '0' <= r && r <= '9':
2525
l.backup()
26-
return lexNumber
26+
return number
2727
case strings.ContainsRune("([{", r):
2828
l.emit(Bracket)
2929
case strings.ContainsRune(")]}", r):
3030
l.emit(Bracket)
31-
case strings.ContainsRune(".,?:", r):
32-
l.emit(Punctuation)
33-
case strings.ContainsRune("!%&*+-/<=>^|~", r):
31+
case strings.ContainsRune(",?!:%#&*+-/<=>^|", r):
3432
l.backup()
35-
return lexOperator
33+
return operator
34+
case r == '.':
35+
l.backup()
36+
return dot
3637
case isAlphaNumeric(r):
3738
l.backup()
38-
return lexName
39+
return identifier
3940
default:
4041
return l.error("unrecognized character: %#U", r)
4142
}
4243
return root
4344
}
4445

45-
func lexNumber(l *lexer) stateFn {
46+
func number(l *lexer) stateFn {
4647
if !l.scanNumber() {
4748
return l.error("bad number syntax: %q", l.word())
4849
}
4950
l.emit(Number)
5051
return root
5152
}
5253

54+
const digits = "0123456789"
55+
5356
func (l *lexer) scanNumber() bool {
5457
// Is it hex?
55-
digits := "0123456789_"
56-
l.acceptRun(digits)
58+
l.accept(digits)
59+
l.acceptRun(digits + "_")
5760
if l.accept(".") {
5861
// Look ahead for the .. operator: if the dot is followed by another dot (1..2), it may be a range operator.
5962
if l.peek() == '.' {
6063
l.backup()
6164
return true
6265
}
63-
l.acceptRun(digits)
66+
l.accept(digits)
67+
l.acceptRun(digits + "_")
6468
}
6569
if l.accept("eE") {
6670
l.accept("+-")
67-
l.acceptRun("0123456789_")
71+
l.acceptRun(digits)
6872
}
6973
// Next thing mustn't be alphanumeric.
7074
if isAlphaNumeric(l.peek()) {
@@ -74,37 +78,51 @@ func (l *lexer) scanNumber() bool {
7478
return true
7579
}
7680

77-
func lexOperator(l *lexer) stateFn {
81+
func operator(l *lexer) stateFn {
7882
l.next()
79-
l.accept("|&=*")
83+
l.accept(".|&=*")
8084
l.emit(Operator)
8185
return root
8286
}
8387

84-
func lexName(l *lexer) stateFn {
85-
Loop:
88+
func dot(l *lexer) stateFn {
89+
l.next()
90+
if l.accept(digits) {
91+
l.backup()
92+
return number
93+
}
94+
l.accept(".")
95+
l.emit(Operator)
96+
return root
97+
}
98+
99+
func identifier(l *lexer) stateFn {
100+
loop:
86101
for {
87102
switch r := l.next(); {
88103
case isAlphaNumeric(r):
89-
// absorb.
104+
// absorb
90105
default:
91106
l.backup()
92107
switch l.word() {
93108
case "not":
94-
l.emit(Operator)
95-
case "in":
96-
l.emit(Operator)
97-
case "or":
98-
l.emit(Operator)
99-
case "and":
100-
l.emit(Operator)
101-
case "matches":
109+
return not
110+
case "in", "or", "and", "matches", "contains", "startsWith", "endsWith":
102111
l.emit(Operator)
103112
default:
104113
l.emit(Identifier)
105114
}
106-
break Loop
115+
break loop
107116
}
108117
}
109118
return root
110119
}
120+
121+
func not(l *lexer) stateFn {
122+
if l.acceptWord(" in") {
123+
l.emit(Operator)
124+
} else {
125+
l.emit(Operator)
126+
}
127+
return root
128+
}

parser2/lexer/token.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,12 @@ import (
88
type Kind string
99

1010
const (
11-
Identifier Kind = "Identifier"
12-
Number = "Number"
13-
String = "String"
14-
Operator = "Operator"
15-
Bracket = "Bracket"
16-
Punctuation = "Punctuation"
17-
EOF = "EOF"
11+
Identifier Kind = "Identifier"
12+
Number = "Number"
13+
String = "String"
14+
Operator = "Operator"
15+
Bracket = "Bracket"
16+
EOF = "EOF"
1817
)
1918

2019
type Token struct {

0 commit comments

Comments
 (0)