Skip to content

Commit 4057101

Browse files
authored
Fix string escape sequences in lexer (#209)
1 parent 60759d4 commit 4057101

File tree

5 files changed

+847
-14
lines changed

5 files changed

+847
-14
lines changed

parser/lexer.go

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -249,10 +249,28 @@ func (l *Lexer) consumeMultiLineComment() {
249249
func (l *Lexer) consumeString() error {
250250
i := 1
251251
endChar := byte('\'')
252-
for l.peekOk(i) && l.peekN(i) != endChar {
252+
for l.peekOk(i) {
253+
c := l.peekN(i)
254+
// backslash escape
255+
if c == '\\' {
256+
i++
257+
if l.peekOk(i) {
258+
i++
259+
}
260+
continue
261+
}
262+
// single quote
263+
if c == endChar {
264+
// double single quote ''
265+
if l.peekOk(i+1) && l.peekN(i+1) == endChar {
266+
i += 2
267+
continue
268+
}
269+
break
270+
}
253271
i++
254272
}
255-
if !l.peekOk(i) {
273+
if !l.peekOk(i) || l.peekN(i) != endChar {
256274
return errors.New("invalid string")
257275
}
258276
l.lastToken = &Token{

parser/lexer_test.go

Lines changed: 70 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -32,18 +32,76 @@ func TestConsumeComment(t *testing.T) {
3232
}
3333

3434
func TestConsumeString(t *testing.T) {
35-
strs := []string{
36-
"'hello world'",
37-
"'123'",
38-
}
39-
for _, s := range strs {
40-
lexer := NewLexer(s)
41-
err := lexer.consumeToken()
42-
require.NoError(t, err)
43-
require.Equal(t, TokenKindString, lexer.lastToken.Kind)
44-
require.Equal(t, strings.Trim(s, "'"), lexer.lastToken.String)
45-
require.True(t, lexer.isEOF())
46-
}
35+
t.Run("Simple strings", func(t *testing.T) {
36+
strs := []string{
37+
"'hello world'",
38+
"'123'",
39+
}
40+
for _, s := range strs {
41+
lexer := NewLexer(s)
42+
err := lexer.consumeToken()
43+
require.NoError(t, err)
44+
require.Equal(t, TokenKindString, lexer.lastToken.Kind)
45+
require.Equal(t, strings.Trim(s, "'"), lexer.lastToken.String)
46+
require.True(t, lexer.isEOF())
47+
}
48+
})
49+
50+
t.Run("Strings with backslash-escaped quotes", func(t *testing.T) {
51+
testCases := []struct {
52+
input string
53+
expected string
54+
}{
55+
{`'hello\'world'`, `hello\'world`},
56+
{`'test\''`, `test\'`},
57+
{`'\'abc\''`, `\'abc\'`},
58+
}
59+
for _, tc := range testCases {
60+
lexer := NewLexer(tc.input)
61+
err := lexer.consumeToken()
62+
require.NoError(t, err, "Failed to parse: %s", tc.input)
63+
require.Equal(t, TokenKindString, lexer.lastToken.Kind)
64+
require.Equal(t, tc.expected, lexer.lastToken.String)
65+
require.True(t, lexer.isEOF())
66+
}
67+
})
68+
69+
t.Run("Strings with double single quotes", func(t *testing.T) {
70+
testCases := []struct {
71+
input string
72+
expected string
73+
}{
74+
{`'hello''world'`, `hello''world`},
75+
{`'test''123'`, `test''123`},
76+
{`'abc''def''ghi'`, `abc''def''ghi`},
77+
}
78+
for _, tc := range testCases {
79+
lexer := NewLexer(tc.input)
80+
err := lexer.consumeToken()
81+
require.NoError(t, err, "Failed to parse: %s", tc.input)
82+
require.Equal(t, TokenKindString, lexer.lastToken.Kind)
83+
require.Equal(t, tc.expected, lexer.lastToken.String)
84+
require.True(t, lexer.isEOF())
85+
}
86+
})
87+
88+
t.Run("Strings with backslash-escaped backslashes", func(t *testing.T) {
89+
testCases := []struct {
90+
input string
91+
expected string
92+
}{
93+
{`'a\\b'`, `a\\b`},
94+
{`'test\\123'`, `test\\123`},
95+
}
96+
for _, tc := range testCases {
97+
lexer := NewLexer(tc.input)
98+
err := lexer.consumeToken()
99+
require.NoError(t, err, "Failed to parse: %s", tc.input)
100+
require.Equal(t, TokenKindString, lexer.lastToken.Kind)
101+
require.Equal(t, tc.expected, lexer.lastToken.String)
102+
require.True(t, lexer.isEOF())
103+
}
104+
})
47105
}
48106

49107
func TestConsumeNumber(t *testing.T) {
Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,22 @@
1+
-- Origin SQL:
2+
SELECT * FROM test_table SETTINGS additional_table_filters={'test_table': 'status = 1'};
3+
4+
SELECT * FROM test_table SETTINGS additional_table_filters={'test_table': 'value = \'test\''};
5+
6+
SELECT * FROM test_table SETTINGS additional_table_filters={'test_table': 'value = ''test'''};
7+
8+
SELECT * FROM test_table
9+
SETTINGS additional_table_filters={'test_table': 'id IN (\'a\', \'b\') AND status = \'active\''}
10+
FORMAT JSON;
11+
12+
SELECT number, x, y FROM (SELECT number FROM system.numbers LIMIT 5) f
13+
ANY LEFT JOIN (SELECT x, y FROM table_1) s ON f.number = s.x
14+
SETTINGS additional_table_filters={'system.numbers':'number != 3', 'table_1':'x != 2'};
15+
16+
17+
-- Format SQL:
18+
SELECT * FROM test_table SETTINGS additional_table_filters={'test_table': 'status = 1'};
19+
SELECT * FROM test_table SETTINGS additional_table_filters={'test_table': 'value = \'test\''};
20+
SELECT * FROM test_table SETTINGS additional_table_filters={'test_table': 'value = ''test'''};
21+
SELECT * FROM test_table SETTINGS additional_table_filters={'test_table': 'id IN (\'a\', \'b\') AND status = \'active\''} FORMAT JSON;
22+
SELECT number, x, y FROM (SELECT number FROM system.numbers LIMIT 5) AS f ANY LEFT JOIN (SELECT x, y FROM table_1) AS s ON f.number = s.x SETTINGS additional_table_filters={'system.numbers': 'number != 3', 'table_1': 'x != 2'};

0 commit comments

Comments
 (0)