Skip to content

Commit f68b1a8

Browse files
authored
parser: fix idents with uescape (#553)
1 parent fe36450 commit f68b1a8

File tree

7 files changed

+201
-56
lines changed

7 files changed

+201
-56
lines changed

crates/squawk_parser/src/grammar.rs

Lines changed: 51 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1485,11 +1485,7 @@ fn opt_name(p: &mut Parser<'_>) -> Option<CompletedMarker> {
14851485
return None;
14861486
}
14871487
let m = p.start();
1488-
if p.eat(IDENT) {
1489-
if p.eat(UESCAPE_KW) {
1490-
p.expect(STRING);
1491-
}
1492-
} else {
1488+
if !opt_ident(p) {
14931489
p.bump_any();
14941490
}
14951491
Some(m.complete(p, NAME))
@@ -1894,7 +1890,9 @@ fn name_ref_(p: &mut Parser<'_>) -> Option<CompletedMarker> {
18941890
INTERVAL_TYPE
18951891
}
18961892
_ => {
1897-
p.bump_any();
1893+
if !opt_ident(p) {
1894+
p.bump_any();
1895+
}
18981896
NAME_REF
18991897
}
19001898
};
@@ -3384,7 +3382,8 @@ fn opt_include_columns(p: &mut Parser<'_>) -> Option<CompletedMarker> {
33843382

33853383
// [ WITH ( storage_parameter [= value] [, ... ] ) ]
33863384
fn opt_with_params(p: &mut Parser<'_>) -> Option<CompletedMarker> {
3387-
if p.at(WITH_KW) {
3385+
// check for both in case someone forgot a semi after `create table`
3386+
if p.at(WITH_KW) && p.nth_at(1, L_PAREN) {
33883387
let m = p.start();
33893388
p.bump(WITH_KW);
33903389
p.expect(L_PAREN);
@@ -4082,7 +4081,9 @@ fn opt_compression_method(p: &mut Parser<'_>) -> Option<CompletedMarker> {
40824081
let m = p.start();
40834082
// [ COMPRESSION compression_method ]
40844083
if p.eat(COMPRESSION_KW) && (p.at(DEFAULT_KW) || p.at(IDENT)) {
4085-
p.bump_any();
4084+
if !opt_ident(p) && !p.eat(DEFAULT_KW) {
4085+
p.error("expected default or identifier");
4086+
}
40864087
Some(m.complete(p, COMPRESSION_METHOD))
40874088
} else {
40884089
m.abandon(p);
@@ -4094,7 +4095,9 @@ fn opt_storage(p: &mut Parser<'_>) -> Option<CompletedMarker> {
40944095
let m = p.start();
40954096
// [ STORAGE { PLAIN | EXTERNAL | EXTENDED | MAIN | DEFAULT } ]
40964097
if p.eat(STORAGE_KW) && (p.at(DEFAULT_KW) || p.at(EXTERNAL_KW) || p.at(IDENT)) {
4097-
p.bump_any();
4098+
if !opt_ident(p) {
4099+
p.bump_any();
4100+
}
40984101
Some(m.complete(p, STORAGE))
40994102
} else {
41004103
m.abandon(p);
@@ -4290,7 +4293,7 @@ fn window_definition(p: &mut Parser<'_>) -> Option<CompletedMarker> {
42904293
return None;
42914294
}
42924295
let m = p.start();
4293-
p.eat(IDENT);
4296+
opt_ident(p);
42944297
if p.eat(PARTITION_KW) {
42954298
p.expect(BY_KW);
42964299
if expr(p).is_none() {
@@ -4656,7 +4659,7 @@ fn part_elem(p: &mut Parser<'_>, allow_extra_params: bool) -> bool {
46564659
}
46574660
opt_collate(p);
46584661
// [ opclass ]
4659-
p.eat(IDENT);
4662+
opt_ident(p);
46604663
if allow_extra_params {
46614664
// [ ( opclass_parameter = value [, ... ] ) ]
46624665
if p.eat(L_PAREN) {
@@ -4716,10 +4719,10 @@ fn partition_option(p: &mut Parser<'_>) {
47164719
// FOR VALUES WITH (modulus 5, remainder 0)
47174720
if p.eat(WITH_KW) {
47184721
p.expect(L_PAREN);
4719-
p.expect(IDENT);
4722+
ident(p);
47204723
p.expect(INT_NUMBER);
47214724
p.expect(COMMA);
4722-
p.expect(IDENT);
4725+
ident(p);
47234726
p.expect(INT_NUMBER);
47244727
p.expect(R_PAREN);
47254728
// FOR VALUES IN '(' expr_list ')'
@@ -4951,7 +4954,9 @@ fn opt_partition_by(p: &mut Parser<'_>) -> Option<CompletedMarker> {
49514954
p.expect(BY_KW);
49524955
// name
49534956
if p.at_ts(TYPE_KEYWORDS) || p.at(IDENT) {
4954-
p.bump_any();
4957+
if !opt_ident(p) {
4958+
p.bump_any();
4959+
}
49554960
}
49564961
// (
49574962
// { column_name | ( expression ) }
@@ -8435,7 +8440,7 @@ fn create_event_trigger(p: &mut Parser<'_>) -> CompletedMarker {
84358440
p.expect(TRIGGER_KW);
84368441
name(p);
84378442
p.expect(ON_KW);
8438-
p.expect(IDENT);
8443+
ident(p);
84398444
if p.eat(WHEN_KW) {
84408445
event_trigger_when(p);
84418446
while !p.at(EOF) && p.eat(AND_KW) {
@@ -8860,7 +8865,7 @@ fn create_policy(p: &mut Parser<'_>) -> CompletedMarker {
88608865
p.expect(ON_KW);
88618866
path_name_ref(p);
88628867
if p.eat(AS_KW) {
8863-
p.expect(IDENT);
8868+
ident(p);
88648869
}
88658870
if p.eat(FOR_KW) {
88668871
let _ = p.eat(ALL_KW)
@@ -10148,7 +10153,7 @@ fn explain_option(p: &mut Parser<'_>) {
1014810153
return;
1014910154
}
1015010155
// { TEXT | XML | JSON | YAML }
10151-
if p.eat(TEXT_KW) || p.eat(XML_KW) || p.eat(JSON_KW) || p.eat(IDENT) {
10156+
if p.eat(TEXT_KW) || p.eat(XML_KW) || p.eat(JSON_KW) || opt_ident(p) {
1015210157
return;
1015310158
}
1015410159
}
@@ -10954,7 +10959,7 @@ fn set_role(p: &mut Parser<'_>) -> CompletedMarker {
1095410959
p.bump(SET_KW);
1095510960
let _ = p.eat(SESSION_KW) || p.eat(LOCAL_KW);
1095610961
p.expect(ROLE_KW);
10957-
if !p.eat(NONE_KW) && !p.eat(IDENT) && opt_string_literal(p).is_none() {
10962+
if !p.eat(NONE_KW) && !opt_ident(p) && opt_string_literal(p).is_none() {
1095810963
p.error("expected NONE or role_name");
1095910964
}
1096010965
}
@@ -12161,7 +12166,7 @@ fn insert(p: &mut Parser<'_>, m: Option<Marker>) -> CompletedMarker {
1216112166
}
1216212167
opt_collate(p);
1216312168
// [ opclass ]
12164-
p.eat(IDENT);
12169+
opt_ident(p);
1216512170
// [, ...]
1216612171
if !p.eat(COMMA) {
1216712172
break;
@@ -12336,11 +12341,7 @@ fn delete(p: &mut Parser<'_>, m: Option<Marker>) -> CompletedMarker {
1233612341
p.bump(DELETE_KW);
1233712342
p.expect(FROM_KW);
1233812343
relation_name(p);
12339-
if p.eat(AS_KW) {
12340-
p.expect(IDENT);
12341-
} else {
12342-
p.eat(IDENT);
12343-
}
12344+
opt_as_alias(p);
1234412345
{
1234512346
let m = p.start();
1234612347
if p.eat(USING_KW) {
@@ -12371,7 +12372,7 @@ fn opt_where_current_of(p: &mut Parser<'_>) {
1237112372
if p.eat(WHERE_KW) {
1237212373
if p.eat(CURRENT_KW) {
1237312374
p.expect(OF_KW);
12374-
p.expect(IDENT);
12375+
ident(p);
1237512376
}
1237612377
}
1237712378
}
@@ -12746,7 +12747,9 @@ fn opt_function_option(p: &mut Parser<'_>) -> bool {
1274612747
// string for language is deprecated but let's support it
1274712748
if opt_string_literal(p).is_none() {
1274812749
if p.at_ts(UNRESERVED_KEYWORDS) || p.at(IDENT) {
12749-
p.bump_any();
12750+
if !opt_ident(p) {
12751+
p.bump_any();
12752+
}
1275012753
} else {
1275112754
p.error(format!("expected a language name, got {:?}", p.current()));
1275212755
}
@@ -12817,7 +12820,7 @@ fn opt_function_option(p: &mut Parser<'_>) -> bool {
1281712820
// PARALLEL { UNSAFE | RESTRICTED | SAFE }
1281812821
PARALLEL_KW => {
1281912822
p.bump(PARALLEL_KW);
12820-
p.expect(IDENT);
12823+
ident(p);
1282112824
PARALLEL_FUNC_OPTION
1282212825
}
1282312826
// COST execution_cost
@@ -13157,17 +13160,34 @@ fn create_extension(p: &mut Parser<'_>) -> CompletedMarker {
1315713160
name(p);
1315813161
p.eat(WITH_KW);
1315913162
if p.eat(SCHEMA_KW) {
13160-
p.expect(IDENT);
13163+
name_ref(p);
1316113164
}
1316213165
if p.eat(VERSION_KW) {
13163-
if opt_string_literal(p).is_none() && !p.eat(IDENT) {
13166+
if opt_string_literal(p).is_none() && !opt_ident(p) {
1316413167
p.error("expected string literal or IDENT");
1316513168
}
1316613169
}
1316713170
p.eat(CASCADE_KW);
1316813171
m.complete(p, CREATE_EXTENSION)
1316913172
}
1317013173

13174+
/// Optionally consumes an identifier together with a trailing `UESCAPE` clause.
///
/// If `p.eat(IDENT)` succeeds, a following `UESCAPE_KW` is also eaten when
/// present, and in that case a `STRING` is then required via `p.expect`.
/// Presumably this covers inputs such as `U&"d!0061t!+000061" UESCAPE '!'`
/// — confirm against the lexer's token kinds.
///
/// Returns `true` when an `IDENT` was eaten, `false` otherwise. On the
/// `false` path no further parser calls are made, so callers can fall back
/// to another alternative (e.g. `p.bump_any()` or `p.error(..)`).
fn opt_ident(p: &mut Parser<'_>) -> bool {
13175+
if p.eat(IDENT) {
13176+
if p.eat(UESCAPE_KW) {
13177+
p.expect(STRING);
13178+
}
13179+
true
13180+
} else {
13181+
false
13182+
}
13183+
}
13184+
13185+
/// Required-identifier counterpart of `opt_ident`.
///
/// Delegates to `opt_ident(p)`; when that returns `false`, reports
/// "expected identifier" through `p.error`. This function itself returns
/// nothing and consumes no token on the failure path.
fn ident(p: &mut Parser<'_>) {
13186+
if !opt_ident(p) {
13187+
p.error("expected identifier");
13188+
}
13189+
}
13190+
1317113191
// { value | 'value' | DEFAULT }
1317213192
// where value can be specified as string constants, identifiers, numbers, or
1317313193
// comma-separated lists of these
@@ -13180,7 +13200,7 @@ fn config_value(p: &mut Parser<'_>) -> bool {
1318013200
while !p.at(EOF) {
1318113201
if opt_string_literal(p).is_none()
1318213202
&& opt_numeric_literal(p).is_none()
13183-
&& !p.eat(IDENT)
13203+
&& !opt_ident(p)
1318413204
&& !opt_bool_literal(p)
1318513205
{
1318613206
if p.at_ts(BARE_LABEL_KEYWORDS) {
@@ -13838,11 +13858,7 @@ fn opt_alter_table_action(p: &mut Parser<'_>) -> Option<SyntaxKind> {
1383813858
fn opt_col_label(p: &mut Parser<'_>) -> bool {
1383913859
if p.at_ts(COL_LABEL_FIRST) {
1384013860
let m = p.start();
13841-
if p.eat(IDENT) {
13842-
if p.eat(UESCAPE_KW) {
13843-
p.expect(STRING);
13844-
}
13845-
} else {
13861+
if !opt_ident(p) {
1384613862
p.bump_any();
1384713863
}
1384813864
m.complete(p, NAME);

crates/squawk_parser/tests/data/err/create_table.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,13 @@ create unlogged table t (
4545
cycle
4646
)
4747
);
48+
49+
create table z (
50+
a int
51+
)
52+
-- ^ missing semi
53+
54+
with t as (
55+
select 1
56+
)
57+
select * from t;

crates/squawk_parser/tests/data/ok/select.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,10 @@ WHERE ts >= DATE '2023-12-21' AND ts < DATE '2023-12-22'
498498
GROUP BY sensor_id, DATE_TRUNC('day', ts)
499499
ORDER BY sensor_id, day;
500500

501+
-- select with uescape;
502+
select U&"d!0061t!+000061" UESCAPE '!';
503+
SELECT U&' \' UESCAPE '!';
504+
501505
-- select_from_user_table
502506
select * from user;
503507

crates/squawk_parser/tests/snapshots/tests__create_ext_ok.snap

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ SOURCE_FILE
4040
WHITESPACE " "
4141
SCHEMA_KW "schema"
4242
WHITESPACE " "
43-
IDENT "addons"
43+
NAME_REF
44+
IDENT "addons"
4445
SEMICOLON ";"
4546
WHITESPACE "\n\n"
4647
CREATE_EXTENSION
@@ -55,7 +56,8 @@ SOURCE_FILE
5556
WHITESPACE " "
5657
SCHEMA_KW "schema"
5758
WHITESPACE " "
58-
IDENT "bar"
59+
NAME_REF
60+
IDENT "bar"
5961
WHITESPACE "\n "
6062
VERSION_KW "version"
6163
WHITESPACE " "
@@ -74,7 +76,8 @@ SOURCE_FILE
7476
WHITESPACE "\n "
7577
SCHEMA_KW "schema"
7678
WHITESPACE " "
77-
IDENT "bar"
79+
NAME_REF
80+
IDENT "bar"
7881
WHITESPACE "\n "
7982
VERSION_KW "version"
8083
WHITESPACE " "

crates/squawk_parser/tests/snapshots/tests__create_table_err.snap

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,8 +406,74 @@ SOURCE_FILE
406406
WHITESPACE "\n"
407407
R_PAREN ")"
408408
SEMICOLON ";"
409+
WHITESPACE "\n\n"
410+
CREATE_TABLE
411+
CREATE_KW "create"
412+
WHITESPACE " "
413+
TABLE_KW "table"
414+
WHITESPACE " "
415+
PATH
416+
PATH_SEGMENT
417+
NAME
418+
IDENT "z"
419+
WHITESPACE " "
420+
TABLE_ARG_LIST
421+
L_PAREN "("
422+
WHITESPACE "\n "
423+
COLUMN
424+
NAME
425+
IDENT "a"
426+
WHITESPACE " "
427+
PATH_TYPE
428+
PATH
429+
PATH_SEGMENT
430+
NAME_REF
431+
INT_KW "int"
432+
WHITESPACE "\n"
433+
R_PAREN ")"
434+
WHITESPACE "\n"
435+
COMMENT "-- ^ missing semi"
436+
WHITESPACE "\n\n"
437+
SELECT
438+
WITH_CLAUSE
439+
WITH_KW "with"
440+
WHITESPACE " "
441+
WITH_TABLE
442+
NAME
443+
IDENT "t"
444+
WHITESPACE " "
445+
AS_KW "as"
446+
WHITESPACE " "
447+
L_PAREN "("
448+
WHITESPACE "\n "
449+
SELECT
450+
SELECT_CLAUSE
451+
SELECT_KW "select"
452+
WHITESPACE " "
453+
TARGET_LIST
454+
TARGET
455+
LITERAL
456+
INT_NUMBER "1"
457+
WHITESPACE "\n"
458+
R_PAREN ")"
459+
WHITESPACE "\n"
460+
SELECT_CLAUSE
461+
SELECT_KW "select"
462+
WHITESPACE " "
463+
TARGET_LIST
464+
TARGET
465+
STAR "*"
466+
WHITESPACE " "
467+
FROM_CLAUSE
468+
FROM_KW "from"
469+
WHITESPACE " "
470+
FROM_ITEM
471+
NAME_REF
472+
IDENT "t"
473+
SEMICOLON ";"
409474
WHITESPACE "\n"
410475
---
411476
ERROR@39: expected path name
412477
ERROR@143: unexpected trailing comma
413478
ERROR@201: unexpected trailing comma
479+
ERROR@947: expected SEMICOLON

0 commit comments

Comments
 (0)