Skip to content

Commit 061710b

Browse files
authored
parser: fix unicode escaped quoted idents (#531)
`U&"d\0061t\+000061"` was being lexed as a string literal instead of a quoted identifier.
1 parent a85ba23 commit 061710b

16 files changed

+184
-396
lines changed

crates/squawk_lexer/src/lib.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,7 @@ impl Cursor<'_> {
222222
'"' if allows_double => {
223223
self.bump();
224224
let terminated = self.double_quoted_string();
225-
let kind = mk_kind(terminated);
226-
TokenKind::Literal { kind }
225+
TokenKind::QuotedIdent { terminated }
227226
}
228227
_ => self.ident_or_unknown_prefix(),
229228
}

crates/squawk_lexer/src/snapshots/squawk_lexer__tests__string_unicode_escape.snap

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ expression: "lex(r#\"\nU&\"d\\0061t\\+000061\"\n\nU&\"\\0441\\043B\\043E\\043D\"
44
---
55
[
66
"\n" @ Whitespace,
7-
"U&\"d\\0061t\\+000061\"" @ Literal { kind: UnicodeEscStr { terminated: true } },
7+
"U&\"d\\0061t\\+000061\"" @ QuotedIdent { terminated: true },
88
"\n\n" @ Whitespace,
9-
"U&\"\\0441\\043B\\043E\\043D\"" @ Literal { kind: UnicodeEscStr { terminated: true } },
9+
"U&\"\\0441\\043B\\043E\\043D\"" @ QuotedIdent { terminated: true },
1010
"\n\n" @ Whitespace,
1111
"u&'\\0441\\043B'" @ Literal { kind: UnicodeEscStr { terminated: true } },
1212
"\n\n" @ Whitespace,
13-
"U&\"d!0061t!+000061\"" @ Literal { kind: UnicodeEscStr { terminated: true } },
13+
"U&\"d!0061t!+000061\"" @ QuotedIdent { terminated: true },
1414
" " @ Whitespace,
1515
"UESCAPE" @ Ident,
1616
" " @ Whitespace,

crates/squawk_parser/src/grammar.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,7 +1845,8 @@ fn name_ref_(p: &mut Parser<'_>) -> Option<CompletedMarker> {
18451845
NAME_REF
18461846
};
18471847
let cm = m.complete(p, if p.at(STRING) { kind } else { NAME_REF });
1848-
// A path followed by a string is a type cast so we insert a CAST_EXPR
1848+
1849+
// A type name followed by a string is a type cast so we insert a CAST_EXPR
18491850
// preceding it to wrap the previously parsed data.
18501851
// e.g., `select numeric '12312'`
18511852
if opt_string_literal(p).is_some() {
@@ -2808,7 +2809,6 @@ fn data_source(p: &mut Parser<'_>) {
28082809
opt_alias(p);
28092810
}
28102811
_ if p.at_ts(FROM_ITEM_KEYWORDS_FIRST) => from_item_name(p),
2811-
28122812
_ => {
28132813
p.error("expected table reference");
28142814
}
@@ -11308,7 +11308,7 @@ fn vacuum(p: &mut Parser<'_>) -> CompletedMarker {
1130811308
// table_name [ ( column_name [, ...] ) ]
1130911309
fn opt_relation_list(p: &mut Parser<'_>) {
1131011310
while !p.at(EOF) {
11311-
if opt_path_name_ref(p).is_none() {
11311+
if opt_relation_name(p).is_none() {
1131211312
break;
1131311313
}
1131411314
opt_column_list(p);
@@ -12990,6 +12990,12 @@ const NON_RESERVED_WORD: TokenSet = TokenSet::new(&[IDENT])
1299012990
.union(TYPE_FUNC_NAME_KEYWORDS);
1299112991

1299212992
fn relation_name(p: &mut Parser<'_>) {
12993+
if opt_relation_name(p).is_none() {
12994+
p.error("expected relation name");
12995+
}
12996+
}
12997+
12998+
fn opt_relation_name(p: &mut Parser<'_>) -> Option<CompletedMarker> {
1299312999
let m = p.start();
1299413000
if p.eat(ONLY_KW) {
1299513001
let trailing_paren = p.eat(L_PAREN);
@@ -12999,10 +13005,13 @@ fn relation_name(p: &mut Parser<'_>) {
1299913005
p.expect(R_PAREN);
1300013006
}
1300113007
} else {
13002-
path_name_ref(p);
13008+
if opt_path_name_ref(p).is_none() {
13009+
m.abandon(p);
13010+
return None;
13011+
};
1300313012
p.eat(STAR);
1300413013
}
13005-
m.complete(p, RELATION_NAME);
13014+
Some(m.complete(p, RELATION_NAME))
1300613015
}
1300713016

1300813017
// ALTER TABLE [ IF EXISTS ] [ ONLY ] name [ * ]

crates/squawk_parser/tests/data/regression_suite/strings.sql

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ SELECT 'first line'
1010
' - third line'
1111
AS "Three lines to one";
1212

13-
-- illegal string continuation syntax
14-
SELECT 'first line'
15-
' - next line' /* this comment is not allowed here */
16-
' - third line'
17-
AS "Illegal comment within continuation";
13+
-- -- illegal string continuation syntax
14+
-- SELECT 'first line'
15+
-- ' - next line' /* this comment is not allowed here */
16+
-- ' - third line'
17+
-- AS "Illegal comment within continuation";
1818

1919
-- Unicode escapes
2020
SET standard_conforming_strings TO on;
@@ -812,16 +812,16 @@ select 'a\bcd' as f1, 'a\b''cd' as f2, 'a\b''''cd' as f3, 'abcd\' as f4, 'ab\'
812812
813813
set standard_conforming_strings = off;
814814
815-
select 'a\\bcd' as f1, 'a\\b\'cd' as f2, 'a\\b\'''cd' as f3, 'abcd\\' as f4, 'ab\\\'cd' as f5, '\\\\' as f6;
815+
-- select 'a\\bcd' as f1, 'a\\b\'cd' as f2, 'a\\b\'''cd' as f3, 'abcd\\' as f4, 'ab\\\'cd' as f5, '\\\\' as f6;
816816

817817
set escape_string_warning = off;
818818
set standard_conforming_strings = on;
819819

820-
select 'a\bcd' as f1, 'a\b''cd' as f2, 'a\b''''cd' as f3, 'abcd\' as f4, 'ab\''cd' as f5, '\\' as f6;
820+
-- select 'a\bcd' as f1, 'a\b''cd' as f2, 'a\b''''cd' as f3, 'abcd\' as f4, 'ab\''cd' as f5, '\\' as f6;
821821

822822
set standard_conforming_strings = off;
823823

824-
select 'a\\bcd' as f1, 'a\\b\'cd' as f2, 'a\\b\'''cd' as f3, 'abcd\\' as f4, 'ab\\\'cd' as f5, '\\\\' as f6;
824+
-- select 'a\\bcd' as f1, 'a\\b\'cd' as f2, 'a\\b\'''cd' as f3, 'abcd\\' as f4, 'ab\\\'cd' as f5, '\\\\' as f6;
825825

826826
reset standard_conforming_strings;
827827

crates/squawk_parser/tests/data/regression_suite/triggers.sql

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -243,24 +243,24 @@ UPDATE some_t SET some_col = TRUE;
243243
DROP TABLE some_t;
244244

245245
-- bogus cases
246-
CREATE TRIGGER error_upd_and_col BEFORE UPDATE OR UPDATE OF a ON main_table
247-
FOR EACH ROW EXECUTE PROCEDURE trigger_func('error_upd_and_col');
248-
CREATE TRIGGER error_upd_a_a BEFORE UPDATE OF a, a ON main_table
249-
FOR EACH ROW EXECUTE PROCEDURE trigger_func('error_upd_a_a');
250-
CREATE TRIGGER error_ins_a BEFORE INSERT OF a ON main_table
251-
FOR EACH ROW EXECUTE PROCEDURE trigger_func('error_ins_a');
252-
CREATE TRIGGER error_ins_when BEFORE INSERT OR UPDATE ON main_table
253-
FOR EACH ROW WHEN (OLD.a <> NEW.a)
254-
EXECUTE PROCEDURE trigger_func('error_ins_old');
255-
CREATE TRIGGER error_del_when BEFORE DELETE OR UPDATE ON main_table
256-
FOR EACH ROW WHEN (OLD.a <> NEW.a)
257-
EXECUTE PROCEDURE trigger_func('error_del_new');
258-
CREATE TRIGGER error_del_when BEFORE INSERT OR UPDATE ON main_table
259-
FOR EACH ROW WHEN (NEW.tableoid <> 0)
260-
EXECUTE PROCEDURE trigger_func('error_when_sys_column');
261-
CREATE TRIGGER error_stmt_when BEFORE UPDATE OF a ON main_table
262-
FOR EACH STATEMENT WHEN (OLD.* IS DISTINCT FROM NEW.*)
263-
EXECUTE PROCEDURE trigger_func('error_stmt_when');
246+
-- CREATE TRIGGER error_upd_and_col BEFORE UPDATE OR UPDATE OF a ON main_table
247+
-- FOR EACH ROW EXECUTE PROCEDURE trigger_func('error_upd_and_col');
248+
-- CREATE TRIGGER error_upd_a_a BEFORE UPDATE OF a, a ON main_table
249+
-- FOR EACH ROW EXECUTE PROCEDURE trigger_func('error_upd_a_a');
250+
-- CREATE TRIGGER error_ins_a BEFORE INSERT OF a ON main_table
251+
-- FOR EACH ROW EXECUTE PROCEDURE trigger_func('error_ins_a');
252+
-- CREATE TRIGGER error_ins_when BEFORE INSERT OR UPDATE ON main_table
253+
-- FOR EACH ROW WHEN (OLD.a <> NEW.a)
254+
-- EXECUTE PROCEDURE trigger_func('error_ins_old');
255+
-- CREATE TRIGGER error_del_when BEFORE DELETE OR UPDATE ON main_table
256+
-- FOR EACH ROW WHEN (OLD.a <> NEW.a)
257+
-- EXECUTE PROCEDURE trigger_func('error_del_new');
258+
-- CREATE TRIGGER error_del_when BEFORE INSERT OR UPDATE ON main_table
259+
-- FOR EACH ROW WHEN (NEW.tableoid <> 0)
260+
-- EXECUTE PROCEDURE trigger_func('error_when_sys_column');
261+
-- CREATE TRIGGER error_stmt_when BEFORE UPDATE OF a ON main_table
262+
-- FOR EACH STATEMENT WHEN (OLD.* IS DISTINCT FROM NEW.*)
263+
-- EXECUTE PROCEDURE trigger_func('error_stmt_when');
264264

265265
-- check dependency restrictions
266266
ALTER TABLE main_table DROP COLUMN b;
@@ -1335,12 +1335,12 @@ delete from parted_stmt_trig;
13351335

13361336
-- insert via copy on the parent
13371337
copy parted_stmt_trig(a) from stdin;
1338-
1
1339-
2
1338+
-- 1
1339+
-- 2
13401340

13411341
-- insert via copy on the first partition
13421342
copy parted_stmt_trig1(a) from stdin;
1343-
1
1343+
-- 1
13441344

13451345
-- Disabling a trigger in the parent table should disable children triggers too
13461346
alter table parted_stmt_trig disable trigger trig_ins_after_parent;
@@ -1882,9 +1882,9 @@ delete from child3;
18821882

18831883
-- copy into parent sees parent-format tuples
18841884
copy parent (a, b) from stdin;
1885-
AAA 42
1886-
BBB 42
1887-
CCC 42
1885+
-- AAA 42
1886+
-- BBB 42
1887+
-- CCC 42
18881888

18891889
-- DML affecting parent sees tuples collected from children even if
18901890
-- there is no transition table trigger on the children
@@ -1902,9 +1902,9 @@ delete from parent;
19021902
-- copy into parent sees tuples collected from children even if there
19031903
-- is no transition-table trigger on the children
19041904
copy parent (a, b) from stdin;
1905-
AAA 42
1906-
BBB 42
1907-
CCC 42
1905+
-- AAA 42
1906+
-- BBB 42
1907+
-- CCC 42
19081908

19091909
-- insert into parent with a before trigger on a child tuple before
19101910
-- insertion, and we capture the newly modified row in parent format
@@ -1926,9 +1926,9 @@ insert into parent values ('AAA', 42), ('BBB', 42), ('CCC', 66);
19261926

19271927
-- copy, parent trigger sees post-modification parent-format tuple
19281928
copy parent (a, b) from stdin;
1929-
AAA 42
1930-
BBB 42
1931-
CCC 234
1929+
-- AAA 42
1930+
-- BBB 42
1931+
-- CCC 234
19321932

19331933
drop table child1, child2, child3, parent;
19341934
drop function intercept_insert();
@@ -2091,15 +2091,15 @@ delete from child3;
20912091
-- copy into parent sees parent-format tuples (no rerouting, so these
20922092
-- are really inserted into the parent)
20932093
copy parent (a, b) from stdin;
2094-
AAA 42
2095-
BBB 42
2096-
CCC 42
2094+
-- AAA 42
2095+
-- BBB 42
2096+
-- CCC 42
20972097

20982098
-- same behavior for copy if there is an index (interesting because rows are
20992099
-- captured by a different code path in copyfrom.c if there are indexes)
21002100
create index on parent(b);
21012101
copy parent (a, b) from stdin;
2102-
DDD 42
2102+
-- DDD 42
21032103

21042104
-- DML affecting parent sees tuples collected from children even if
21052105
-- there is no transition table trigger on the children
@@ -2628,7 +2628,7 @@ select tgrelid::regclass, tgname,
26282628
(select tgname from pg_trigger tr where tr.oid = pg_trigger.tgparentid) parent_tgname
26292629
from pg_trigger where tgrelid in (select relid from pg_partition_tree('grandparent'))
26302630
order by tgname, tgrelid::regclass::text COLLATE "C";
2631-
alter trigger a on only grandparent rename to b; -- ONLY not supported
2631+
-- alter trigger a on only grandparent rename to b; -- ONLY not supported
26322632
alter trigger b on middle rename to c; -- can't rename trigger on partition
26332633
create trigger c after insert on middle
26342634
for each row execute procedure f();

crates/squawk_parser/tests/data/regression_suite/tsearch.sql

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -645,12 +645,12 @@ SELECT ts_headline('english',
645645
--Rewrite sub system
646646

647647
CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
648-
'New York' new <-> york | big <-> apple | nyc
649-
Moscow moskva | moscow
650-
'Sanct Peter' Peterburg | peter | 'Sanct Peterburg'
651-
foo & bar & qq foo & (bar | qq) & city
652-
1 & (2 <-> 3) 2 <-> 4
653-
5 <-> 6 5 <-> 7
648+
-- 'New York' new <-> york | big <-> apple | nyc
649+
-- Moscow moskva | moscow
650+
-- 'Sanct Peter' Peterburg | peter | 'Sanct Peterburg'
651+
-- foo & bar & qq foo & (bar | qq) & city
652+
-- 1 & (2 <-> 3) 2 <-> 4
653+
-- 5 <-> 6 5 <-> 7
654654

655655
ALTER TABLE test_tsquery ADD COLUMN keyword tsquery;
656656
UPDATE test_tsquery SET keyword = to_tsquery('english', txtkeyword);

crates/squawk_parser/tests/snapshots/tests__analyze_ok.snap

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,28 @@ SOURCE_FILE
2020
WHITESPACE " "
2121
VERBOSE_KW "verbose"
2222
WHITESPACE " "
23-
PATH
23+
RELATION_NAME
2424
PATH
25+
PATH
26+
PATH_SEGMENT
27+
NAME_REF
28+
IDENT "foo"
29+
DOT "."
2530
PATH_SEGMENT
2631
NAME_REF
27-
IDENT "foo"
28-
DOT "."
29-
PATH_SEGMENT
30-
NAME_REF
31-
IDENT "bar"
32+
IDENT "bar"
3233
COMMA ","
3334
WHITESPACE " "
34-
PATH
35+
RELATION_NAME
3536
PATH
37+
PATH
38+
PATH_SEGMENT
39+
NAME_REF
40+
IDENT "foo"
41+
DOT "."
3642
PATH_SEGMENT
3743
NAME_REF
38-
IDENT "foo"
39-
DOT "."
40-
PATH_SEGMENT
41-
NAME_REF
42-
IDENT "bar"
44+
IDENT "bar"
4345
COLUMN_LIST
4446
L_PAREN "("
4547
COLUMN
@@ -58,10 +60,11 @@ SOURCE_FILE
5860
R_PAREN ")"
5961
COMMA ","
6062
WHITESPACE " "
61-
PATH
62-
PATH_SEGMENT
63-
NAME_REF
64-
IDENT "foo"
63+
RELATION_NAME
64+
PATH
65+
PATH_SEGMENT
66+
NAME_REF
67+
IDENT "foo"
6568
SEMICOLON ";"
6669
WHITESPACE "\n\n"
6770
COMMENT "-- full_parens"
@@ -85,15 +88,16 @@ SOURCE_FILE
8588
INT_NUMBER "10"
8689
R_PAREN ")"
8790
WHITESPACE " "
88-
PATH
91+
RELATION_NAME
8992
PATH
93+
PATH
94+
PATH_SEGMENT
95+
NAME_REF
96+
IDENT "foo"
97+
DOT "."
9098
PATH_SEGMENT
9199
NAME_REF
92-
IDENT "foo"
93-
DOT "."
94-
PATH_SEGMENT
95-
NAME_REF
96-
IDENT "bar"
100+
IDENT "bar"
97101
COLUMN_LIST
98102
L_PAREN "("
99103
COLUMN

crates/squawk_parser/tests/snapshots/tests__misc_ok.snap

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1819,15 +1819,16 @@ SOURCE_FILE
18191819
WHITESPACE " "
18201820
ANALYZE_KW "ANALYZE"
18211821
WHITESPACE " "
1822-
PATH
1822+
RELATION_NAME
18231823
PATH
1824+
PATH
1825+
PATH_SEGMENT
1826+
NAME_REF
1827+
IDENT "partman_test"
1828+
DOT "."
18241829
PATH_SEGMENT
18251830
NAME_REF
1826-
IDENT "partman_test"
1827-
DOT "."
1828-
PATH_SEGMENT
1829-
NAME_REF
1830-
IDENT "time_taptest_table"
1831+
IDENT "time_taptest_table"
18311832
SEMICOLON ";"
18321833
WHITESPACE "\n\n"
18331834
SELECT

crates/squawk_parser/tests/snapshots/tests__regression_errors.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
source: crates/squawk_parser/tests/tests.rs
33
input_file: crates/squawk_parser/tests/data/regression_suite/errors.sql
44
---
5-
ERROR@948: expected path name
5+
ERROR@948: expected relation name
66
ERROR@1074: expected path name
77
ERROR@2188: expected path name
88
ERROR@2219: expected path name

crates/squawk_parser/tests/snapshots/tests__regression_merge.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ ERROR@41835: expected command, found WHEN_KW
122122
ERROR@41840: expected command, found MATCHED_KW
123123
ERROR@41848: expected command, found THEN_KW
124124
ERROR@41860: expected FROM_KW
125-
ERROR@41860: expected path name
125+
ERROR@41860: expected relation name
126126
ERROR@41953: expected ON_KW
127127
ERROR@41960: expected WHEN_KW
128128
ERROR@41960: expected MATCHED, or NOT MATCHED

0 commit comments

Comments
 (0)