Skip to content

Commit 733e718

Browse files
authored
fix: paren data source (#497)
Fixes parsing of data sources within parentheses. A parenthesized data source can be either (1) a select statement or (2) a join clause. The following is now parsed properly: ```sql select f1, count(*) from t1 x(x0,x1) left join (t1 left join t2 using(f1)) on (x0 = 0) ``` This change also uses `column_list` instead of `tuple_expr` for `join using (…)` nodes, and adds a short comment to the regression test to track how many cases are still failing.
1 parent bd62381 commit 733e718

File tree

5 files changed

+180
-44
lines changed

5 files changed

+180
-44
lines changed

crates/squawk_parser/src/grammar.rs

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2702,8 +2702,7 @@ fn data_source(p: &mut Parser<'_>) {
27022702
p.eat(LATERAL_KW);
27032703
match p.current() {
27042704
L_PAREN => {
2705-
// TODO: this should be `paren_select` instead of a general `tuple_expr`, since only a select statement is allowed inside
2706-
tuple_expr(p);
2705+
paren_data_source(p);
27072706
opt_alias(p);
27082707
}
27092708
JSON_TABLE_KW => {
@@ -2731,6 +2730,33 @@ fn data_source(p: &mut Parser<'_>) {
27312730
}
27322731
}
27332732

2733+
fn paren_data_source(p: &mut Parser<'_>) -> CompletedMarker {
2734+
assert!(p.at(L_PAREN));
2735+
let m = p.start();
2736+
p.bump(L_PAREN);
2737+
2738+
// Try to parse as a SELECT statement first
2739+
if p.at_ts(SELECT_FIRST) {
2740+
if select(p, None).is_some() {
2741+
p.expect(R_PAREN);
2742+
return m.complete(p, PAREN_EXPR);
2743+
}
2744+
}
2745+
2746+
// Then try to parse as a FROM_ITEM (which includes table references and joins)
2747+
if opt_from_item(p) {
2748+
p.expect(R_PAREN);
2749+
return m.complete(p, PAREN_EXPR);
2750+
}
2751+
2752+
// Fall back to general expression parsing
2753+
if expr(p).is_none() {
2754+
p.error("expected an expression");
2755+
}
2756+
p.expect(R_PAREN);
2757+
m.complete(p, PAREN_EXPR)
2758+
}
2759+
27342760
// USING data_source ON join_condition
27352761
fn merge_using_clause(p: &mut Parser<'_>) {
27362762
let m = p.start();
@@ -2834,7 +2860,7 @@ fn join(p: &mut Parser<'_>) {
28342860
// USING ( join_column [, ...] )
28352861
p.expect(USING_KW);
28362862
if p.at(L_PAREN) {
2837-
tuple_expr(p);
2863+
column_list(p);
28382864
} else {
28392865
p.error("expected L_PAREN");
28402866
}

crates/squawk_parser/tests/data/ok/select.sql

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,11 @@ select * from t full join t2 using (id);
356356
-- multi conditions
357357
select * from t join t2 on t2.team_id = t.team_id and t2.id = t.org_id;
358358

359+
-- nested joins
360+
select f1, count(*) from
361+
t1 x(x0,x1) left join (t1 left join t2 using(f1)) on (x0 = 0)
362+
group by f1;
363+
359364
-- using w/ join alias
360365
SELECT * from t join t2 using (id) as foo;
361366

@@ -487,10 +492,10 @@ select current_schema;
487492
select * from t order by a using >>>;
488493

489494
-- order_by_regression
490-
SELECT sensor_id, DATE_TRUNC('day', ts) AS day, MAX(value) AS max_value, MIN(value) AS min_value
491-
FROM sensors_uncompressed
495+
SELECT sensor_id, DATE_TRUNC('day', ts) AS day, MAX(value) AS max_value, MIN(value) AS min_value
496+
FROM sensors_uncompressed
492497
WHERE ts >= DATE '2023-12-21' AND ts < DATE '2023-12-22'
493-
GROUP BY sensor_id, DATE_TRUNC('day', ts)
498+
GROUP BY sensor_id, DATE_TRUNC('day', ts)
494499
ORDER BY sensor_id, day;
495500

496501
-- select_from_user_table

crates/squawk_parser/tests/snapshots/tests__misc_ok.snap

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3020,10 +3020,11 @@ SOURCE_FILE
30203020
USING_CLAUSE
30213021
USING_KW "using"
30223022
WHITESPACE " "
3023-
PAREN_EXPR
3023+
COLUMN_LIST
30243024
L_PAREN "("
3025-
NAME_REF
3026-
IDENT "jobid"
3025+
COLUMN
3026+
NAME_REF
3027+
IDENT "jobid"
30273028
R_PAREN ")"
30283029
WHITESPACE "\n"
30293030
WHERE_CLAUSE
@@ -6300,10 +6301,11 @@ SOURCE_FILE
63006301
USING_CLAUSE
63016302
USING_KW "USING"
63026303
WHITESPACE " "
6303-
PAREN_EXPR
6304+
COLUMN_LIST
63046305
L_PAREN "("
6305-
NAME_REF
6306-
IDENT "turbine_id"
6306+
COLUMN
6307+
NAME_REF
6308+
IDENT "turbine_id"
63076309
R_PAREN ")"
63086310
WHITESPACE "\n"
63096311
WHERE_CLAUSE

crates/squawk_parser/tests/snapshots/tests__select_ok.snap

Lines changed: 134 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4366,10 +4366,11 @@ SOURCE_FILE
43664366
USING_CLAUSE
43674367
USING_KW "using"
43684368
WHITESPACE " "
4369-
PAREN_EXPR
4369+
COLUMN_LIST
43704370
L_PAREN "("
4371-
NAME_REF
4372-
IDENT "id"
4371+
COLUMN
4372+
NAME_REF
4373+
IDENT "id"
43734374
R_PAREN ")"
43744375
SEMICOLON ";"
43754376
WHITESPACE "\n"
@@ -4398,14 +4399,16 @@ SOURCE_FILE
43984399
USING_CLAUSE
43994400
USING_KW "using"
44004401
WHITESPACE " "
4401-
TUPLE_EXPR
4402+
COLUMN_LIST
44024403
L_PAREN "("
4403-
NAME_REF
4404-
IDENT "id"
4404+
COLUMN
4405+
NAME_REF
4406+
IDENT "id"
44054407
COMMA ","
44064408
WHITESPACE " "
4407-
NAME_REF
4408-
IDENT "foo"
4409+
COLUMN
4410+
NAME_REF
4411+
IDENT "foo"
44094412
R_PAREN ")"
44104413
SEMICOLON ";"
44114414
WHITESPACE "\n\n"
@@ -4436,10 +4439,11 @@ SOURCE_FILE
44364439
USING_CLAUSE
44374440
USING_KW "using"
44384441
WHITESPACE " "
4439-
PAREN_EXPR
4442+
COLUMN_LIST
44404443
L_PAREN "("
4441-
NAME_REF
4442-
IDENT "id"
4444+
COLUMN
4445+
NAME_REF
4446+
IDENT "id"
44434447
R_PAREN ")"
44444448
SEMICOLON ";"
44454449
WHITESPACE "\n\n"
@@ -4470,10 +4474,11 @@ SOURCE_FILE
44704474
USING_CLAUSE
44714475
USING_KW "using"
44724476
WHITESPACE " "
4473-
PAREN_EXPR
4477+
COLUMN_LIST
44744478
L_PAREN "("
4475-
NAME_REF
4476-
IDENT "id"
4479+
COLUMN
4480+
NAME_REF
4481+
IDENT "id"
44774482
R_PAREN ")"
44784483
SEMICOLON ";"
44794484
WHITESPACE "\n\n"
@@ -4539,6 +4544,98 @@ SOURCE_FILE
45394544
IDENT "org_id"
45404545
SEMICOLON ";"
45414546
WHITESPACE "\n\n"
4547+
COMMENT "-- nested joins"
4548+
WHITESPACE "\n"
4549+
SELECT
4550+
SELECT_CLAUSE
4551+
SELECT_KW "select"
4552+
WHITESPACE " "
4553+
TARGET_LIST
4554+
TARGET
4555+
NAME_REF
4556+
IDENT "f1"
4557+
COMMA ","
4558+
WHITESPACE " "
4559+
TARGET
4560+
CALL_EXPR
4561+
NAME_REF
4562+
IDENT "count"
4563+
ARG_LIST
4564+
L_PAREN "("
4565+
STAR "*"
4566+
R_PAREN ")"
4567+
WHITESPACE " "
4568+
FROM_CLAUSE
4569+
FROM_KW "from"
4570+
WHITESPACE "\n"
4571+
NAME_REF
4572+
IDENT "t1"
4573+
WHITESPACE " "
4574+
ALIAS
4575+
NAME
4576+
IDENT "x"
4577+
COLUMN_LIST
4578+
L_PAREN "("
4579+
COLUMN
4580+
NAME
4581+
IDENT "x0"
4582+
COMMA ","
4583+
COLUMN
4584+
NAME
4585+
IDENT "x1"
4586+
R_PAREN ")"
4587+
WHITESPACE " "
4588+
JOIN
4589+
LEFT_KW "left"
4590+
WHITESPACE " "
4591+
JOIN_KW "join"
4592+
WHITESPACE " "
4593+
PAREN_EXPR
4594+
L_PAREN "("
4595+
NAME_REF
4596+
IDENT "t1"
4597+
WHITESPACE " "
4598+
JOIN
4599+
LEFT_KW "left"
4600+
WHITESPACE " "
4601+
JOIN_KW "join"
4602+
WHITESPACE " "
4603+
NAME_REF
4604+
IDENT "t2"
4605+
WHITESPACE " "
4606+
USING_CLAUSE
4607+
USING_KW "using"
4608+
COLUMN_LIST
4609+
L_PAREN "("
4610+
COLUMN
4611+
NAME_REF
4612+
IDENT "f1"
4613+
R_PAREN ")"
4614+
R_PAREN ")"
4615+
WHITESPACE " "
4616+
ON_KW "on"
4617+
WHITESPACE " "
4618+
PAREN_EXPR
4619+
L_PAREN "("
4620+
BIN_EXPR
4621+
NAME_REF
4622+
IDENT "x0"
4623+
WHITESPACE " "
4624+
EQ "="
4625+
WHITESPACE " "
4626+
LITERAL
4627+
INT_NUMBER "0"
4628+
R_PAREN ")"
4629+
WHITESPACE "\n"
4630+
GROUP_BY_CLAUSE
4631+
GROUP_KW "group"
4632+
WHITESPACE " "
4633+
BY_KW "by"
4634+
WHITESPACE " "
4635+
NAME_REF
4636+
IDENT "f1"
4637+
SEMICOLON ";"
4638+
WHITESPACE "\n\n"
45424639
COMMENT "-- using w/ join alias"
45434640
WHITESPACE "\n"
45444641
SELECT
@@ -4564,10 +4661,11 @@ SOURCE_FILE
45644661
USING_CLAUSE
45654662
USING_KW "using"
45664663
WHITESPACE " "
4567-
PAREN_EXPR
4664+
COLUMN_LIST
45684665
L_PAREN "("
4569-
NAME_REF
4570-
IDENT "id"
4666+
COLUMN
4667+
NAME_REF
4668+
IDENT "id"
45714669
R_PAREN ")"
45724670
WHITESPACE " "
45734671
ALIAS
@@ -4707,10 +4805,11 @@ SOURCE_FILE
47074805
USING_CLAUSE
47084806
USING_KW "using"
47094807
WHITESPACE " "
4710-
PAREN_EXPR
4808+
COLUMN_LIST
47114809
L_PAREN "("
4712-
NAME_REF
4713-
IDENT "id"
4810+
COLUMN
4811+
NAME_REF
4812+
IDENT "id"
47144813
R_PAREN ")"
47154814
WHITESPACE "\n"
47164815
JOIN
@@ -4724,10 +4823,11 @@ SOURCE_FILE
47244823
USING_CLAUSE
47254824
USING_KW "using"
47264825
WHITESPACE " "
4727-
PAREN_EXPR
4826+
COLUMN_LIST
47284827
L_PAREN "("
4729-
NAME_REF
4730-
EVENT_KW "event"
4828+
COLUMN
4829+
NAME_REF
4830+
EVENT_KW "event"
47314831
R_PAREN ")"
47324832
SEMICOLON ";"
47334833
WHITESPACE "\n\n"
@@ -4832,10 +4932,11 @@ SOURCE_FILE
48324932
USING_CLAUSE
48334933
USING_KW "USING"
48344934
WHITESPACE " "
4835-
PAREN_EXPR
4935+
COLUMN_LIST
48364936
L_PAREN "("
4837-
NAME_REF
4838-
IDENT "did"
4937+
COLUMN
4938+
NAME_REF
4939+
IDENT "did"
48394940
R_PAREN ")"
48404941
SEMICOLON ";"
48414942
WHITESPACE "\n\n"
@@ -4862,10 +4963,11 @@ SOURCE_FILE
48624963
USING_CLAUSE
48634964
USING_KW "using"
48644965
WHITESPACE " "
4865-
PAREN_EXPR
4966+
COLUMN_LIST
48664967
L_PAREN "("
4867-
NAME_REF
4868-
IDENT "a_id"
4968+
COLUMN
4969+
NAME_REF
4970+
IDENT "a_id"
48694971
R_PAREN ")"
48704972
SEMICOLON ";"
48714973
WHITESPACE "\n\n"
@@ -5672,13 +5774,13 @@ SOURCE_FILE
56725774
WHITESPACE " "
56735775
NAME
56745776
IDENT "min_value"
5675-
WHITESPACE " \n"
5777+
WHITESPACE "\n"
56765778
FROM_CLAUSE
56775779
FROM_KW "FROM"
56785780
WHITESPACE " "
56795781
NAME_REF
56805782
IDENT "sensors_uncompressed"
5681-
WHITESPACE " \n"
5783+
WHITESPACE "\n"
56825784
WHERE_CLAUSE
56835785
WHERE_KW "WHERE"
56845786
WHITESPACE " "
@@ -5732,7 +5834,7 @@ SOURCE_FILE
57325834
NAME_REF
57335835
IDENT "ts"
57345836
R_PAREN ")"
5735-
WHITESPACE " \n"
5837+
WHITESPACE "\n"
57365838
ORDER_BY_CLAUSE
57375839
ORDER_KW "ORDER"
57385840
WHITESPACE " "

crates/squawk_parser/tests/tests.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ fn parser_err(fixture: Fixture<&str>) {
7575
);
7676
}
7777

78+
// 102 failing
7879
#[dir_test(
7980
dir: "$CARGO_MANIFEST_DIR/tests/data/regression_suite",
8081
glob: "*.sql",

0 commit comments

Comments
 (0)