Skip to content

Commit 60834d8

Browse files
authored
parser: improve joins (#538)
add some more nodes to the CST & update the AST add validation to check for extraneous or missing join conditions / claueses
1 parent 446f5d6 commit 60834d8

20 files changed

+1844
-1236
lines changed

PLAN.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,36 @@ LINE 1: set session query_max_run_time = 10m;
296296
^
297297
```
298298

299+
### Rule: cross join
300+
301+
```sql
302+
select * from t join u on true;
303+
select * from t1, t2;
304+
```
305+
306+
suggests / autofixes to:
307+
308+
```sql
309+
select * from t cross join u;
310+
select * from t1 cross join t2;
311+
```
312+
313+
with config to change desired destination format
314+
315+
### Rule: natural join
316+
317+
warn about natural joins and autofix to the equivalent
318+
319+
```sql
320+
select * from t natural join u;
321+
```
322+
323+
suggests / autofixes to:
324+
325+
```sql
326+
select * from t join u using (id, name, ip, description, meta);
327+
```
328+
299329
### Rule: using unsupported lambdas
300330

301331
This actually parsers in Postgres, but could work off a heuristic

crates/squawk_parser/src/generated/syntax_kind.rs

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/squawk_parser/src/grammar.rs

Lines changed: 76 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2625,32 +2625,61 @@ fn opt_order_by_clause(p: &mut Parser<'_>) -> bool {
26252625
true
26262626
}
26272627

2628-
const JOIN_TYPE_FIRST: TokenSet = TokenSet::new(&[INNER_KW, JOIN_KW, LEFT_KW, RIGHT_KW, FULL_KW]);
2628+
const JOIN_TYPE_FIRST: TokenSet =
2629+
TokenSet::new(&[INNER_KW, JOIN_KW, LEFT_KW, RIGHT_KW, FULL_KW, CROSS_KW]);
26292630

26302631
// where join_type is:
26312632
// [ INNER ] JOIN
26322633
// LEFT [ OUTER ] JOIN
26332634
// RIGHT [ OUTER ] JOIN
26342635
// FULL [ OUTER ] JOIN
2635-
fn join_type(p: &mut Parser<'_>) -> bool {
2636+
fn join_type(p: &mut Parser<'_>) -> Option<CompletedMarker> {
26362637
assert!(p.at_ts(JOIN_TYPE_FIRST));
2637-
if p.eat(INNER_KW) {
2638-
p.expect(JOIN_KW)
2639-
} else if p.eat(LEFT_KW) || p.eat(RIGHT_KW) || p.eat(FULL_KW) {
2640-
p.eat(OUTER_KW);
2641-
p.expect(JOIN_KW)
2642-
} else {
2643-
p.expect(JOIN_KW)
2644-
}
2638+
let m = p.start();
2639+
let kind = match p.current() {
2640+
CROSS_KW => {
2641+
p.bump(CROSS_KW);
2642+
p.expect(JOIN_KW);
2643+
JOIN_CROSS
2644+
}
2645+
INNER_KW | JOIN_KW => {
2646+
p.eat(INNER_KW);
2647+
p.expect(JOIN_KW);
2648+
JOIN_INNER
2649+
}
2650+
LEFT_KW => {
2651+
p.bump(LEFT_KW);
2652+
p.eat(OUTER_KW);
2653+
p.expect(JOIN_KW);
2654+
JOIN_LEFT
2655+
}
2656+
RIGHT_KW => {
2657+
p.bump(RIGHT_KW);
2658+
p.eat(OUTER_KW);
2659+
p.expect(JOIN_KW);
2660+
JOIN_RIGHT
2661+
}
2662+
FULL_KW => {
2663+
p.bump(FULL_KW);
2664+
p.eat(OUTER_KW);
2665+
p.expect(JOIN_KW);
2666+
JOIN_FULL
2667+
}
2668+
_ => {
2669+
p.error("expected join type");
2670+
return None;
2671+
}
2672+
};
2673+
Some(m.complete(p, kind))
26452674
}
26462675

26472676
const JOIN_FIRST: TokenSet = TokenSet::new(&[NATURAL_KW, CROSS_KW]).union(JOIN_TYPE_FIRST);
26482677

2649-
fn opt_from_clause(p: &mut Parser<'_>) -> bool {
2678+
fn opt_from_clause(p: &mut Parser<'_>) -> Option<CompletedMarker> {
26502679
let m = p.start();
26512680
if !p.eat(FROM_KW) {
26522681
m.abandon(p);
2653-
return false;
2682+
return None;
26542683
}
26552684
if !opt_from_item(p) {
26562685
p.error(format!("expected from item, got {:?}", p.current()));
@@ -2661,8 +2690,7 @@ fn opt_from_clause(p: &mut Parser<'_>) -> bool {
26612690
break;
26622691
}
26632692
}
2664-
m.complete(p, FROM_CLAUSE);
2665-
true
2693+
Some(m.complete(p, FROM_CLAUSE))
26662694
}
26672695

26682696
// https://github.com/postgres/postgres/blob/b3219c69fc1e161df8d380c464b3f2cce3b6cab9/src/backend/parser/gram.y#L18042
@@ -2994,7 +3022,7 @@ fn paren_data_source(p: &mut Parser<'_>) -> CompletedMarker {
29943022
fn merge_using_clause(p: &mut Parser<'_>) {
29953023
let m = p.start();
29963024
p.expect(USING_KW);
2997-
data_source(p);
3025+
opt_from_item(p);
29983026
p.expect(ON_KW);
29993027
// join_condition
30003028
if expr(p).is_none() {
@@ -3037,17 +3065,18 @@ fn merge_using_clause(p: &mut Parser<'_>) {
30373065
// RIGHT [ OUTER ] JOIN
30383066
// FULL [ OUTER ] JOIN
30393067
//
3040-
#[must_use]
30413068
fn opt_from_item(p: &mut Parser<'_>) -> bool {
30423069
if !p.at_ts(FROM_ITEM_FIRST) {
30433070
return false;
30443071
}
30453072
let m = p.start();
30463073
data_source(p);
3074+
let mut cm = m.complete(p, FROM_ITEM);
30473075
while p.at_ts(JOIN_FIRST) {
3076+
let m = cm.precede(p);
30483077
join(p);
3078+
cm = m.complete(p, JOIN_EXPR);
30493079
}
3050-
m.complete(p, FROM_ITEM);
30513080
true
30523081
}
30533082

@@ -3066,52 +3095,40 @@ fn opt_from_item(p: &mut Parser<'_>) -> bool {
30663095
fn join(p: &mut Parser<'_>) {
30673096
assert!(p.at_ts(JOIN_FIRST));
30683097
let m = p.start();
3069-
if p.eat(NATURAL_KW) {
3070-
if !join_type(p) {
3071-
p.error("expected join type");
3072-
}
3073-
if !opt_from_item(p) {
3074-
p.error("expected from_item");
3075-
}
3076-
} else if p.eat(CROSS_KW) {
3077-
p.expect(JOIN_KW);
3078-
if !opt_from_item(p) {
3079-
p.error("expected from_item");
3080-
}
3081-
} else {
3082-
if !join_type(p) {
3083-
p.error("expected join type");
3084-
}
3085-
if !opt_from_item(p) {
3086-
p.error("expected from_item");
3098+
p.eat(NATURAL_KW);
3099+
if join_type(p).is_none() {
3100+
p.error("expected join type");
3101+
}
3102+
if !opt_from_item(p) {
3103+
p.error("expected from_item");
3104+
}
3105+
if p.at(ON_KW) {
3106+
let m = p.start();
3107+
p.bump(ON_KW);
3108+
if expr(p).is_none() {
3109+
p.error("expected an expression");
30873110
}
3088-
if p.eat(ON_KW) {
3089-
if expr(p).is_none() {
3090-
p.error("expected an expression");
3091-
}
3111+
m.complete(p, ON_CLAUSE);
3112+
} else if p.at(USING_KW) {
3113+
let m = p.start();
3114+
// USING ( join_column [, ...] )
3115+
p.expect(USING_KW);
3116+
if p.at(L_PAREN) {
3117+
column_list(p);
30923118
} else {
3093-
{
3094-
let m = p.start();
3095-
// USING ( join_column [, ...] )
3096-
p.expect(USING_KW);
3097-
if p.at(L_PAREN) {
3098-
column_list(p);
3099-
} else {
3100-
p.error("expected L_PAREN");
3101-
}
3102-
m.complete(p, USING_CLAUSE);
3103-
}
3104-
{
3105-
let m = p.start();
3106-
// [ AS join_using_alias ]
3107-
if p.eat(AS_KW) {
3108-
name(p);
3109-
m.complete(p, ALIAS);
3110-
} else {
3111-
m.abandon(p);
3112-
}
3119+
p.error("expected L_PAREN");
3120+
}
3121+
{
3122+
let m = p.start();
3123+
// [ AS join_using_alias ]
3124+
if p.eat(AS_KW) {
3125+
name(p);
3126+
m.complete(p, ALIAS);
3127+
} else {
3128+
m.abandon(p);
31133129
}
31143130
}
3131+
m.complete(p, USING_CLAUSE);
31153132
}
31163133
m.complete(p, JOIN);
31173134
}

crates/squawk_parser/tests/data/regression_suite/tuplesort.sql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,12 +296,12 @@ SELECT $$
296296
$$ AS qry ;
297297

298298
-- test mark/restore with in-memory sorts
299-
EXPLAIN (COSTS OFF) 'qry';
300-
'qry';
299+
-- EXPLAIN (COSTS OFF) 'qry';
300+
-- 'qry';
301301

302302
-- test mark/restore with on-disk sorts
303303
SET LOCAL work_mem = '100kB';
304-
EXPLAIN (COSTS OFF) 'qry';
305-
'qry';
304+
-- EXPLAIN (COSTS OFF) 'qry';
305+
-- 'qry';
306306

307307
COMMIT;

crates/squawk_parser/tests/snapshots/tests__delete_ok.snap

Lines changed: 30 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -208,18 +208,20 @@ SOURCE_FILE
208208
USING_CLAUSE
209209
USING_KW "using"
210210
WHITESPACE " "
211-
FROM_ITEM
212-
NAME_REF
213-
IDENT "order_items"
214-
WHITESPACE " "
215-
ALIAS
216-
NAME
217-
IDENT "oi"
211+
JOIN_EXPR
212+
FROM_ITEM
213+
NAME_REF
214+
IDENT "order_items"
215+
WHITESPACE " "
216+
ALIAS
217+
NAME
218+
IDENT "oi"
218219
WHITESPACE "\n "
219220
JOIN
220-
LEFT_KW "left"
221-
WHITESPACE " "
222-
JOIN_KW "join"
221+
JOIN_LEFT
222+
LEFT_KW "left"
223+
WHITESPACE " "
224+
JOIN_KW "join"
223225
WHITESPACE " "
224226
FROM_ITEM
225227
NAME_REF
@@ -229,24 +231,25 @@ SOURCE_FILE
229231
NAME
230232
IDENT "o"
231233
WHITESPACE " "
232-
ON_KW "on"
233-
WHITESPACE " "
234-
BIN_EXPR
235-
FIELD_EXPR
236-
NAME_REF
237-
IDENT "oi"
238-
DOT "."
239-
NAME_REF
240-
IDENT "order_id"
234+
ON_CLAUSE
235+
ON_KW "on"
241236
WHITESPACE " "
242-
EQ "="
243-
WHITESPACE " "
244-
FIELD_EXPR
245-
NAME_REF
246-
IDENT "o"
247-
DOT "."
248-
NAME_REF
249-
IDENT "id"
237+
BIN_EXPR
238+
FIELD_EXPR
239+
NAME_REF
240+
IDENT "oi"
241+
DOT "."
242+
NAME_REF
243+
IDENT "order_id"
244+
WHITESPACE " "
245+
EQ "="
246+
WHITESPACE " "
247+
FIELD_EXPR
248+
NAME_REF
249+
IDENT "o"
250+
DOT "."
251+
NAME_REF
252+
IDENT "id"
250253
SEMICOLON ";"
251254
WHITESPACE "\n\n"
252255
DELETE

0 commit comments

Comments
 (0)