Skip to content

Commit 2ec9628

Browse files
[CALCITE-7394] Nested sub-query with multiple levels of correlation returns incorrect results
1 parent f33c6a2 commit 2ec9628

File tree

3 files changed

+329
-11
lines changed

3 files changed

+329
-11
lines changed

core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -954,10 +954,7 @@ private RelNode rewriteScalarAggregate(Aggregate oldRel,
954954
RelNode newRel,
955955
Map<Integer, Integer> outputMap,
956956
NavigableMap<CorDef, Integer> corDefOutputs) {
957-
final CorelMap localCorelMap = new CorelMapBuilder().build(oldRel);
958-
final List<CorRef> corVarList = new ArrayList<>(localCorelMap.mapRefRelToCorRef.values());
959-
Collections.sort(corVarList);
960-
957+
final List<CorRef> corVarList = collectExternalCorVars(oldRel);
961958
final NavigableMap<CorDef, Integer> valueGenCorDefOutputs = new TreeMap<>();
962959
final RelNode valueGen =
963960
requireNonNull(createValueGenerator(corVarList, 0, valueGenCorDefOutputs));
@@ -1260,10 +1257,7 @@ private static void shiftMapping(Map<Integer, Integer> mapping, int startIndex,
12601257
return decorrelateRel((RelNode) rel, false, parentPropagatesNullValues);
12611258
}
12621259

1263-
final CorelMap localCorelMap = new CorelMapBuilder().build(rel);
1264-
final List<CorRef> corVarList = new ArrayList<>(localCorelMap.mapRefRelToCorRef.values());
1265-
Collections.sort(corVarList);
1266-
1260+
final List<CorRef> corVarList = collectExternalCorVars(rel);
12671261
final NavigableMap<CorDef, Integer> valueGenCorDefOutputs = new TreeMap<>();
12681262
final RelNode valueGen =
12691263
requireNonNull(createValueGenerator(corVarList, 0, valueGenCorDefOutputs));
@@ -1958,9 +1952,7 @@ private static boolean isWidening(RelDataType type, RelDataType type1) {
19581952
}
19591953

19601954
// 1. Collect all CorRefs involved
1961-
final CorelMap localCorelMap = new CorelMapBuilder().build(rel);
1962-
final List<CorRef> corVarList = new ArrayList<>(localCorelMap.mapRefRelToCorRef.values());
1963-
Collections.sort(corVarList);
1955+
final List<CorRef> corVarList = collectExternalCorVars(rel);
19641956

19651957
// 2. Ensure CorVars are present in inputs (adding ValueGenerators if needed)
19661958
Frame newLeftFrame = leftFrame;
@@ -3849,6 +3841,25 @@ private static boolean isFieldNotNullRecursive(RelNode rel, int index) {
38493841
}
38503842
}
38513843

3844+
/**
3845+
* Collects the correlated variables referenced in the given relational expression
3846+
* that are not defined within the expression itself, i.e. those whose correlation
3847+
* id is absent from the locally built {@code mapCorToCorRel}.
3848+
* @param rel The relational expression to inspect
3849+
* @return A sorted list of external correlated variables (empty if there are none)
3850+
*/
3851+
private static List<CorRef> collectExternalCorVars(RelNode rel) {
3852+
final CorelMap localCorelMap = new CorelMapBuilder().build(rel);
3853+
final List<CorRef> corVarList = new ArrayList<>();
3854+
for (CorRef corVar : localCorelMap.mapRefRelToCorRef.values()) {
3855+
if (!localCorelMap.mapCorToCorRel.containsKey(corVar.corr)) {
3856+
corVarList.add(corVar);
3857+
}
3858+
}
3859+
Collections.sort(corVarList);
3860+
return corVarList;
3861+
}
3862+
38523863
/**
38533864
* Ensures that the correlated variables in {@code allCorDefs} are present
38543865
* in the output of the frame.

core/src/test/java/org/apache/calcite/sql2rel/RelDecorrelatorTest.java

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,138 @@ public static Frameworks.ConfigBuilder config() {
356356
assertThat(after, hasTree(planAfter));
357357
}
358358

359+
/** Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-7394">[CALCITE-7394]
360+
* Nested sub-query with multiple levels of correlation returns incorrect results</a>. */
361+
@Test void test7394() {
362+
final FrameworkConfig frameworkConfig = config().build();
363+
final RelBuilder builder = RelBuilder.create(frameworkConfig);
364+
final RelOptCluster cluster = builder.getCluster();
365+
final Planner planner = Frameworks.getPlanner(frameworkConfig);
366+
final String sql = ""
367+
+ "select d.dname,\n"
368+
+ " (select count(*)\n"
369+
+ " from emp e\n"
370+
+ " where e.deptno = d.deptno\n"
371+
+ " and exists (\n"
372+
+ " select 1\n"
373+
+ " from (values (1000), (2000), (3000)) as v(sal)\n"
374+
+ " where e.sal > v.sal\n"
375+
+ " and d.deptno * 100 < v.sal\n"
376+
+ " )\n"
377+
+ " ) as c\n"
378+
+ "from dept d\n"
379+
+ "order by d.dname";
380+
final RelNode originalRel;
381+
try {
382+
final SqlNode parse = planner.parse(sql);
383+
final SqlNode validate = planner.validate(parse);
384+
originalRel = planner.rel(validate).rel;
385+
} catch (Exception e) {
386+
throw TestUtil.rethrow(e);
387+
}
388+
389+
final HepProgram hepProgram = HepProgram.builder()
390+
.addRuleCollection(
391+
ImmutableList.of(
392+
// SubQuery program rules
393+
CoreRules.FILTER_SUB_QUERY_TO_CORRELATE,
394+
CoreRules.PROJECT_SUB_QUERY_TO_CORRELATE,
395+
CoreRules.JOIN_SUB_QUERY_TO_CORRELATE))
396+
.build();
397+
final Program program =
398+
Programs.of(hepProgram, true,
399+
requireNonNull(cluster.getMetadataProvider()));
400+
final RelNode before =
401+
program.run(cluster.getPlanner(), originalRel, cluster.traitSet(),
402+
Collections.emptyList(), Collections.emptyList());
403+
final String planBefore = ""
404+
+ "LogicalSort(sort0=[$0], dir0=[ASC])\n"
405+
+ " LogicalProject(DNAME=[$1], C=[$3])\n"
406+
+ " LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{0}])\n"
407+
+ " LogicalTableScan(table=[[scott, DEPT]])\n"
408+
+ " LogicalAggregate(group=[{}], EXPR$0=[COUNT()])\n"
409+
+ " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7])\n"
410+
+ " LogicalFilter(condition=[=($7, $cor0.DEPTNO)])\n"
411+
+ " LogicalCorrelate(correlation=[$cor1], joinType=[inner], requiredColumns=[{5}])\n"
412+
+ " LogicalTableScan(table=[[scott, EMP]])\n"
413+
+ " LogicalAggregate(group=[{0}])\n"
414+
+ " LogicalProject(i=[true])\n"
415+
+ " LogicalFilter(condition=[AND(>(CAST($cor1.SAL):DECIMAL(12, 2), CAST($0):DECIMAL(12, 2) NOT NULL), <(*($cor0.DEPTNO, 100), $0))])\n"
416+
+ " LogicalValues(tuples=[[{ 1000 }, { 2000 }, { 3000 }]])\n";
417+
assertThat(before, hasTree(planBefore));
418+
419+
// Decorrelate without any rules, exercising only the RelDecorrelator algorithm itself
420+
final RelNode after =
421+
RelDecorrelator.decorrelateQuery(before, builder, RuleSets.ofList(Collections.emptyList()),
422+
RuleSets.ofList(Collections.emptyList()));
423+
// Plan produced before the fix, kept for comparison with planAfter below:
424+
//
425+
// LogicalSort(sort0=[$0], dir0=[ASC])
426+
// LogicalProject(DNAME=[$1], C=[$7])
427+
// LogicalJoin(condition=[AND(=($0, $5), =($4, $6))], joinType=[left])
428+
// LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2], DEPTNO0=[$0], $f4=[*($0, 100)])
429+
// LogicalTableScan(table=[[scott, DEPT]])
430+
// LogicalProject(DEPTNO8=[$0], $f4=[$1], EXPR$0=[CASE(IS NOT NULL($5), $5, 0)])
431+
// LogicalJoin(condition=[AND(IS NOT DISTINCT FROM($0, $3),
432+
// IS NOT DISTINCT FROM($1, $4))], joinType=[left])
433+
// LogicalJoin(condition=[true], joinType=[inner])
434+
// LogicalProject(DEPTNO=[$0], $f4=[*($0, 100)])
435+
// LogicalTableScan(table=[[scott, DEPT]])
436+
// LogicalAggregate(group=[{0}])
437+
// LogicalProject(SAL0=[CAST($5):DECIMAL(12, 2)])
438+
// LogicalTableScan(table=[[scott, EMP]])
439+
// LogicalAggregate(group=[{0, 1}], EXPR$0=[COUNT()])
440+
// LogicalProject(DEPTNO8=[$7], $f4=[$9])
441+
// LogicalFilter(condition=[IS NOT NULL($7)])
442+
// LogicalProject(..., DEPTNO=[$7], i=[$11], $f4=[$9])
443+
// LogicalJoin(condition=[=($8, $10)], joinType=[inner])
444+
// LogicalProject(..., SAL0=[CAST($5):DECIMAL(12, 2)])
445+
// LogicalTableScan(table=[[scott, EMP]])
446+
// LogicalProject($f4=[$0], SAL0=[$1], $f2=[true])
447+
// LogicalAggregate(group=[{0, 1}])
448+
// LogicalProject($f4=[$1], SAL0=[$2])
449+
// LogicalJoin(condition=[AND(>($2, CAST($0):DECIMAL(12, 2) NOT NULL),
450+
// <($1, $0))], joinType=[inner])
451+
// LogicalValues(tuples=[[{ 1000 }, { 2000 }, { 3000 }]])
452+
// LogicalJoin(condition=[true], joinType=[inner])
453+
// LogicalAggregate(group=[{0}])
454+
// LogicalProject($f4=[*($0, 100)])
455+
// LogicalTableScan(table=[[scott, DEPT]])
456+
// LogicalAggregate(group=[{0}])
457+
// LogicalProject(SAL0=[CAST($5):DECIMAL(12, 2)])
458+
// LogicalTableScan(table=[[scott, EMP]])
459+
final String planAfter = ""
460+
+ "LogicalSort(sort0=[$0], dir0=[ASC])\n"
461+
+ " LogicalProject(DNAME=[$1], C=[$7])\n"
462+
+ " LogicalJoin(condition=[AND(=($0, $5), =($4, $6))], joinType=[left])\n"
463+
+ " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2], DEPTNO0=[$0], $f4=[*($0, 100)])\n"
464+
+ " LogicalTableScan(table=[[scott, DEPT]])\n"
465+
+ " LogicalProject(DEPTNO8=[$0], $f4=[$1], EXPR$0=[CASE(IS NOT NULL($4), $4, 0)])\n"
466+
+ " LogicalJoin(condition=[AND(IS NOT DISTINCT FROM($0, $2), IS NOT DISTINCT FROM($1, $3))], joinType=[left])\n"
467+
+ " LogicalProject(DEPTNO=[$0], $f4=[*($0, 100)])\n"
468+
+ " LogicalTableScan(table=[[scott, DEPT]])\n"
469+
+ " LogicalAggregate(group=[{0, 1}], EXPR$0=[COUNT()])\n"
470+
+ " LogicalProject(DEPTNO8=[$7], $f4=[$9])\n"
471+
+ " LogicalFilter(condition=[IS NOT NULL($7)])\n"
472+
+ " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], i=[$11], $f4=[$9])\n"
473+
+ " LogicalJoin(condition=[=($8, $10)], joinType=[inner])\n"
474+
+ " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SAL0=[CAST($5):DECIMAL(12, 2)])\n"
475+
+ " LogicalTableScan(table=[[scott, EMP]])\n"
476+
+ " LogicalProject($f4=[$0], SAL0=[$1], $f2=[true])\n"
477+
+ " LogicalAggregate(group=[{0, 1}])\n"
478+
+ " LogicalProject($f4=[$1], SAL0=[$2])\n"
479+
+ " LogicalJoin(condition=[AND(>($2, CAST($0):DECIMAL(12, 2) NOT NULL), <($1, $0))], joinType=[inner])\n"
480+
+ " LogicalValues(tuples=[[{ 1000 }, { 2000 }, { 3000 }]])\n"
481+
+ " LogicalJoin(condition=[true], joinType=[inner])\n"
482+
+ " LogicalAggregate(group=[{0}])\n"
483+
+ " LogicalProject($f4=[*($0, 100)])\n"
484+
+ " LogicalTableScan(table=[[scott, DEPT]])\n"
485+
+ " LogicalAggregate(group=[{0}])\n"
486+
+ " LogicalProject(SAL0=[CAST($5):DECIMAL(12, 2)])\n"
487+
+ " LogicalTableScan(table=[[scott, EMP]])\n";
488+
assertThat(after, hasTree(planAfter));
489+
}
490+
359491
/** Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-7297">[CALCITE-7297]
360492
* The result is incorrect when the GROUP BY key in a subquery is a RexFieldAccess</a>. */
361493
@Test void testSkipsRedundantValueGenerator() {

core/src/test/resources/sql/sub-query.iq

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5617,6 +5617,181 @@ ORDER BY deptno;
56175617

56185618
!ok
56195619

5620+
# [CALCITE-7394] Nested sub-query with multiple levels of correlation returns incorrect results
5621+
select d.dname,
5622+
(select count(*)
5623+
from emp e
5624+
where e.deptno = d.deptno
5625+
and e.sal > (
5626+
select min(s.losal)
5627+
from (VALUES (1, 700, 1200), (2, 1201, 1400), (3, 1401, 2000), (4, 2001, 3000), (5, 3001, 9999)) AS s(grade, losal, hisal)
5628+
where e.sal BETWEEN s.losal AND s.hisal
5629+
and s.hisal > d.deptno * 10
5630+
)
5631+
) as high_paid_count
5632+
from dept d
5633+
order by d.dname;
5634+
+------------+-----------------+
5635+
| DNAME | HIGH_PAID_COUNT |
5636+
+------------+-----------------+
5637+
| ACCOUNTING | 3 |
5638+
| OPERATIONS | 0 |
5639+
| RESEARCH | 5 |
5640+
| SALES | 6 |
5641+
+------------+-----------------+
5642+
(4 rows)
5643+
5644+
!ok
5645+
5646+
# [CALCITE-7394] Nested sub-query with multiple levels of correlation returns incorrect results
5647+
select e.ename
5648+
from emp e
5649+
where e.sal > (
5650+
select avg(e2.sal)
5651+
from emp e2
5652+
where e2.deptno = e.deptno
5653+
and exists (
5654+
select 1
5655+
from (values (7369, 20)) as b(empno, deptno)
5656+
where b.empno = e2.empno
5657+
and b.deptno = e.deptno
5658+
)
5659+
)
5660+
and e.sal < 2000
5661+
order by e.ename;
5662+
+-------+
5663+
| ENAME |
5664+
+-------+
5665+
| ADAMS |
5666+
+-------+
5667+
(1 row)
5668+
5669+
!ok
5670+
5671+
# [CALCITE-7394] Nested sub-query with multiple levels of correlation returns incorrect results
5672+
select d.deptno
5673+
from dept d
5674+
where exists (
5675+
select 1
5676+
from emp e
5677+
where e.deptno = d.deptno
5678+
and exists (
5679+
select 1
5680+
from (VALUES (1, 700, 1200), (2, 1201, 1400), (3, 1401, 2000), (4, 2001, 3000), (5, 3001, 9999)) AS s(grade, losal, hisal)
5681+
where s.grade = 1
5682+
and s.hisal >= e.sal
5683+
and s.losal <= d.deptno * 20
5684+
)
5685+
)
5686+
order by d.deptno;
5687+
+--------+
5688+
| DEPTNO |
5689+
+--------+
5690+
+--------+
5691+
(0 rows)
5692+
5693+
!ok
5694+
5695+
# [CALCITE-7394] Nested sub-query with multiple levels of correlation returns incorrect results
5696+
select e.ename
5697+
from emp e
5698+
where e.deptno in (
5699+
select d.deptno
5700+
from dept d
5701+
where d.deptno = e.deptno and d.deptno = 10
5702+
union
5703+
select d.deptno
5704+
from dept d
5705+
where d.deptno = e.deptno
5706+
and exists (
5707+
select 1
5708+
from emp e2
5709+
where e2.deptno = d.deptno
5710+
and e2.empno = e.empno
5711+
and e2.sal > 2000
5712+
)
5713+
)
5714+
order by e.ename;
5715+
+--------+
5716+
| ENAME |
5717+
+--------+
5718+
| BLAKE |
5719+
| CLARK |
5720+
| FORD |
5721+
| JONES |
5722+
| KING |
5723+
| MILLER |
5724+
| SCOTT |
5725+
+--------+
5726+
(7 rows)
5727+
5728+
!ok
5729+
5730+
# [CALCITE-7394] Nested sub-query with multiple levels of correlation returns incorrect results
5731+
select e.ename
5732+
from emp e
5733+
where exists (
5734+
select 1
5735+
from dept d
5736+
join emp e2 on d.deptno = e2.deptno
5737+
where d.deptno = e.deptno
5738+
and exists (
5739+
select 1
5740+
from (values (10), (20), (30)) as v(deptno)
5741+
where v.deptno = e2.deptno
5742+
and v.deptno = e.deptno
5743+
)
5744+
and e2.empno = e.empno
5745+
)
5746+
order by e.ename;
5747+
+--------+
5748+
| ENAME |
5749+
+--------+
5750+
| ADAMS |
5751+
| ALLEN |
5752+
| BLAKE |
5753+
| CLARK |
5754+
| FORD |
5755+
| JAMES |
5756+
| JONES |
5757+
| KING |
5758+
| MARTIN |
5759+
| MILLER |
5760+
| SCOTT |
5761+
| SMITH |
5762+
| TURNER |
5763+
| WARD |
5764+
+--------+
5765+
(14 rows)
5766+
5767+
!ok
5768+
5769+
# [CALCITE-7394] Nested sub-query with multiple levels of correlation returns incorrect results
5770+
select d.dname,
5771+
(select count(*)
5772+
from emp e
5773+
where e.deptno = d.deptno
5774+
and exists (
5775+
select 1
5776+
from (values (1000), (2000), (3000)) as v(sal)
5777+
where e.sal > v.sal
5778+
and d.deptno * 100 < v.sal
5779+
)
5780+
) as c
5781+
from dept d
5782+
order by d.dname;
5783+
+------------+---+
5784+
| DNAME | C |
5785+
+------------+---+
5786+
| ACCOUNTING | 2 |
5787+
| OPERATIONS | 0 |
5788+
| RESEARCH | 0 |
5789+
| SALES | 0 |
5790+
+------------+---+
5791+
(4 rows)
5792+
5793+
!ok
5794+
56205795
# [CALCITE-7303] Subqueries cannot be decorrelated if filter condition have multi CorrelationId
56215796
SELECT deptno
56225797
FROM emp e

0 commit comments

Comments
 (0)