Skip to content

Commit fcb86f0

Browse files
iwanttobepowerfulmihaibudiu
authored andcommitted
[CALCITE-7257] Subqueries cannot be decorrelated if join condition contains RexFieldAccess
1 parent fabf648 commit fcb86f0

File tree

3 files changed

+489
-24
lines changed

3 files changed

+489
-24
lines changed

core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1543,17 +1543,17 @@ private RelNode getCorRel(CorRef corVar) {
15431543
/** Adds a value generator to satisfy the correlating variables used by
15441544
* a relational expression, if those variables are not already provided by
15451545
* its input. */
1546-
private Frame maybeAddValueGenerator(RelNode rel, Frame frame) {
1547-
final CorelMap cm1 = new CorelMapBuilder().build(frame.r, rel);
1546+
private Frame maybeAddValueGenerator(RelNode rel, Frame inputFrame) {
1547+
final CorelMap cm1 = new CorelMapBuilder().build(inputFrame.r, rel);
15481548
if (!cm1.mapRefRelToCorRef.containsKey(rel)) {
1549-
return frame;
1549+
return inputFrame;
15501550
}
15511551
final Collection<CorRef> needs = cm1.mapRefRelToCorRef.get(rel);
1552-
final ImmutableSortedSet<CorDef> haves = frame.corDefOutputs.keySet();
1552+
final ImmutableSortedSet<CorDef> haves = inputFrame.corDefOutputs.keySet();
15531553
if (hasAll(needs, haves)) {
1554-
return frame;
1554+
return inputFrame;
15551555
}
1556-
return decorrelateInputWithValueGenerator(rel, frame);
1556+
return decorrelateInputWithValueGenerator(rel, inputFrame);
15571557
}
15581558

15591559
/** Returns whether all of a collection of {@link CorRef}s are satisfied
@@ -1579,13 +1579,13 @@ private static boolean has(Collection<CorDef> corDefs, CorRef corr) {
15791579
return false;
15801580
}
15811581

1582-
private Frame decorrelateInputWithValueGenerator(RelNode rel, Frame frame) {
1582+
private Frame decorrelateInputWithValueGenerator(RelNode rel, Frame inputFrame) {
15831583
// currently only handles one input
15841584
assert rel.getInputs().size() == 1;
1585-
RelNode oldInput = frame.r;
1585+
RelNode oldInput = inputFrame.r;
15861586

15871587
final NavigableMap<CorDef, Integer> corDefOutputs =
1588-
new TreeMap<>(frame.corDefOutputs);
1588+
new TreeMap<>(inputFrame.corDefOutputs);
15891589

15901590
final Collection<CorRef> corVarList = cm.mapRefRelToCorRef.get(rel);
15911591

@@ -1606,16 +1606,15 @@ private Frame decorrelateInputWithValueGenerator(RelNode rel, Frame frame) {
16061606
if (node instanceof RexInputRef) {
16071607
map.put(def, ((RexInputRef) node).getIndex());
16081608
} else {
1609-
map.put(def,
1610-
frame.r.getRowType().getFieldCount() + projects.size());
1609+
map.put(def, inputFrame.r.getRowType().getFieldCount() + projects.size());
16111610
projects.add((RexNode) node);
16121611
}
16131612
}
16141613
}
16151614
// If all correlation variables are now satisfied, skip creating a value
16161615
// generator.
16171616
if (map.size() == corVarList.size()) {
1618-
map.putAll(frame.corDefOutputs);
1617+
map.putAll(inputFrame.corDefOutputs);
16191618
final RelNode r;
16201619
if (!projects.isEmpty()) {
16211620
relBuilder.push(oldInput)
@@ -1624,17 +1623,40 @@ private Frame decorrelateInputWithValueGenerator(RelNode rel, Frame frame) {
16241623
} else {
16251624
r = oldInput;
16261625
}
1627-
return register(rel.getInput(0), r,
1628-
frame.oldToNewOutputs, map);
1626+
return register(rel.getInput(0), r, inputFrame.oldToNewOutputs, map);
16291627
}
16301628
}
16311629

1632-
int leftInputOutputCount = frame.r.getRowType().getFieldCount();
1630+
return createFrameWithValueGenerator(rel.getInput(0), inputFrame, corVarList, corDefOutputs);
1631+
}
1632+
1633+
/**
1634+
* Creates a new {@link Frame} for the given rel by joining its current
1635+
* decorrelated rel with a value generator that produces the required
1636+
* correlation variables.
1637+
*
1638+
* <p>The value generator is built from {@code corVarList} and joined with
1639+
* {@code frame.r} using an INNER join. The provided
1640+
* {@code corDefOutputs} map is updated to reflect the positions of all
1641+
* correlation definitions in the join output, and the resulting frame is
1642+
* registered for {@code rel}.
1643+
*
1644+
* @param rel target RelNode whose frame is updated to use the join of
1645+
* {@code frame.r} and the value generator
1646+
* @param frame existing Frame of the rel
1647+
* @param corVarList correlated variables that still need to be produced
1648+
* @param corDefOutputs mapping from {@link CorDef} to output positions; updated in place
1649+
* to include positions in the new join
1650+
* @return a new Frame describing {@code rel} after attaching the value generator
1651+
*/
1652+
private Frame createFrameWithValueGenerator(RelNode rel, Frame frame,
1653+
Collection<CorRef> corVarList, NavigableMap<CorDef, Integer> corDefOutputs) {
1654+
int leftFieldCount = frame.r.getRowType().getFieldCount();
16331655

16341656
// can directly add positions into corDefOutputs since join
16351657
// does not change the output ordering from the inputs.
16361658
final RelNode valueGen =
1637-
createValueGenerator(corVarList, leftInputOutputCount, corDefOutputs);
1659+
createValueGenerator(corVarList, leftFieldCount, corDefOutputs);
16381660
requireNonNull(valueGen, "valueGen");
16391661

16401662
RelNode join =
@@ -1647,8 +1669,7 @@ private Frame decorrelateInputWithValueGenerator(RelNode rel, Frame frame) {
16471669
// Join or Filter does not change the old input ordering. All
16481670
// input fields from newLeftInput (i.e. the original input to the old
16491671
// Filter) are in the output and in the same position.
1650-
return register(rel.getInput(0), join, frame.oldToNewOutputs,
1651-
corDefOutputs);
1672+
return register(rel, join, frame.oldToNewOutputs, corDefOutputs);
16521673
}
16531674

16541675
/** Finds a {@link RexInputRef} that is equivalent to a {@link CorRef},
@@ -1931,8 +1952,19 @@ private static boolean isWidening(RelDataType type, RelDataType type1) {
19311952
return null;
19321953
}
19331954

1955+
Frame newLeftFrame = leftFrame;
1956+
boolean joinConditionContainsFieldAccess = RexUtil.containsFieldAccess(rel.getCondition());
1957+
if (joinConditionContainsFieldAccess && isCorVarDefined) {
1958+
final CorelMap localCorelMap = new CorelMapBuilder().build(rel);
1959+
final List<CorRef> corVarList = new ArrayList<>(localCorelMap.mapRefRelToCorRef.values());
1960+
Collections.sort(corVarList);
1961+
1962+
final NavigableMap<CorDef, Integer> corDefOutputs = new TreeMap<>();
1963+
newLeftFrame = createFrameWithValueGenerator(oldLeft, leftFrame, corVarList, corDefOutputs);
1964+
}
1965+
19341966
RelNode newJoin = relBuilder
1935-
.push(leftFrame.r)
1967+
.push(newLeftFrame.r)
19361968
.push(rightFrame.r)
19371969
.join(rel.getJoinType(),
19381970
decorrelateExpr(castNonNull(currentRel), map, cm, rel.getCondition()),
@@ -1944,25 +1976,23 @@ private static boolean isWidening(RelDataType type, RelDataType type1) {
19441976
Map<Integer, Integer> mapOldToNewOutputs = new HashMap<>();
19451977

19461978
int oldLeftFieldCount = oldLeft.getRowType().getFieldCount();
1947-
int newLeftFieldCount = leftFrame.r.getRowType().getFieldCount();
1979+
int newLeftFieldCount = newLeftFrame.r.getRowType().getFieldCount();
19481980

19491981
int oldRightFieldCount = oldRight.getRowType().getFieldCount();
19501982
//noinspection AssertWithSideEffects
19511983
assert rel.getRowType().getFieldCount()
19521984
== oldLeftFieldCount + oldRightFieldCount;
19531985

19541986
// Left input positions are not changed.
1955-
mapOldToNewOutputs.putAll(leftFrame.oldToNewOutputs);
1956-
1987+
mapOldToNewOutputs.putAll(newLeftFrame.oldToNewOutputs);
19571988
// Right input positions are shifted by newLeftFieldCount.
19581989
for (int i = 0; i < oldRightFieldCount; i++) {
19591990
mapOldToNewOutputs.put(i + oldLeftFieldCount,
19601991
requireNonNull(rightFrame.oldToNewOutputs.get(i)) + newLeftFieldCount);
19611992
}
19621993

19631994
final NavigableMap<CorDef, Integer> corDefOutputs =
1964-
new TreeMap<>(leftFrame.corDefOutputs);
1965-
1995+
new TreeMap<>(newLeftFrame.corDefOutputs);
19661996
// Right input positions are shifted by newLeftFieldCount.
19671997
for (Map.Entry<CorDef, Integer> entry
19681998
: rightFrame.corDefOutputs.entrySet()) {

core/src/test/java/org/apache/calcite/sql2rel/RelDecorrelatorTest.java

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,4 +1198,78 @@ public static Frameworks.ConfigBuilder config() {
11981198
+ " LogicalTableScan(table=[[scott, EMP]])\n";
11991199
assertThat(after, hasTree(planAfter));
12001200
}
1201+
1202+
/** Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-7257">[CALCITE-7257]
1203+
* Subqueries cannot be decorrelated if join condition contains RexFieldAccess</a>. */
1204+
@Test void testJoinConditionContainsRexFieldAccess() {
1205+
final FrameworkConfig frameworkConfig = config().build();
1206+
final RelBuilder builder = RelBuilder.create(frameworkConfig);
1207+
final RelOptCluster cluster = builder.getCluster();
1208+
final Planner planner = Frameworks.getPlanner(frameworkConfig);
1209+
final String sql = ""
1210+
+ "SELECT E1.* \n"
1211+
+ "FROM\n"
1212+
+ " EMP E1\n"
1213+
+ "WHERE\n"
1214+
+ " E1.EMPNO = (\n"
1215+
+ " SELECT D1.DEPTNO FROM DEPT D1\n"
1216+
+ " WHERE E1.ENAME IN (SELECT B1.ENAME FROM BONUS B1))";
1217+
final RelNode originalRel;
1218+
try {
1219+
final SqlNode parse = planner.parse(sql);
1220+
final SqlNode validate = planner.validate(parse);
1221+
originalRel = planner.rel(validate).rel;
1222+
} catch (Exception e) {
1223+
throw TestUtil.rethrow(e);
1224+
}
1225+
1226+
final HepProgram hepProgram = HepProgram.builder()
1227+
.addRuleCollection(
1228+
ImmutableList.of(
1229+
// SubQuery program rules
1230+
CoreRules.FILTER_SUB_QUERY_TO_CORRELATE,
1231+
CoreRules.PROJECT_SUB_QUERY_TO_CORRELATE,
1232+
CoreRules.JOIN_SUB_QUERY_TO_CORRELATE))
1233+
.build();
1234+
final Program program =
1235+
Programs.of(hepProgram, true,
1236+
requireNonNull(cluster.getMetadataProvider()));
1237+
final RelNode before =
1238+
program.run(cluster.getPlanner(), originalRel, cluster.traitSet(),
1239+
Collections.emptyList(), Collections.emptyList());
1240+
final String planBefore = ""
1241+
+ "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7])\n"
1242+
+ " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7])\n"
1243+
+ " LogicalFilter(condition=[=($0, CAST($8):SMALLINT)])\n"
1244+
+ " LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{1}])\n"
1245+
+ " LogicalTableScan(table=[[scott, EMP]])\n"
1246+
+ " LogicalAggregate(group=[{}], agg#0=[SINGLE_VALUE($0)])\n"
1247+
+ " LogicalProject(DEPTNO=[$0])\n"
1248+
+ " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n"
1249+
+ " LogicalJoin(condition=[=($cor0.ENAME, $3)], joinType=[inner])\n"
1250+
+ " LogicalTableScan(table=[[scott, DEPT]])\n"
1251+
+ " LogicalProject(ENAME=[$0])\n"
1252+
+ " LogicalTableScan(table=[[scott, BONUS]])\n";
1253+
assertThat(before, hasTree(planBefore));
1254+
1255+
// Decorrelate without any rules, just "purely" decorrelation algorithm on RelDecorrelator
1256+
final RelNode after =
1257+
RelDecorrelator.decorrelateQuery(before, builder, RuleSets.ofList(Collections.emptyList()),
1258+
RuleSets.ofList(Collections.emptyList()));
1259+
final String planAfter = ""
1260+
+ "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7])\n"
1261+
+ " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], ENAME0=[$8], $f1=[CAST($9):TINYINT])\n"
1262+
+ " LogicalJoin(condition=[AND(=($1, $8), =($0, CAST($9):SMALLINT))], joinType=[inner])\n"
1263+
+ " LogicalTableScan(table=[[scott, EMP]])\n"
1264+
+ " LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])\n"
1265+
+ " LogicalProject(ENAME=[$3], DEPTNO=[$0])\n"
1266+
+ " LogicalJoin(condition=[=($3, $4)], joinType=[inner])\n"
1267+
+ " LogicalJoin(condition=[true], joinType=[inner])\n"
1268+
+ " LogicalTableScan(table=[[scott, DEPT]])\n"
1269+
+ " LogicalProject(ENAME=[$1])\n"
1270+
+ " LogicalTableScan(table=[[scott, EMP]])\n"
1271+
+ " LogicalProject(ENAME=[$0])\n"
1272+
+ " LogicalTableScan(table=[[scott, BONUS]])\n";
1273+
assertThat(after, hasTree(planAfter));
1274+
}
12011275
}

0 commit comments

Comments
 (0)