Skip to content

Commit d5bc50d

Browse files
silundongiwanttobepowerful
authored and committed
[CALCITE-7327] Support IS NOT DISTINCT FROM as equi condition of hash join
1 parent c478e38 commit d5bc50d

File tree

19 files changed

+325
-80
lines changed

19 files changed

+325
-80
lines changed

core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableHashJoin.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,10 @@ private Result implementHashSemiJoin(EnumerableRelImplementor implementor, Prefe
218218
Expressions.list(
219219
leftExpression,
220220
rightExpression,
221-
leftResult.physType.generateAccessorWithoutNulls(joinInfo.leftKeys),
222-
rightResult.physType.generateAccessorWithoutNulls(joinInfo.rightKeys),
221+
leftResult.physType.generateNullAwareAccessor(
222+
joinInfo.leftKeys, joinInfo.nullExclusionFlags),
223+
rightResult.physType.generateNullAwareAccessor(
224+
joinInfo.rightKeys, joinInfo.nullExclusionFlags),
223225
Util.first(keyPhysType.comparer(),
224226
Expressions.constant(null)),
225227
predicate)))
@@ -264,8 +266,10 @@ private Result implementHashJoin(EnumerableRelImplementor implementor, Prefer pr
264266
BuiltInMethod.HASH_JOIN.method,
265267
Expressions.list(
266268
rightExpression,
267-
leftResult.physType.generateAccessorWithoutNulls(joinInfo.leftKeys),
268-
rightResult.physType.generateAccessorWithoutNulls(joinInfo.rightKeys),
269+
leftResult.physType.generateNullAwareAccessor(
270+
joinInfo.leftKeys, joinInfo.nullExclusionFlags),
271+
rightResult.physType.generateNullAwareAccessor(
272+
joinInfo.rightKeys, joinInfo.nullExclusionFlags),
269273
EnumUtils.joinSelector(joinType,
270274
physType,
271275
ImmutableList.of(

core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableMergeJoin.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.apache.calcite.rel.RelNode;
3535
import org.apache.calcite.rel.core.CorrelationId;
3636
import org.apache.calcite.rel.core.Join;
37+
import org.apache.calcite.rel.core.JoinInfo;
3738
import org.apache.calcite.rel.core.JoinRelType;
3839
import org.apache.calcite.rel.metadata.RelMdCollation;
3940
import org.apache.calcite.rel.metadata.RelMetadataQuery;
@@ -71,6 +72,9 @@
7172
* {@link EnumerableConvention enumerable calling convention} using
7273
* a merge algorithm. */
7374
public class EnumerableMergeJoin extends Join implements EnumerableRel {
75+
@SuppressWarnings("HidingField")
76+
private final JoinInfo joinInfo;
77+
7478
protected EnumerableMergeJoin(
7579
RelOptCluster cluster,
7680
RelTraitSet traits,
@@ -80,6 +84,12 @@ protected EnumerableMergeJoin(
8084
Set<CorrelationId> variablesSet,
8185
JoinRelType joinType) {
8286
super(cluster, traits, ImmutableList.of(), left, right, condition, variablesSet, joinType);
87+
// TODO: support IS NOT DISTINCT FROM condition as join keys of MergeJoin
88+
// EnumerableMergeJoin cannot use IS NOT DISTINCT FROM condition as join keys
89+
// (In the algorithm of MergeJoin in Enumerable convention, it will stop
90+
// when leftKey or rightKey is NULL), so we create a new JoinInfo that only
91+
// considers EQUALS.
92+
this.joinInfo = JoinInfo.createWithStrictEquality(left, right, condition);
8393
assert getConvention() instanceof EnumerableConvention;
8494
final List<RelCollation> leftCollations = getCollations(left.getTraitSet());
8595
final List<RelCollation> rightCollations = getCollations(right.getTraitSet());

core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableMergeJoinRule.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ protected EnumerableMergeJoinRule(Config config) {
6060

6161
@Override public @Nullable RelNode convert(RelNode rel) {
6262
Join join = (Join) rel;
63-
final JoinInfo info = join.analyzeCondition();
63+
// EnumerableMergeJoin cannot use IS NOT DISTINCT FROM condition as join keys. More details
64+
// in EnumerableMergeJoin.java.
65+
final JoinInfo info =
66+
JoinInfo.createWithStrictEquality(join.getLeft(), join.getRight(), join.getCondition());
6467
if (!EnumerableMergeJoin.isMergeJoinSupported(join.getJoinType())) {
6568
// EnumerableMergeJoin only supports certain join types.
6669
return null;

core/src/main/java/org/apache/calcite/adapter/enumerable/PhysType.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,15 @@ Expression fieldReference(Expression expression, int field,
134134
*/
135135
Expression generateAccessorWithoutNulls(List<Integer> fields);
136136

137+
/**
138+
* Similar to {@link #generateAccessor(List)} and {@link #generateAccessorWithoutNulls(List)},
139+
* but it's null-aware. It returns an Expression which evaluates to null (if one of
140+
* the fields is null and that field isn't null-safe) or a list of
141+
* fields that may contain null (no field is null, or there are fields with null but they are
142+
* null-safe) at runtime.
143+
*/
144+
Expression generateNullAwareAccessor(List<Integer> fields, List<Boolean> nullExclusionFlags);
145+
137146
/** Generates a selector for the given fields from an expression, with the
138147
* default row format. */
139148
Expression generateSelector(

core/src/main/java/org/apache/calcite/adapter/enumerable/PhysTypeImpl.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,21 @@ private List<Expression> fieldReferences(
642642
}
643643
}
644644

645+
private static Expression getListExpressionAllowSingleElement(
646+
Expressions.FluentList<Expression> list) {
647+
assert list.size() > 0;
648+
649+
if (list.size() == 1) {
650+
return Expressions.call(
651+
List.class,
652+
null,
653+
BuiltInMethod.LIST1.method,
654+
list);
655+
} else {
656+
return getListExpression(list);
657+
}
658+
}
659+
645660
private static Expression getListExpression(Expressions.FluentList<Expression> list) {
646661
assert list.size() >= 2;
647662

@@ -713,6 +728,41 @@ private static Expression getListExpression(Expressions.FluentList<Expression> l
713728
return Expressions.lambda(Function1.class, exp, v1);
714729
}
715730

731+
@Override public Expression generateNullAwareAccessor(
732+
List<Integer> fields,
733+
List<Boolean> nullExclusionFlags) {
734+
assert fields.size() == nullExclusionFlags.size();
735+
ParameterExpression v1 = Expressions.parameter(javaRowClass, "v1");
736+
if (fields.isEmpty()) {
737+
return Expressions.lambda(
738+
Function1.class,
739+
Expressions.field(
740+
null,
741+
BuiltInMethod.COMPARABLE_EMPTY_LIST.field),
742+
v1);
743+
}
744+
Expressions.FluentList<Expression> list = Expressions.list();
745+
for (int field : fields) {
746+
list.add(fieldReference(v1, field));
747+
}
748+
749+
// In the HashJoin key selector scenario, when there is exactly one join key and it is
750+
// null-safe, a row whose join key is null must still be correctly recognized and extracted.
751+
// Therefore, when list.size() == 1, this method returns a list containing a single
752+
// element (which may be null) rather than returning the element directly.
753+
Expression exp = getListExpressionAllowSingleElement(list);
754+
for (int i = list.size() - 1; i >= 0; i--) {
755+
if (nullExclusionFlags.get(i)) {
756+
exp =
757+
Expressions.condition(
758+
Expressions.equal(list.get(i), Expressions.constant(null)),
759+
Expressions.constant(null),
760+
exp);
761+
}
762+
}
763+
return Expressions.lambda(Function1.class, exp, v1);
764+
}
765+
716766
@Override public Expression fieldReference(
717767
Expression expression, int field) {
718768
return fieldReference(expression, field, null);

core/src/main/java/org/apache/calcite/plan/RelOptUtil.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1470,11 +1470,24 @@ private static void splitJoinCondition(
14701470
nonEquiList.add(condition);
14711471
}
14721472

1473-
/** Builds an equi-join condition from a set of left and right keys. */
1473+
/** Builds an equi-join condition by conjoining an EQUALS comparison for each corresponding pair of
1474+
* leftKeys and rightKeys. */
14741475
public static RexNode createEquiJoinCondition(
14751476
final RelNode left, final List<Integer> leftKeys,
14761477
final RelNode right, final List<Integer> rightKeys,
14771478
final RexBuilder rexBuilder) {
1479+
List<Boolean> filterNulls = Collections.nCopies(leftKeys.size(), Boolean.TRUE);
1480+
return createHashJoinCondition(left, leftKeys, right, rightKeys,
1481+
filterNulls, rexBuilder);
1482+
}
1483+
1484+
/** Builds an equi-join condition by conjoining operators for each corresponding pair of
1485+
* leftKeys and rightKeys. The operator is EQUALS if filterNulls is true for that
1486+
* position, otherwise IS NOT DISTINCT FROM. */
1487+
public static RexNode createHashJoinCondition(
1488+
final RelNode left, final List<Integer> leftKeys,
1489+
final RelNode right, final List<Integer> rightKeys,
1490+
final List<Boolean> filterNulls, final RexBuilder rexBuilder) {
14781491
final List<RelDataType> leftTypes =
14791492
RelOptUtil.getFieldTypeList(left.getRowType());
14801493
final List<RelDataType> rightTypes =
@@ -1484,7 +1497,11 @@ public static RexNode createEquiJoinCondition(
14841497
@Override public RexNode get(int index) {
14851498
final int leftKey = leftKeys.get(index);
14861499
final int rightKey = rightKeys.get(index);
1487-
return rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
1500+
final SqlOperator operator =
1501+
filterNulls.get(index)
1502+
? SqlStdOperatorTable.EQUALS
1503+
: SqlStdOperatorTable.IS_NOT_DISTINCT_FROM;
1504+
return rexBuilder.makeCall(operator,
14881505
rexBuilder.makeInputRef(leftTypes.get(leftKey), leftKey),
14891506
rexBuilder.makeInputRef(rightTypes.get(rightKey),
14901507
leftTypes.size() + rightKey));

core/src/main/java/org/apache/calcite/rel/core/Join.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ protected Join(
104104
this.condition = requireNonNull(condition, "condition");
105105
this.variablesSet = ImmutableSet.copyOf(variablesSet);
106106
this.joinType = requireNonNull(joinType, "joinType");
107-
this.joinInfo = JoinInfo.createWithStrictEquality(left, right, condition);
107+
this.joinInfo = JoinInfo.of(left, right, condition);
108108
this.hints = ImmutableList.copyOf(hints);
109109
}
110110

core/src/main/java/org/apache/calcite/rel/core/JoinInfo.java

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,45 +29,55 @@
2929
import com.google.common.collect.ImmutableList;
3030

3131
import java.util.ArrayList;
32+
import java.util.Collections;
3233
import java.util.List;
3334

3435
import static java.util.Objects.requireNonNull;
3536

3637
/** An analyzed join condition.
3738
*
3839
* <p>It is useful for the many algorithms that care whether a join has an
39-
* equi-join condition.
40+
* equi-join condition (built from EQUALS and/or IS NOT DISTINCT FROM comparisons).
4041
*
41-
* <p>You can create one using {@link #createWithStrictEquality}, or call
42+
* <p>You can create one using {@link #of(RelNode, RelNode, RexNode)},
43+
* {@link #createWithStrictEquality}, or call
4244
* {@link Join#analyzeCondition()}; many kinds of join cache their
4345
* join info, especially those that are equi-joins.
4446
*
4547
* @see Join#analyzeCondition() */
4648
public class JoinInfo {
4749
public final ImmutableIntList leftKeys;
4850
public final ImmutableIntList rightKeys;
51+
// for each join key, whether it filters out nulls. If TRUE, the join key uses EQUALS semantics
52+
// (not null-safe); if FALSE, it uses IS NOT DISTINCT FROM semantics (null-safe).
53+
public final ImmutableList<Boolean> nullExclusionFlags;
54+
// non-equi parts of join condition.
55+
// after CALCITE-7327, IS NOT DISTINCT FROM can be treated as a hash join key and is no longer
56+
// part of nonEquiConditions.
4957
public final ImmutableList<RexNode> nonEquiConditions;
5058

5159
/** Creates a JoinInfo. */
5260
protected JoinInfo(ImmutableIntList leftKeys, ImmutableIntList rightKeys,
53-
ImmutableList<RexNode> nonEquiConditions) {
61+
ImmutableList<Boolean> nullExclusionFlags, ImmutableList<RexNode> nonEquiConditions) {
5462
this.leftKeys = requireNonNull(leftKeys, "leftKeys");
5563
this.rightKeys = requireNonNull(rightKeys, "rightKeys");
64+
this.nullExclusionFlags = requireNonNull(nullExclusionFlags, "nullExclusionFlags");
5665
this.nonEquiConditions =
5766
requireNonNull(nonEquiConditions, "nonEquiConditions");
58-
assert leftKeys.size() == rightKeys.size();
67+
assert leftKeys.size() == rightKeys.size() && leftKeys.size() == nullExclusionFlags.size();
5968
}
6069

6170
/** Creates a {@code JoinInfo} by analyzing a condition. */
6271
public static JoinInfo of(RelNode left, RelNode right, RexNode condition) {
6372
final List<Integer> leftKeys = new ArrayList<>();
6473
final List<Integer> rightKeys = new ArrayList<>();
65-
final List<Boolean> filterNulls = new ArrayList<>();
74+
final List<Boolean> nullExclusionFlags = new ArrayList<>();
6675
final List<RexNode> nonEquiList = new ArrayList<>();
6776
RelOptUtil.splitJoinCondition(left, right, condition, leftKeys, rightKeys,
68-
filterNulls, nonEquiList);
77+
nullExclusionFlags, nonEquiList);
6978
return new JoinInfo(ImmutableIntList.copyOf(leftKeys),
70-
ImmutableIntList.copyOf(rightKeys), ImmutableList.copyOf(nonEquiList));
79+
ImmutableIntList.copyOf(rightKeys), ImmutableList.copyOf(nullExclusionFlags),
80+
ImmutableList.copyOf(nonEquiList));
7181
}
7282

7383
/** Creates a {@code JoinInfo} by analyzing a condition.
@@ -82,14 +92,18 @@ public static JoinInfo createWithStrictEquality(RelNode left,
8292
final List<RexNode> nonEquiList = new ArrayList<>();
8393
RelOptUtil.splitJoinCondition(left, right, condition, leftKeys, rightKeys,
8494
null, nonEquiList);
95+
List<Boolean> nullExclusionFlags = Collections.nCopies(leftKeys.size(), Boolean.TRUE);
8596
return new JoinInfo(ImmutableIntList.copyOf(leftKeys),
86-
ImmutableIntList.copyOf(rightKeys), ImmutableList.copyOf(nonEquiList));
97+
ImmutableIntList.copyOf(rightKeys), ImmutableList.copyOf(nullExclusionFlags),
98+
ImmutableList.copyOf(nonEquiList));
8799
}
88100

89-
/** Creates an equi-join. */
101+
/** Creates an equi-join (only considers EQUALS operations). */
90102
public static JoinInfo of(ImmutableIntList leftKeys,
91103
ImmutableIntList rightKeys) {
92-
return new JoinInfo(leftKeys, rightKeys, ImmutableList.of());
104+
List<Boolean> nullExclusionFlags = Collections.nCopies(leftKeys.size(), Boolean.TRUE);
105+
return new JoinInfo(leftKeys, rightKeys,
106+
ImmutableList.copyOf(nullExclusionFlags), ImmutableList.of());
93107
}
94108

95109
/** Returns whether this is an equi-join. */
@@ -117,7 +131,7 @@ public RexNode getRemaining(RexBuilder rexBuilder) {
117131

118132
public RexNode getEquiCondition(RelNode left, RelNode right,
119133
RexBuilder rexBuilder) {
120-
return RelOptUtil.createEquiJoinCondition(left, leftKeys, right, rightKeys,
134+
return RelOptUtil.createHashJoinCondition(left, leftKeys, right, rightKeys, nullExclusionFlags,
121135
rexBuilder);
122136
}
123137

core/src/main/java/org/apache/calcite/rel/rules/LoptOptimizeJoinRule.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2053,7 +2053,7 @@ public static boolean isRemovableSelfJoin(Join joinRel) {
20532053
*/
20542054
private static boolean areSelfJoinKeysUnique(RelMetadataQuery mq,
20552055
RelNode leftRel, RelNode rightRel, RexNode joinFilters) {
2056-
final JoinInfo joinInfo = JoinInfo.createWithStrictEquality(leftRel, rightRel, joinFilters);
2056+
final JoinInfo joinInfo = JoinInfo.of(leftRel, rightRel, joinFilters);
20572057

20582058
// Make sure each key on the left maps to the same simple column as the
20592059
// corresponding key on the right

core/src/main/java/org/apache/calcite/rel/rules/LoptSemiJoinOptimizer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ private static int isSuitableFilter(
264264

265265
RelNode factRel = multiJoin.getJoinFactor(factIdx);
266266
RelNode dimRel = multiJoin.getJoinFactor(dimIdx);
267-
final JoinInfo joinInfo = JoinInfo.createWithStrictEquality(factRel, dimRel, semiJoinCondition);
267+
final JoinInfo joinInfo = JoinInfo.of(factRel, dimRel, semiJoinCondition);
268268
assert !joinInfo.leftKeys.isEmpty();
269269

270270
// mutable copies

0 commit comments

Comments
 (0)