Skip to content

Commit f626831

Browse files
authored
Merge branch 'master' into see-master
2 parents ff95c0c + 7e8701b commit f626831

File tree

1 file changed

+13
-25
lines changed

1 file changed

+13
-25
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import org.apache.doris.nereids.trees.expressions.literal.Literal;
2727
import org.apache.doris.nereids.trees.plans.JoinType;
2828
import org.apache.doris.nereids.trees.plans.algebra.Join;
29-
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
3029
import org.apache.doris.nereids.util.ExpressionUtils;
3130
import org.apache.doris.statistics.ColumnStatistic;
3231
import org.apache.doris.statistics.ColumnStatisticBuilder;
@@ -52,6 +51,7 @@ public class JoinEstimation {
5251
private static double TRUSTABLE_CONDITION_SELECTIVITY_POW_FACTOR = 2.0;
5352
private static double UNTRUSTABLE_CONDITION_SELECTIVITY_LINEAR_FACTOR = 0.9;
5453
private static double TRUSTABLE_UNIQ_THRESHOLD = 0.9;
54+
private static double OUTER_JOIN_NULL_SUPPLELMENT_RATIO = 0.1;
5555

5656
private static EqualPredicate normalizeEqualPredJoinCondition(EqualPredicate equal, Statistics rightStats) {
5757
boolean changeOrder = equal.left().getInputSlots().stream()
@@ -363,16 +363,17 @@ private static Statistics estimateSemiOrAnti(Statistics leftStats, Statistics ri
363363
* outer join generates nulls.
364364
* for example, T1 left outer join T2,
365365
* in join results, columns from T2 contain nulls.
366-
* we estimate the numNulls as inner_join_rows - semi_join_rows
366+
* we estimate the numNulls supplemented to each T2 column as max(T1.row - inner_join_rows, T1.row * 0.1)
367367
*/
368-
private static void updateNumNullsForOuterJoin(Statistics crossJoinStats, Statistics targetSide,
369-
double supplementNulls) {
370-
for (Map.Entry<Expression, ColumnStatistic> entry : targetSide.columnStatistics().entrySet()) {
371-
double numNulls = supplementNulls;
368+
private static void updateNumNullsForOuterJoin(Statistics crossJoinStats, Statistics innerJoinStats,
369+
Statistics probeStats, Statistics buildStats, double estJoinRowCount) {
370+
for (Map.Entry<Expression, ColumnStatistic> entry : buildStats.columnStatistics().entrySet()) {
371+
double numNulls = Math.max(probeStats.getRowCount() - innerJoinStats.getRowCount(),
372+
probeStats.getRowCount() * OUTER_JOIN_NULL_SUPPLELMENT_RATIO);
372373
if (!entry.getValue().isUnKnown()) {
373374
if (entry.getValue().numNulls > 0) {
374-
numNulls = Math.max(1, supplementNulls);
375-
numNulls = Math.min(entry.getValue().numNulls, numNulls);
375+
numNulls += entry.getValue().numNulls / buildStats.getRowCount() * estJoinRowCount;
376+
numNulls = Math.max(1, numNulls);
376377
}
377378
ColumnStatistic colStats = new ColumnStatisticBuilder(entry.getValue())
378379
.setNumNulls(numNulls)
@@ -402,32 +403,19 @@ public static Statistics estimate(Statistics leftStats, Statistics rightStats, J
402403
return innerJoinStats;
403404
} else if (joinType == JoinType.LEFT_OUTER_JOIN) {
404405
double rowCount = Math.max(leftStats.getRowCount(), innerJoinStats.getRowCount());
405-
LogicalJoin leftSemi = ((LogicalJoin) join).withJoinType(JoinType.LEFT_SEMI_JOIN);
406-
Statistics semiStats = estimateSemiOrAnti(leftStats, rightStats, innerJoinStats, leftSemi);
407-
double supplementNull = Math.max(1, leftStats.getRowCount() - semiStats.getRowCount());
408-
updateNumNullsForOuterJoin(crossJoinStats, rightStats, supplementNull);
406+
updateNumNullsForOuterJoin(crossJoinStats, innerJoinStats, leftStats, rightStats, rowCount);
409407
updateJoinConditionColumnStatistics(crossJoinStats, join);
410408
return crossJoinStats.withRowCountAndEnforceValid(rowCount);
411409
} else if (joinType == JoinType.RIGHT_OUTER_JOIN) {
412410
double rowCount = Math.max(rightStats.getRowCount(), innerJoinStats.getRowCount());
413-
LogicalJoin rightSemi = ((LogicalJoin) join).withJoinType(JoinType.RIGHT_SEMI_JOIN);
414-
Statistics semiStats = estimateSemiOrAnti(leftStats, rightStats, innerJoinStats, rightSemi);
415-
double supplementNull = Math.max(1, rightStats.getRowCount() - semiStats.getRowCount());
416-
updateNumNullsForOuterJoin(crossJoinStats, leftStats, supplementNull);
411+
updateNumNullsForOuterJoin(crossJoinStats, innerJoinStats, rightStats, leftStats, rowCount);
417412
updateJoinConditionColumnStatistics(crossJoinStats, join);
418413
return crossJoinStats.withRowCountAndEnforceValid(rowCount);
419414
} else if (joinType == JoinType.FULL_OUTER_JOIN) {
420415
double rowCount = Math.max(leftStats.getRowCount(), innerJoinStats.getRowCount());
421416
rowCount = Math.max(rightStats.getRowCount(), rowCount);
422-
LogicalJoin leftSemiJoin = ((LogicalJoin) join).withJoinType(JoinType.LEFT_SEMI_JOIN);
423-
Statistics leftSemiStats = estimateSemiOrAnti(leftStats, rightStats, innerJoinStats, leftSemiJoin);
424-
double supplementNullRight = Math.max(1, leftStats.getRowCount() - leftSemiStats.getRowCount());
425-
updateNumNullsForOuterJoin(crossJoinStats, rightStats, supplementNullRight);
426-
427-
LogicalJoin rightSemiJoin = ((LogicalJoin) join).withJoinType(JoinType.RIGHT_SEMI_JOIN);
428-
Statistics rightSemiStats = estimateSemiOrAnti(leftStats, leftStats, innerJoinStats, rightSemiJoin);
429-
double supplementNullLeft = Math.max(1, rightStats.getRowCount() - rightSemiStats.getRowCount());
430-
updateNumNullsForOuterJoin(crossJoinStats, leftStats, supplementNullLeft);
417+
updateNumNullsForOuterJoin(crossJoinStats, innerJoinStats, leftStats, rightStats, rowCount);
418+
updateNumNullsForOuterJoin(crossJoinStats, innerJoinStats, rightStats, leftStats, rowCount);
431419
updateJoinConditionColumnStatistics(crossJoinStats, join);
432420
return crossJoinStats.withRowCountAndEnforceValid(rowCount);
433421
} else if (joinType == JoinType.CROSS_JOIN) {

0 commit comments

Comments
 (0)