Skip to content

Commit ad1ad7f

Browse files
authored
[performance](agg) support pushing down count aggregation on non-nullable columns (#58103)
### What problem does this PR solve?

Before:

```
select count(1), max(l_orderkey), min(l_quantity) from lineitem_bak;
+-----------+-----------------+-----------------+
| count(1)  | max(l_orderkey) | min(l_quantity) |
+-----------+-----------------+-----------------+
| 600037902 |       600000000 |            1.00 |
+-----------+-----------------+-----------------+
1 row in set (1.89 sec)
```

After:

```
select count(1), max(l_orderkey), min(l_quantity) from lineitem_bak;
+-----------+-----------------+-----------------+
| count(1)  | max(l_orderkey) | min(l_quantity) |
+-----------+-----------------+-----------------+
| 600037902 |       600000000 |            1.00 |
+-----------+-----------------+-----------------+
1 row in set (0.83 sec)
```
1 parent 2c94ebd commit ad1ad7f

File tree

2 files changed

+90
-13
lines changed

2 files changed

+90
-13
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161

6262
import com.google.common.collect.ImmutableList;
6363

64+
import java.util.HashSet;
6465
import java.util.List;
6566
import java.util.Map;
6667
import java.util.Optional;
@@ -558,28 +559,54 @@ private LogicalAggregate<? extends Plan> storageLayerAggregate(
558559
}
559560

560561
Set<AggregateFunction> aggregateFunctions = aggregate.getAggregateFunctions();
561-
Set<Class<? extends AggregateFunction>> functionClasses = aggregateFunctions
562-
.stream()
563-
.map(AggregateFunction::getClass)
564-
.collect(Collectors.toSet());
565-
562+
// Use for loop to replace Stream API
563+
Set<Class<? extends AggregateFunction>> functionClasses = new HashSet<>();
566564
Map<Class<? extends AggregateFunction>, PushDownAggOp> supportedAgg = PushDownAggOp.supportedFunctions();
567-
if (!supportedAgg.keySet().containsAll(functionClasses)) {
568-
return canNotPush;
565+
566+
boolean containsCount = false;
567+
Set<SlotReference> checkNullSlots = new HashSet<>();
568+
569+
// Single loop through aggregateFunctions to handle multiple logic
570+
for (AggregateFunction function : aggregateFunctions) {
571+
Class<? extends AggregateFunction> functionClass = function.getClass();
572+
functionClasses.add(functionClass);
573+
// Check if any function has arity > 1
574+
if (function.arity() > 1) {
575+
return canNotPush;
576+
}
577+
578+
// Check if contains Count function
579+
if (functionClass.equals(Count.class)) {
580+
containsCount = true;
581+
if (!function.getArguments().isEmpty()) {
582+
Expression arg0 = function.getArguments().get(0);
583+
if (arg0 instanceof SlotReference) {
584+
checkNullSlots.add((SlotReference) arg0);
585+
} else if (arg0 instanceof Cast) {
586+
Expression child0 = arg0.child(0);
587+
if (child0 instanceof SlotReference) {
588+
checkNullSlots.add((SlotReference) child0);
589+
}
590+
}
591+
}
592+
}
593+
594+
// Check if function is supported by supportedAgg
595+
if (!supportedAgg.containsKey(functionClass)) {
596+
return canNotPush;
597+
}
569598
}
599+
570600
if (logicalScan instanceof LogicalOlapScan) {
571601
LogicalOlapScan logicalOlapScan = (LogicalOlapScan) logicalScan;
572602
KeysType keysType = logicalOlapScan.getTable().getKeysType();
573-
if (functionClasses.contains(Count.class) && keysType != KeysType.DUP_KEYS) {
603+
if (containsCount && keysType != KeysType.DUP_KEYS) {
574604
return canNotPush;
575605
}
576-
if (functionClasses.contains(Count.class) && logicalOlapScan.isDirectMvScan()) {
606+
if (containsCount && logicalOlapScan.isDirectMvScan()) {
577607
return canNotPush;
578608
}
579609
}
580-
if (aggregateFunctions.stream().anyMatch(fun -> fun.arity() > 1)) {
581-
return canNotPush;
582-
}
583610

584611
// TODO: refactor this to process slot reference or expression together
585612
boolean onlyContainsSlotOrNumericCastSlot = aggregateFunctions.stream()
@@ -665,7 +692,7 @@ private LogicalAggregate<? extends Plan> storageLayerAggregate(
665692
// NULL value behavior in `count` function is zero, so
666693
// we should not use row_count to speed up query. the col
667694
// must be not null
668-
if (column.isAllowNull()) {
695+
if (column.isAllowNull() && checkNullSlots.contains(slot)) {
669696
return canNotPush;
670697
}
671698
}

regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,56 @@ suite("test_pushdown_explain") {
7474
contains "pushAggOp=NONE"
7575
}
7676

77+
// Test cases for NULL column handling in count pushdown optimization
78+
sql "DROP TABLE IF EXISTS test_null_columns"
79+
sql """ CREATE TABLE `test_null_columns` (
80+
`id` INT NOT NULL COMMENT 'ID',
81+
`nullable_col` VARCHAR(11) NULL COMMENT 'Nullable column',
82+
`non_nullable_col` VARCHAR(11) NOT NULL COMMENT 'Non-nullable column'
83+
) ENGINE=OLAP
84+
DUPLICATE KEY(`id`)
85+
DISTRIBUTED BY HASH(`id`) BUCKETS 48
86+
PROPERTIES (
87+
"replication_allocation" = "tag.location.default: 1",
88+
"min_load_replica_num" = "-1",
89+
"is_being_synced" = "false",
90+
"colocate_with" = "groupa1",
91+
"storage_format" = "V2",
92+
"light_schema_change" = "true",
93+
"disable_auto_compaction" = "false",
94+
"enable_single_replica_compaction" = "false"
95+
); """
96+
sql """ insert into test_null_columns values(1, NULL, "value1"); """
97+
sql """ insert into test_null_columns values(2, NULL, "value2"); """
98+
sql """ insert into test_null_columns values(3, "not_null", "value3"); """
99+
100+
// Test count(1) and count(*) with NULL columns - should push Count optimization
101+
explain {
102+
sql("select count(1) from test_null_columns;")
103+
contains "pushAggOp=COUNT"
104+
}
105+
explain {
106+
sql("select count(*) from test_null_columns;")
107+
contains "pushAggOp=COUNT"
108+
}
109+
110+
explain {
111+
sql("select count(non_nullable_col), min(non_nullable_col), max(non_nullable_col) from test_null_columns;")
112+
contains "pushAggOp=MIX"
113+
}
114+
explain {
115+
sql("select count(), min(non_nullable_col), max(non_nullable_col) from test_null_columns;")
116+
contains "pushAggOp=MIX"
117+
}
118+
explain {
119+
sql("select count(*), min(non_nullable_col), max(non_nullable_col) from test_null_columns;")
120+
contains "pushAggOp=MIX"
121+
}
122+
explain {
123+
sql("select count(nullable_col), min(nullable_col), max(nullable_col) from test_null_columns;")
124+
contains "pushAggOp=NONE"
125+
}
126+
77127
sql "DROP TABLE IF EXISTS table_unique0"
78128
sql """
79129
CREATE TABLE `table_unique0` (

0 commit comments

Comments
 (0)