Skip to content

Commit 643caa7

Browse files
authored
[fix](count) fix wrong count push down logic (apache#56182)
### What problem does this PR solve? Introduced from the topn optimization. When executing a query like `select count(*) from tbl`, it triggers the "count push down optimization", which means it sends some "dummy" splits to the BE, each carrying a part of the total row count. But due to the bug, the BE would use the range offset info in these dummy splits to do the row group filter logic, which is incorrect and results in an empty result because all row groups get filtered out. This PR fixes it, so that the row group filter is skipped when the split is a dummy split. How to reproduce: 1. find an Iceberg table with a file size of at least 16MB 2. set file_split_size=4MB 3. run `select count(*)` on the table — it will return an empty result
1 parent 972941b commit 643caa7

File tree

7 files changed

+43
-6
lines changed

7 files changed

+43
-6
lines changed

be/src/vec/exec/scan/file_scanner.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1154,7 +1154,17 @@ Status FileScanner::_get_next_reader() {
11541154
}
11551155

11561156
_cur_reader->set_push_down_agg_type(_get_push_down_agg_type());
1157-
RETURN_IF_ERROR(_set_fill_or_truncate_columns(need_to_get_parsed_schema));
1157+
if (_get_push_down_agg_type() == TPushAggOp::type::COUNT &&
1158+
range.__isset.table_format_params &&
1159+
range.table_format_params.table_level_row_count >= 0) {
1160+
// This is a table level count push down operation, no need to call
1161+
// _set_fill_or_truncate_columns.
1162+
// in _set_fill_or_truncate_columns, we will use [range.start_offset, end offset]
1163+
// to filter the row group. But if this is count push down, the offset is undefined,
1164+
// causing incorrect row group filter and may return empty result.
1165+
} else {
1166+
RETURN_IF_ERROR(_set_fill_or_truncate_columns(need_to_get_parsed_schema));
1167+
}
11581168
_cur_reader_eof = false;
11591169
break;
11601170
}

fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,10 +467,12 @@ protected void setScanParams(TFileRangeDesc rangeDesc, Split split) {
467467
}
468468
transactionalHiveDesc.setDeleteDeltas(deleteDeltaDescs);
469469
tableFormatFileDesc.setTransactionalHiveParams(transactionalHiveDesc);
470+
tableFormatFileDesc.setTableLevelRowCount(-1);
470471
rangeDesc.setTableFormatParams(tableFormatFileDesc);
471472
} else {
472473
TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc();
473474
tableFormatFileDesc.setTableFormatType(TableFormatType.HIVE.value());
475+
tableFormatFileDesc.setTableLevelRowCount(-1);
474476
rangeDesc.setTableFormatParams(tableFormatFileDesc);
475477
}
476478
}
@@ -593,3 +595,4 @@ protected TFileCompressType getFileCompressType(FileSplit fileSplit) throws User
593595
}
594596
}
595597

598+

fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,9 @@ private void setIcebergParams(TFileRangeDesc rangeDesc, IcebergSplit icebergSpli
181181
tableFormatFileDesc.setTableFormatType(icebergSplit.getTableFormatType().value());
182182
if (tableLevelPushDownCount) {
183183
tableFormatFileDesc.setTableLevelRowCount(icebergSplit.getTableLevelRowCount());
184+
} else {
185+
// MUST explicitly set to -1, to be distinct from valid row count >= 0
186+
tableFormatFileDesc.setTableLevelRowCount(-1);
184187
}
185188
TIcebergFileDesc fileDesc = new TIcebergFileDesc();
186189
fileDesc.setFormatVersion(formatVersion);
@@ -621,3 +624,4 @@ private Optional<NotSupportedException> checkNotSupportedException(Exception e)
621624
return Optional.empty();
622625
}
623626
}
627+

fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,9 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit)
243243
}
244244
if (paimonSplit.getRowCount().isPresent()) {
245245
tableFormatFileDesc.setTableLevelRowCount(paimonSplit.getRowCount().get());
246+
} else {
247+
// MUST explicitly set to -1, to be distinct from valid row count >= 0
248+
tableFormatFileDesc.setTableLevelRowCount(-1);
246249
}
247250
tableFormatFileDesc.setPaimonParams(fileDesc);
248251
Map<String, String> partitionValues = paimonSplit.getPaimonPartitionValues();
@@ -714,3 +717,4 @@ private Table getProcessedTable() throws UserException {
714717
return baseTable;
715718
}
716719
}
720+

gensrc/thrift/PlanNodes.thrift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ struct TTableFormatFileDesc {
398398
6: optional TMaxComputeFileDesc max_compute_params
399399
7: optional TTrinoConnectorFileDesc trino_connector_params
400400
8: optional TLakeSoulFileDesc lakesoul_params
401-
9: optional i64 table_level_row_count
401+
9: optional i64 table_level_row_count = -1
402402
}
403403

404404
// Deprecated, hive text talbe is a special format, not a serde type

regression-test/data/external_table_p0/iceberg/test_iceberg_optimize_count.out

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,18 @@
1111
-- !q04 --
1212
1000
1313

14+
-- !q01 --
15+
1000
16+
17+
-- !q02 --
18+
1000
19+
20+
-- !q03 --
21+
1000
22+
23+
-- !q04 --
24+
1000
25+
1426
-- !q05 --
1527
1000
1628

regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,14 @@ suite("test_iceberg_optimize_count", "p0,external,doris,external_docker,external
5050
// use push down count
5151
sql """ set enable_count_push_down_for_external_table=true; """
5252

53-
qt_q01 """${sqlstr1}"""
54-
qt_q02 """${sqlstr2}"""
55-
qt_q03 """${sqlstr3}"""
56-
qt_q04 """${sqlstr4}"""
53+
for (String val: ["1K", "0"]) {
54+
sql "set file_split_size=${val}"
55+
qt_q01 """${sqlstr1}"""
56+
qt_q02 """${sqlstr2}"""
57+
qt_q03 """${sqlstr3}"""
58+
qt_q04 """${sqlstr4}"""
59+
}
60+
sql "unset variable file_split_size;"
5761

5862
// traditional mode
5963
sql """set num_files_in_batch_mode=100000"""

0 commit comments

Comments
 (0)