Skip to content

Commit 643caa7

Browse files
authored
[fix](count) fix wrong count push down logic (apache#56182)
### What problem does this PR solve? Introduced from the topn optimization. When executing a query like `select count(*) from tbl`, it triggers the "count push down optimization", which means it sends some "dummy" splits to the BE, each carrying a part of the total row count. But due to the bug, the BE would use the range offset info in these dummy splits to do the row group filter logic, which is incorrect and results in an empty result because all row groups get filtered out. This PR fixes it, so that the row group filter is skipped when the split is a dummy split. How to reproduce: 1. find an Iceberg table with a file size of at least 16MB 2. set file_split_size=4MB 3. run `select count(*)` on the table — it will return an empty result
1 parent 972941b commit 643caa7

File tree

7 files changed

+43
-6
lines changed

7 files changed

+43
-6
lines changed

be/src/vec/exec/scan/file_scanner.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1154,7 +1154,17 @@ Status FileScanner::_get_next_reader() {
11541154
}
11551155

11561156
_cur_reader->set_push_down_agg_type(_get_push_down_agg_type());
1157-
RETURN_IF_ERROR(_set_fill_or_truncate_columns(need_to_get_parsed_schema));
1157+
if (_get_push_down_agg_type() == TPushAggOp::type::COUNT &&
1158+
range.__isset.table_format_params &&
1159+
range.table_format_params.table_level_row_count >= 0) {
1160+
// This is a table level count push down operation, no need to call
1161+
// _set_fill_or_truncate_columns.
1162+
// in _set_fill_or_truncate_columns, we will use [range.start_offset, end offset]
1163+
// to filter the row group. But if this is count push down, the offset is undefined,
1164+
// causing incorrect row group filter and may return empty result.
1165+
} else {
1166+
RETURN_IF_ERROR(_set_fill_or_truncate_columns(need_to_get_parsed_schema));
1167+
}
11581168
_cur_reader_eof = false;
11591169
break;
11601170
}

fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,10 +467,12 @@ protected void setScanParams(TFileRangeDesc rangeDesc, Split split) {
467467
}
468468
transactionalHiveDesc.setDeleteDeltas(deleteDeltaDescs);
469469
tableFormatFileDesc.setTransactionalHiveParams(transactionalHiveDesc);
470+
tableFormatFileDesc.setTableLevelRowCount(-1);
470471
rangeDesc.setTableFormatParams(tableFormatFileDesc);
471472
} else {
472473
TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc();
473474
tableFormatFileDesc.setTableFormatType(TableFormatType.HIVE.value());
475+
tableFormatFileDesc.setTableLevelRowCount(-1);
474476
rangeDesc.setTableFormatParams(tableFormatFileDesc);
475477
}
476478
}
@@ -593,3 +595,4 @@ protected TFileCompressType getFileCompressType(FileSplit fileSplit) throws User
593595
}
594596
}
595597

598+

fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,9 @@ private void setIcebergParams(TFileRangeDesc rangeDesc, IcebergSplit icebergSpli
181181
tableFormatFileDesc.setTableFormatType(icebergSplit.getTableFormatType().value());
182182
if (tableLevelPushDownCount) {
183183
tableFormatFileDesc.setTableLevelRowCount(icebergSplit.getTableLevelRowCount());
184+
} else {
185+
// MUST explicitly set to -1, to be distinct from valid row count >= 0
186+
tableFormatFileDesc.setTableLevelRowCount(-1);
184187
}
185188
TIcebergFileDesc fileDesc = new TIcebergFileDesc();
186189
fileDesc.setFormatVersion(formatVersion);
@@ -621,3 +624,4 @@ private Optional<NotSupportedException> checkNotSupportedException(Exception e)
621624
return Optional.empty();
622625
}
623626
}
627+

fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,9 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit)
243243
}
244244
if (paimonSplit.getRowCount().isPresent()) {
245245
tableFormatFileDesc.setTableLevelRowCount(paimonSplit.getRowCount().get());
246+
} else {
247+
// MUST explicitly set to -1, to be distinct from valid row count >= 0
248+
tableFormatFileDesc.setTableLevelRowCount(-1);
246249
}
247250
tableFormatFileDesc.setPaimonParams(fileDesc);
248251
Map<String, String> partitionValues = paimonSplit.getPaimonPartitionValues();
@@ -714,3 +717,4 @@ private Table getProcessedTable() throws UserException {
714717
return baseTable;
715718
}
716719
}
720+

gensrc/thrift/PlanNodes.thrift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ struct TTableFormatFileDesc {
398398
6: optional TMaxComputeFileDesc max_compute_params
399399
7: optional TTrinoConnectorFileDesc trino_connector_params
400400
8: optional TLakeSoulFileDesc lakesoul_params
401-
9: optional i64 table_level_row_count
401+
9: optional i64 table_level_row_count = -1
402402
}
403403

404404
// Deprecated, hive text talbe is a special format, not a serde type

regression-test/data/external_table_p0/iceberg/test_iceberg_optimize_count.out

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,18 @@
1111
-- !q04 --
1212
1000
1313

14+
-- !q01 --
15+
1000
16+
17+
-- !q02 --
18+
1000
19+
20+
-- !q03 --
21+
1000
22+
23+
-- !q04 --
24+
1000
25+
1426
-- !q05 --
1527
1000
1628

regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,14 @@ suite("test_iceberg_optimize_count", "p0,external,doris,external_docker,external
5050
// use push down count
5151
sql """ set enable_count_push_down_for_external_table=true; """
5252

53-
qt_q01 """${sqlstr1}"""
54-
qt_q02 """${sqlstr2}"""
55-
qt_q03 """${sqlstr3}"""
56-
qt_q04 """${sqlstr4}"""
53+
for (String val: ["1K", "0"]) {
54+
sql "set file_split_size=${val}"
55+
qt_q01 """${sqlstr1}"""
56+
qt_q02 """${sqlstr2}"""
57+
qt_q03 """${sqlstr3}"""
58+
qt_q04 """${sqlstr4}"""
59+
}
60+
sql "unset variable file_split_size;"
5761

5862
// traditional mode
5963
sql """set num_files_in_batch_mode=100000"""

0 commit comments

Comments
 (0)