Skip to content

Commit 99a4cf9

Browse files
authored
feat: Use max/min to estimate ndv (#17468)
* feat: Use max/min to estimate ndv Signed-off-by: Xuanwo <[email protected]> * Address cases where max isn't valid Signed-off-by: Xuanwo <[email protected]> * Fix show databases Signed-off-by: Xuanwo <[email protected]> * Fix ndv Signed-off-by: Xuanwo <[email protected]> * Fix overflow Signed-off-by: Xuanwo <[email protected]> * Fix scan Signed-off-by: Xuanwo <[email protected]> * Fix explain Signed-off-by: Xuanwo <[email protected]> * Fix test Signed-off-by: Xuanwo <[email protected]> * test show tables Signed-off-by: Xuanwo <[email protected]> --------- Signed-off-by: Xuanwo <[email protected]>
1 parent fd1b0c1 commit 99a4cf9

File tree

4 files changed

+38
-5
lines changed

4 files changed

+38
-5
lines changed

src/query/sql/src/planner/binder/ddl/database.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ impl Binder {
5959
limit,
6060
} = stmt;
6161

62-
let mut select_builder = SelectBuilder::from("system.databases");
62+
let mut select_builder = SelectBuilder::from("default.system.databases");
6363

6464
let ctl = if let Some(ctl) = catalog {
6565
normalize_identifier(ctl, &self.name_resolution_ctx).name
@@ -101,7 +101,7 @@ impl Binder {
101101
stmt: &ShowDropDatabasesStmt,
102102
) -> Result<Plan> {
103103
let ShowDropDatabasesStmt { catalog, limit } = stmt;
104-
let mut select_builder = SelectBuilder::from("system.databases_with_history");
104+
let mut select_builder = SelectBuilder::from("default.system.databases_with_history");
105105

106106
let ctl = if let Some(ctl) = catalog {
107107
normalize_identifier(ctl, &self.name_resolution_ctx).name

src/query/sql/src/planner/plans/scan.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use databend_common_catalog::table_context::TableContext;
2424
use databend_common_exception::ErrorCode;
2525
use databend_common_exception::Result;
2626
use databend_common_expression::TableSchemaRef;
27+
use databend_common_storage::Datum;
2728
use databend_common_storage::Histogram;
2829
use databend_common_storage::DEFAULT_HISTOGRAM_BUCKETS;
2930
use databend_storages_common_table_meta::table::ChangeType;
@@ -240,9 +241,23 @@ impl Operator for Scan {
240241
// ndv could be `None`, we will use `num_rows - null_count` as ndv instead.
241242
//
242243
// NOTE: don't touch the original num_rows, since it will be used in other places.
243-
let ndv = col_stat
244+
let mut ndv = col_stat
244245
.ndv
245246
.unwrap_or_else(|| num_rows.saturating_sub(col_stat.null_count));
247+
248+
// Alter ndv based on min and max if the datum is uint or int.
249+
match (&max, &min) {
250+
(Datum::UInt(m), Datum::UInt(n)) if m >= n => ndv = ndv.min(m - n + 1),
251+
(Datum::Int(m), Datum::Int(n)) if m >= n => {
252+
ndv = ndv.min(m.saturating_add(1).saturating_sub(*n) as u64)
253+
}
254+
_ => {
255+
if max == min {
256+
ndv = 1
257+
}
258+
}
259+
};
260+
246261
let histogram = if let Some(histogram) = self.statistics.histograms.get(k)
247262
&& histogram.is_some()
248263
{

tests/sqllogictests/suites/tpch_iceberg/prune.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,11 @@ explain select 1 from ctl.tpch.lineitem where l_orderkey > 1 and l_commitdate =
8282
EvalScalar
8383
├── output columns: [1 (#16)]
8484
├── expressions: [1]
85-
├── estimated rows: 1.00
85+
├── estimated rows: 243.54
8686
└── Filter
8787
├── output columns: []
8888
├── filters: [is_true(lineitem.l_orderkey (#0) > 1), is_true(lineitem.l_commitdate (#11) = '1992-01-22')]
89-
├── estimated rows: 1.00
89+
├── estimated rows: 243.54
9090
└── TableScan
9191
├── table: ctl.tpch.lineitem
9292
├── output columns: [l_orderkey (#0), l_commitdate (#11)]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
statement ok
2+
DROP CATALOG IF EXISTS ctl;
3+
4+
statement ok
5+
CREATE CATALOG ctl
6+
TYPE=ICEBERG
7+
CONNECTION=(
8+
TYPE='rest'
9+
ADDRESS='http://127.0.0.1:8181'
10+
WAREHOUSE='s3://iceberg-tpch'
11+
"s3.region"='us-east-1'
12+
"s3.endpoint"='http://127.0.0.1:9000'
13+
);
14+
15+
query T
16+
show databases from ctl;
17+
----
18+
tpch

0 commit comments

Comments
 (0)