diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_optimized.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_optimized.txt index 110a512ec1ec9..6d4bc21f5fcd1 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_optimized.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_optimized.txt @@ -18,13 +18,13 @@ Exchange(Merge) ├── probe keys: [a.a0m (#9)] ├── other filters: [] ├── Join(Inner) - │ ├── build keys: [b.a0k (#48), b.a0n (#50)] - │ ├── probe keys: [a.a0k (#7), a.a0n (#10)] - │ ├── other filters: [lte(b.a2c (#52), a.a0d (#0)), gt(b.a2k (#61), a.a0d (#0))] + │ ├── build keys: [d.a5t (#151)] + │ ├── probe keys: [a.a0l (#8)] + │ ├── other filters: [] │ ├── Join(Inner) - │ │ ├── build keys: [d.a5t (#151)] - │ │ ├── probe keys: [a.a0l (#8)] - │ │ ├── other filters: [] + │ │ ├── build keys: [b.a0k (#48), b.a0n (#50)] + │ │ ├── probe keys: [a.a0k (#7), a.a0n (#10)] + │ │ ├── other filters: [lte(b.a2c (#52), a.a0d (#0)), gt(b.a2k (#61), a.a0d (#0))] │ │ ├── Scan │ │ │ ├── table: default.a0c (#0) │ │ │ ├── filters: [gte(a0c.a0d (#0), '20240526'), lte(a0c.a0d (#0), '20250525')] @@ -32,14 +32,14 @@ Exchange(Merge) │ │ │ └── limit: NONE │ │ └── Exchange(Broadcast) │ │ └── Scan - │ │ ├── table: default.a5r (#3) - │ │ ├── filters: [eq(substring(a5r.a5w (#156), 1, 1), '1')] + │ │ ├── table: default.a1z (#1) + │ │ ├── filters: [eq(a1z.a2t (#70), '624100')] │ │ ├── order by: [] │ │ └── limit: NONE │ └── Exchange(Broadcast) │ └── Scan - │ ├── table: default.a1z (#1) - │ ├── filters: [eq(a1z.a2t (#70), '624100')] + │ ├── table: default.a5r (#3) + │ ├── filters: [eq(substring(a5r.a5w (#156), 1, 1), '1')] │ ├── order by: [] │ └── limit: NONE └── Exchange(Broadcast) diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt index 63de3e7d22ec8..4e2143ed88fbb 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt @@ -4,27 +4,27 @@ Exchange └── EvalScalar ├── output columns: [sell_mnt = 0 (#170)] ├── expressions: [t.sell_mnt (#169) = 0] - ├── estimated rows: 7119376617326129446912.00 + ├── estimated rows: 889922077165.77 └── EvalScalar ├── output columns: [sell_mnt (#169)] ├── expressions: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167) / CAST(if(CAST(count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168) = 0 AS Boolean NULL), 1, count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168)) AS UInt64 NULL) + 3] - ├── estimated rows: 7119376617326129446912.00 + ├── estimated rows: 889922077165.77 └── AggregateFinal ├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)] ├── group by: [a0d, a0k, a0m, a5m] ├── aggregate functions: [sum(sum_arg_0), count()] - ├── estimated rows: 7119376617326129446912.00 + ├── estimated rows: 889922077165.77 └── Exchange ├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), count(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#168), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)] ├── exchange type: Hash(0, 1, 2, 3) └── AggregatePartial ├── group by: [a0d, a0k, a0m, a5m] ├── aggregate functions: [sum(sum_arg_0), count()] - ├── estimated rows: 7119376617326129446912.00 + ├── estimated rows: 889922077165.77 └── EvalScalar ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144), sum_arg_0 (#166)] ├── expressions: [if(d.a1v (#154) = '603020', 1, 0)] - ├── estimated rows: 7119376617326129446912.00 + ├── estimated rows: 889922077165.77 └── HashJoin ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), d.a1v (#154), c.a5m (#144)] ├── join type: INNER @@ -34,7 +34,7 @@ Exchange ├── filters: [] ├── build join filters: │ └── filter id:3, build key:c.a0m (#149), probe key:a.a0m (#9), filter type:inlist,min_max - ├── estimated rows: 7119376617326129446912.00 + ├── estimated rows: 889922077165.77 ├── Exchange(Build) │ ├── output columns: [c.a5m (#144), a0m (#149)] │ ├── exchange type: Broadcast @@ -54,48 +54,48 @@ Exchange └── HashJoin(Probe) ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), d.a1v (#154)] ├── join type: INNER - ├── build keys: [b.a0k (#48), b.a0n (#50)] - ├── probe keys: [a.a0k (#7), a.a0n (#10)] - ├── keys is null equal: [false, false] - ├── filters: [b.a2c (#52) <= a.a0d (#0), b.a2k (#61) > a.a0d (#0)] + ├── build keys: [d.a5t (#151)] + ├── probe keys: [a.a0l (#8)] + ├── keys is null equal: [false] + ├── filters: [] ├── build join filters: - │ ├── filter id:1, build key:b.a0k (#48), probe key:a.a0k (#7), filter type:inlist,min_max - │ └── filter id:2, build key:b.a0n (#50), probe key:a.a0n (#10), filter type:inlist,min_max - ├── estimated rows: 111635169056257280.00 + │ └── filter id:2, build key:d.a5t (#151), probe key:a.a0l (#8), filter type:inlist,min_max + ├── estimated rows: 2790879226.41 ├── Exchange(Build) - │ ├── output columns: [b.a0k (#48), b.a0n (#50), b.a2c (#52), b.a2k (#61)] + │ ├── output columns: [d.a5t (#151), d.a1v (#154)] │ ├── exchange type: Broadcast │ └── TableScan - │ ├── table: default.default.a1z - │ ├── output columns: [a0k (#48), a0n (#50), a2c (#52), a2k (#61)] + │ ├── table: default.default.a5r + │ ├── output columns: [a5t (#151), a1v (#154)] │ ├── read rows: 0 │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(a1z.a2t (#70) = '624100')], limit: NONE] - │ └── estimated rows: 45493.85 + │ ├── push downs: [filters: [is_true(substr(a5r.a5w (#156), 1, 1) = '1')], limit: NONE] + │ └── estimated rows: 806.60 └── HashJoin(Probe) - ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), a.a0n (#10), d.a1v (#154)] + ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0l (#8), a.a0m (#9)] ├── join type: INNER - ├── build keys: [d.a5t (#151)] - ├── probe keys: [a.a0l (#8)] - ├── keys is null equal: [false] - ├── filters: [] + ├── build keys: [b.a0k (#48), b.a0n (#50)] + ├── probe keys: [a.a0k (#7), a.a0n (#10)] + ├── keys is null equal: [false, false] + ├── filters: [b.a2c (#52) <= a.a0d (#0), b.a2k (#61) > a.a0d (#0)] ├── build join filters: - │ └── filter id:0, build key:d.a5t (#151), probe key:a.a0l (#8), filter type:inlist,min_max - ├── estimated rows: 2453851765646.43 + │ ├── filter id:0, build key:b.a0k (#48), probe key:a.a0k (#7), filter type:inlist,min_max + │ └── filter id:1, build key:b.a0n (#50), probe key:a.a0n (#10), filter type:inlist,min_max + ├── estimated rows: 692010718.18 ├── Exchange(Build) - │ ├── output columns: [d.a5t (#151), d.a1v (#154)] + │ ├── output columns: [b.a0k (#48), b.a0n (#50), b.a2c (#52), b.a2k (#61)] │ ├── exchange type: Broadcast │ └── TableScan - │ ├── table: default.default.a5r - │ ├── output columns: [a5t (#151), a1v (#154)] + │ ├── table: default.default.a1z + │ ├── output columns: [a0k (#48), a0n (#50), a2c (#52), a2k (#61)] │ ├── read rows: 0 │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(substr(a5r.a5w (#156), 1, 1) = '1')], limit: NONE] - │ └── estimated rows: 806.60 + │ ├── push downs: [filters: [is_true(a1z.a2t (#70) = '624100')], limit: NONE] + │ └── estimated rows: 45493.85 └── TableScan(Probe) ├── table: default.default.a0c ├── output columns: [a0d (#0), a0k (#7), a0l (#8), a0m (#9), a0n (#10)] @@ -104,6 +104,6 @@ Exchange ├── partitions total: 0 ├── partitions scanned: 0 ├── push downs: [filters: [and_filters(a0c.a0d (#0) >= '20240526', a0c.a0d (#0) <= '20250525')], limit: NONE] - ├── apply join filters: [#3, #1, #2, #0] + ├── apply join filters: [#3, #2, #0, #1] └── estimated rows: 3042216421.58 diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_optimized.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_optimized.txt index ff959972f90fd..5719029b2fb7a 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_optimized.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_optimized.txt @@ -24,13 +24,13 @@ Exchange(Merge) │ └── EvalScalar │ ├── scalars: [a.a0d (#0) AS (#0), a.a0k (#7) AS (#7), a.a0m (#9) AS (#9), if(eq(d.a1v (#154), '603020'), 1, 0) AS (#166), b.a2t (#70) AS (#169), d.a5w (#156) AS (#171)] │ └── Join(Inner) - │ ├── build keys: [b.a0k (#48), b.a0n (#50)] - │ ├── probe keys: [a.a0k (#7), a.a0n (#10)] - │ ├── other filters: [lte(b.a2c (#52), a.a0d (#0)), gt(b.a2k (#61), a.a0d (#0))] + │ ├── build keys: [d.a5t (#151)] + │ ├── probe keys: [a.a0l (#8)] + │ ├── other filters: [] │ ├── Join(Inner) - │ │ ├── build keys: [d.a5t (#151)] - │ │ ├── probe keys: [a.a0l (#8)] - │ │ ├── other filters: [] + │ │ ├── build keys: [b.a0k (#48), b.a0n (#50)] + │ │ ├── probe keys: [a.a0k (#7), a.a0n (#10)] + │ │ ├── other filters: [lte(b.a2c (#52), a.a0d (#0)), gt(b.a2k (#61), a.a0d (#0))] │ │ ├── Scan │ │ │ ├── table: default.a0c (#0) │ │ │ ├── filters: [gte(a0c.a0d (#0), '20240526'), lte(a0c.a0d (#0), '20250525')] @@ -38,14 +38,14 @@ Exchange(Merge) │ │ │ └── limit: NONE │ │ └── Exchange(Broadcast) │ │ └── Scan - │ │ ├── table: default.a5r (#3) - │ │ ├── filters: [eq(substring(a5r.a5w (#156), 1, 1), '1')] + │ │ ├── table: default.a1z (#1) + │ │ ├── filters: [eq(a1z.a2t (#70), '624100')] │ │ ├── order by: [] │ │ └── limit: NONE │ └── Exchange(Broadcast) │ └── Scan - │ ├── table: default.a1z (#1) - │ ├── filters: [eq(a1z.a2t (#70), '624100')] + │ ├── table: default.a5r (#3) + │ ├── filters: [eq(substring(a5r.a5w (#156), 1, 1), '1')] │ ├── order by: [] │ └── limit: NONE └── Exchange(Broadcast) diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt index 4e8a8c0d72d69..4c71dcfeb3f13 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt @@ -4,19 +4,19 @@ Exchange └── EvalScalar ├── output columns: [sell_mnt = 0 (#168)] ├── expressions: [t.sell_mnt (#173) = 0] - ├── estimated rows: 376949189626631488.00 + ├── estimated rows: 889922077165.77 └── AggregateFinal ├── output columns: [_eager_final_sum (#173), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)] ├── group by: [a0d, a0k, a0m, a5m] ├── aggregate functions: [sum(sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END))] - ├── estimated rows: 376949189626631488.00 + ├── estimated rows: 889922077165.77 └── Exchange ├── output columns: [_eager_final_sum (#173), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)] ├── exchange type: Hash(0, 1, 2, 3) └── AggregatePartial ├── group by: [a0d, a0k, a0m, a5m] ├── aggregate functions: [sum(sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END))] - ├── estimated rows: 376949189626631488.00 + ├── estimated rows: 889922077165.77 └── HashJoin ├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), a.a0d (#0), a.a0k (#7), a.a0m (#9), c.a5m (#144)] ├── join type: INNER @@ -26,7 +26,7 @@ Exchange ├── filters: [] ├── build join filters: │ └── filter id:3, build key:c.a0m (#149), probe key:a.a0m (#9), filter type:inlist,min_max - ├── estimated rows: 376949189626631488.00 + ├── estimated rows: 889922077165.77 ├── Exchange(Build) │ ├── output columns: [c.a5m (#144), a0m (#149)] │ ├── exchange type: Broadcast @@ -47,63 +47,63 @@ Exchange ├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), a.a0d (#0), a.a0k (#7), a.a0m (#9)] ├── group by: [a0d, a0k, a0m] ├── aggregate functions: [sum(sum_arg_0)] - ├── estimated rows: 5910740331840.00 + ├── estimated rows: 2790879226.41 └── Exchange ├── output columns: [sum(CASE WHEN d.a1v = '603020' THEN 1 ELSE 0 END) (#167), a.a0d (#0), a.a0k (#7), a.a0m (#9)] ├── exchange type: Hash(0, 1, 2) └── AggregatePartial ├── group by: [a0d, a0k, a0m] ├── aggregate functions: [sum(sum_arg_0)] - ├── estimated rows: 5910740331840.00 + ├── estimated rows: 2790879226.41 └── EvalScalar ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), sum_arg_0 (#166)] ├── expressions: [if(d.a1v (#154) = '603020', 1, 0)] - ├── estimated rows: 111635169056257280.00 + ├── estimated rows: 2790879226.41 └── HashJoin ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), d.a1v (#154)] ├── join type: INNER - ├── build keys: [b.a0k (#48), b.a0n (#50)] - ├── probe keys: [a.a0k (#7), a.a0n (#10)] - ├── keys is null equal: [false, false] - ├── filters: [b.a2c (#52) <= a.a0d (#0), b.a2k (#61) > a.a0d (#0)] + ├── build keys: [d.a5t (#151)] + ├── probe keys: [a.a0l (#8)] + ├── keys is null equal: [false] + ├── filters: [] ├── build join filters: - │ ├── filter id:1, build key:b.a0k (#48), probe key:a.a0k (#7), filter type:inlist,min_max - │ └── filter id:2, build key:b.a0n (#50), probe key:a.a0n (#10), filter type:inlist,min_max - ├── estimated rows: 111635169056257280.00 + │ └── filter id:2, build key:d.a5t (#151), probe key:a.a0l (#8), filter type:inlist,min_max + ├── estimated rows: 2790879226.41 ├── Exchange(Build) - │ ├── output columns: [b.a0k (#48), b.a0n (#50), b.a2c (#52), b.a2k (#61)] + │ ├── output columns: [d.a5t (#151), d.a1v (#154)] │ ├── exchange type: Broadcast │ └── TableScan - │ ├── table: default.default.a1z - │ ├── output columns: [a0k (#48), a0n (#50), a2c (#52), a2k (#61)] + │ ├── table: default.default.a5r + │ ├── output columns: [a5t (#151), a1v (#154)] │ ├── read rows: 0 │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(a1z.a2t (#70) = '624100')], limit: NONE] - │ └── estimated rows: 45493.85 + │ ├── push downs: [filters: [is_true(substr(a5r.a5w (#156), 1, 1) = '1')], limit: NONE] + │ └── estimated rows: 806.60 └── HashJoin(Probe) - ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0m (#9), a.a0n (#10), d.a1v (#154)] + ├── output columns: [a.a0d (#0), a.a0k (#7), a.a0l (#8), a.a0m (#9)] ├── join type: INNER - ├── build keys: [d.a5t (#151)] - ├── probe keys: [a.a0l (#8)] - ├── keys is null equal: [false] - ├── filters: [] + ├── build keys: [b.a0k (#48), b.a0n (#50)] + ├── probe keys: [a.a0k (#7), a.a0n (#10)] + ├── keys is null equal: [false, false] + ├── filters: [b.a2c (#52) <= a.a0d (#0), b.a2k (#61) > a.a0d (#0)] ├── build join filters: - │ └── filter id:0, build key:d.a5t (#151), probe key:a.a0l (#8), filter type:inlist,min_max - ├── estimated rows: 2453851765646.43 + │ ├── filter id:0, build key:b.a0k (#48), probe key:a.a0k (#7), filter type:inlist,min_max + │ └── filter id:1, build key:b.a0n (#50), probe key:a.a0n (#10), filter type:inlist,min_max + ├── estimated rows: 692010718.18 ├── Exchange(Build) - │ ├── output columns: [d.a5t (#151), d.a1v (#154)] + │ ├── output columns: [b.a0k (#48), b.a0n (#50), b.a2c (#52), b.a2k (#61)] │ ├── exchange type: Broadcast │ └── TableScan - │ ├── table: default.default.a5r - │ ├── output columns: [a5t (#151), a1v (#154)] + │ ├── table: default.default.a1z + │ ├── output columns: [a0k (#48), a0n (#50), a2c (#52), a2k (#61)] │ ├── read rows: 0 │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [is_true(substr(a5r.a5w (#156), 1, 1) = '1')], limit: NONE] - │ └── estimated rows: 806.60 + │ ├── push downs: [filters: [is_true(a1z.a2t (#70) = '624100')], limit: NONE] + │ └── estimated rows: 45493.85 └── TableScan(Probe) ├── table: default.default.a0c ├── output columns: [a0d (#0), a0k (#7), a0l (#8), a0m (#9), a0n (#10)] @@ -112,6 +112,6 @@ Exchange ├── partitions total: 0 ├── partitions scanned: 0 ├── push downs: [filters: [and_filters(a0c.a0d (#0) >= '20240526', a0c.a0d (#0) <= '20250525')], limit: NONE] - ├── apply join filters: [#3, #1, #2, #0] + ├── apply join filters: [#3, #2, #0, #1] └── estimated rows: 3042216421.58 diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt index 44eb30514a715..a253a60e01b6f 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q01_physical.txt @@ -127,14 +127,14 @@ Limit │ ├── output columns: [Sum(sr_return_amt) (#48), store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] │ ├── group by: [sr_customer_sk, sr_store_sk] │ ├── aggregate functions: [sum(sr_return_amt)] - │ ├── estimated rows: 841298963.13 + │ ├── estimated rows: 4206494.82 │ └── Exchange │ ├── output columns: [Sum(sr_return_amt) (#48), store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7)] │ ├── exchange type: Hash(0, 1) │ └── AggregatePartial │ ├── group by: [sr_customer_sk, sr_store_sk] │ ├── aggregate functions: [sum(sr_return_amt)] - │ ├── estimated rows: 841298963.13 + │ ├── estimated rows: 4206494.82 │ └── HashJoin │ ├── output columns: [store_returns.sr_customer_sk (#3), store_returns.sr_store_sk (#7), store_returns.sr_return_amt (#11)] │ ├── join type: INNER @@ -144,7 +144,7 @@ Limit │ ├── filters: [] │ ├── build join filters: │ │ └── filter id:0, build key:date_dim.d_date_sk (#20), probe key:store_returns.sr_returned_date_sk (#0), filter type:inlist,min_max - │ ├── estimated rows: 841298963.13 + │ ├── estimated rows: 4206494.82 │ ├── Exchange(Build) │ │ ├── output columns: [date_dim.d_date_sk (#20)] │ │ ├── exchange type: Broadcast diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q03_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q03_physical.txt index 1ee7ed611a957..fed9cb4004650 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q03_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/tpcds/Q03_physical.txt @@ -6,26 +6,26 @@ Limit └── Sort(Final) ├── output columns: [SUM(ss_ext_sales_price) (#73), dt.d_year (#6), item.i_brand (#59), item.i_brand_id (#58)] ├── sort keys: [d_year ASC NULLS LAST, SUM(ss_ext_sales_price) DESC NULLS LAST, i_brand_id ASC NULLS LAST] - ├── estimated rows: 143057683321996.78 + ├── estimated rows: 3576442083.05 └── Exchange ├── output columns: [SUM(ss_ext_sales_price) (#73), dt.d_year (#6), item.i_brand (#59), item.i_brand_id (#58), #_order_col] ├── exchange type: Merge └── Sort(Partial) ├── output columns: [SUM(ss_ext_sales_price) (#73), dt.d_year (#6), item.i_brand (#59), item.i_brand_id (#58), #_order_col] ├── sort keys: [d_year ASC NULLS LAST, SUM(ss_ext_sales_price) DESC NULLS LAST, i_brand_id ASC NULLS LAST] - ├── estimated rows: 143057683321996.78 + ├── estimated rows: 3576442083.05 └── AggregateFinal ├── output columns: [SUM(ss_ext_sales_price) (#73), dt.d_year (#6), item.i_brand (#59), item.i_brand_id (#58)] ├── group by: [d_year, i_brand, i_brand_id] ├── aggregate functions: [sum(ss_ext_sales_price)] - ├── estimated rows: 143057683321996.78 + ├── estimated rows: 3576442083.05 └── Exchange ├── output columns: [SUM(ss_ext_sales_price) (#73), dt.d_year (#6), item.i_brand (#59), item.i_brand_id (#58)] ├── exchange type: Hash(0, 1, 2) └── AggregatePartial ├── group by: [d_year, i_brand, i_brand_id] ├── aggregate functions: [sum(ss_ext_sales_price)] - ├── estimated rows: 143057683321996.78 + ├── estimated rows: 3576442083.05 └── HashJoin ├── output columns: [store_sales.ss_ext_sales_price (#43), item.i_brand_id (#58), item.i_brand (#59), dt.d_year (#6)] ├── join type: INNER @@ -35,7 +35,7 @@ Limit ├── filters: [] ├── build join filters: │ └── filter id:1, build key:dt.d_date_sk (#0), probe key:store_sales.ss_sold_date_sk (#28), filter type:inlist,min_max - ├── estimated rows: 143057683321996.78 + ├── estimated rows: 3576442083.05 ├── Exchange(Build) │ ├── output columns: [dt.d_date_sk (#0), dt.d_year (#6)] │ ├── exchange type: Broadcast @@ -57,7 +57,7 @@ Limit ├── filters: [] ├── build join filters: │ └── filter id:0, build key:item.i_item_sk (#51), probe key:store_sales.ss_item_sk (#30), filter type:inlist,min_max - ├── estimated rows: 23500557158.40 + ├── estimated rows: 117502785.79 ├── Exchange(Build) │ ├── output columns: [item.i_item_sk (#51), item.i_brand_id (#58), item.i_brand (#59)] │ ├── exchange type: Broadcast diff --git a/src/query/sql/src/planner/plans/join.rs b/src/query/sql/src/planner/plans/join.rs index 401fd6e7a789e..718738ecc7e91 100644 --- a/src/query/sql/src/planner/plans/join.rs +++ b/src/query/sql/src/planner/plans/join.rs @@ -12,15 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::cmp::max; use std::collections::HashMap; +use std::collections::HashSet; use std::fmt::Display; use std::fmt::Formatter; use std::sync::Arc; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; -use databend_common_expression::types::F64; use databend_common_storage::Datum; use databend_common_storage::Histogram; use databend_common_storage::DEFAULT_HISTOGRAM_BUCKETS; @@ -33,9 +32,11 @@ use crate::optimizer::ir::PhysicalProperty; use crate::optimizer::ir::RelExpr; use crate::optimizer::ir::RelationalProperty; use crate::optimizer::ir::RequiredProperty; +use crate::optimizer::ir::SelectivityEstimator; use crate::optimizer::ir::StatInfo; use crate::optimizer::ir::Statistics; use crate::optimizer::ir::UniformSampleSet; +use crate::optimizer::ir::MAX_SELECTIVITY; use crate::plans::Operator; use crate::plans::RelOp; use crate::plans::ScalarExpr; @@ -197,6 +198,8 @@ impl JoinEquiCondition { } } +static DEFAULT_EQ_SELECTIVITY: f64 = 0.005; // 1/200 + /// Join operator. We will choose hash join by default. /// In the case that using hash join, the right child /// is always the build side, and the left child is always @@ -258,167 +261,195 @@ impl Join { Ok(used_columns) } - fn inner_join_cardinality( + fn inner_join_eq_conditions_cardinality( &self, - left_cardinality: &mut f64, - right_cardinality: &mut f64, + left_cardinality: f64, + right_cardinality: f64, left_statistics: &mut Statistics, right_statistics: &mut Statistics, ) -> Result { - let mut join_card = *left_cardinality * *right_cardinality; - let mut join_card_updated = false; - let mut left_column_index = 0; - let mut right_column_index = 0; + let mut join_selectivity = 1f64; + let mut left_need_update_columns_stat = vec![]; + let mut right_need_update_columns_stat = vec![]; + for condition in self.equi_conditions.iter() { let left_condition = &condition.left; let right_condition = &condition.right; - if join_card == 0 as f64 { - break; + + let left_used_columns = left_condition.used_columns(); + let right_used_columns = right_condition.used_columns(); + + // Unable to calculate join_selectivity + if left_used_columns.is_empty() || right_used_columns.is_empty() { + continue; } - // Currently don't consider the case such as: `t1.a + t1.b = t2.a` - if left_condition.used_columns().len() != 1 || right_condition.used_columns().len() != 1 - { + + let mut left_columns_stat = left_used_columns + .iter() + .filter_map(|x| left_statistics.column_stats.get(x).map(|v| (*x, v.clone()))) + .collect::>(); + + if left_used_columns.len() != left_columns_stat.len() { + // The left table contains columns with unknown statistics. + join_selectivity *= DEFAULT_EQ_SELECTIVITY; continue; } - let left_col_stat = left_statistics - .column_stats - .get(left_condition.used_columns().iter().next().unwrap()); - let right_col_stat = right_statistics - .column_stats - .get(right_condition.used_columns().iter().next().unwrap()); - match (left_col_stat, right_col_stat) { - (Some(left_col_stat), Some(right_col_stat)) => { - if !left_col_stat.min.type_comparable(&right_col_stat.min) { - continue; - } - let left_interval = - UniformSampleSet::new(left_col_stat.min.clone(), left_col_stat.max.clone()); - let right_interval = UniformSampleSet::new( - right_col_stat.min.clone(), - right_col_stat.max.clone(), - ); - if !left_interval.has_intersection(&right_interval)? { - join_card = 0.0; - continue; - } - // Update column min and max value - let mut new_ndv = None; - let (new_min, new_max) = left_interval.intersection(&right_interval)?; - - if let Datum::Bytes(_) | Datum::Bool(_) = left_col_stat.min { - let card = evaluate_by_ndv( - left_col_stat, - right_col_stat, - *left_cardinality, - *right_cardinality, - &mut new_ndv, - ); - let (left_index, right_index) = update_statistic( - left_statistics, - right_statistics, - left_condition, - right_condition, - NewStatistic { - min: new_min, - max: new_max, - ndv: new_ndv, - }, - ); - if card < join_card { - join_card = card; - join_card_updated = true; - left_column_index = left_index; - right_column_index = right_index; - } - continue; - } - let card = match (&left_col_stat.histogram, &right_col_stat.histogram) { - (Some(left_hist), Some(right_hist)) => { - // Evaluate join cardinality by histogram. - evaluate_by_histogram(left_hist, right_hist, &mut new_ndv)? - } - _ => evaluate_by_ndv( - left_col_stat, - right_col_stat, - *left_cardinality, - *right_cardinality, - &mut new_ndv, - ), + let mut right_columns_stat = right_used_columns + .iter() + .filter_map(|x| { + right_statistics + .column_stats + .get(x) + .map(|v| (*x, v.clone())) + }) + .collect::>(); + + if right_used_columns.len() != right_columns_stat.len() { + // The right table contains columns with unknown statistics. + join_selectivity *= DEFAULT_EQ_SELECTIVITY; + continue; + } + + if left_columns_stat.len() != 1 || right_columns_stat.len() != 1 { + join_selectivity *= DEFAULT_EQ_SELECTIVITY; + continue; + } + + let (left_idx, left_column_stat) = left_columns_stat.pop().unwrap(); + let (right_idx, right_column_stat) = right_columns_stat.pop().unwrap(); + + if !left_column_stat.min.type_comparable(&right_column_stat.min) { + continue; + } + + let left_interval = + UniformSampleSet::new(left_column_stat.min.clone(), left_column_stat.max.clone()); + + let right_interval = + UniformSampleSet::new(right_column_stat.min.clone(), right_column_stat.max.clone()); + + if !left_interval.has_intersection(&right_interval)? { + join_selectivity = 0.0; + break; + } + + let (min, max) = left_interval.intersection(&right_interval)?; + + let has_histogram = + left_column_stat.histogram.is_some() && right_column_stat.histogram.is_some(); + + match left_column_stat.min { + Datum::Int(_) | Datum::UInt(_) | Datum::Float(_) if has_histogram => { + // Calculate join selectivity from histogram + let left_histogram = left_column_stat.histogram.as_ref().unwrap(); + let right_histogram = right_column_stat.histogram.as_ref().unwrap(); + let (cardinality, ndv) = + evaluate_by_histogram(left_histogram, right_histogram)?; + join_selectivity *= cardinality / (left_cardinality * right_cardinality); + + let new_statistic = NewStatistic { + min, + max, + ndv: Some(ndv), }; - let (left_index, right_index) = update_statistic( - left_statistics, - right_statistics, - left_condition, - right_condition, - NewStatistic { - min: new_min, - max: new_max, - ndv: new_ndv, - }, - ); - if card < join_card { - join_card = card; - join_card_updated = true; - left_column_index = left_index; - right_column_index = right_index; + + left_need_update_columns_stat.push((left_idx, new_statistic.clone())); + right_need_update_columns_stat.push((right_idx, new_statistic)); + } + _calculate_join_selectivity_from_ndv => { + let min_ndv = left_column_stat.ndv.min(right_column_stat.ndv); + let max_ndv = left_column_stat.ndv.max(right_column_stat.ndv); + + if max_ndv != 0.0 { + join_selectivity *= 1_f64 / max_ndv; } + + let new_statistic = NewStatistic { + min, + max, + ndv: Some(min_ndv), + }; + left_need_update_columns_stat.push((left_idx, new_statistic.clone())); + right_need_update_columns_stat.push((right_idx, new_statistic)); } - _ => continue, } } - if join_card_updated { - for (idx, left) in left_statistics.column_stats.iter_mut() { - if *idx == left_column_index { - if left.histogram.is_some() { - // Todo: find a better way to update accuracy histogram - left.histogram = if left.ndv as u64 <= 2 { - None - } else { - if matches!(left.min, Datum::Int(_) | Datum::UInt(_) | Datum::Float(_)) - { - left.min = Datum::Float(F64::from(left.min.to_double()?)); - left.max = Datum::Float(F64::from(left.max.to_double()?)); - } - Some(HistogramBuilder::from_ndv( - left.ndv as u64, - max(join_card as u64, left.ndv as u64), - Some((left.min.clone(), left.max.clone())), - DEFAULT_HISTOGRAM_BUCKETS, - )?) - }; - } + + let mut join_cardinality = left_cardinality * right_cardinality * join_selectivity; + join_cardinality = join_cardinality.min(left_cardinality * right_cardinality); + + for (need_update_columns_stat, column_stats) in [ + (left_need_update_columns_stat, left_statistics), + (right_need_update_columns_stat, right_statistics), + ] { + for (idx, mut new_statistic) in need_update_columns_stat { + let Some(col_stat) = column_stats.column_stats.get_mut(&idx) else { continue; + }; + + if let Some(min) = new_statistic.min.as_ref() { + col_stat.min = min.clone(); } - // Other columns' histograms are inaccurate, so make them None - left.histogram = None; - } - for (idx, right) in right_statistics.column_stats.iter_mut() { - if *idx == right_column_index { - if right.histogram.is_some() { - // Todo: find a better way to update accuracy histogram - right.histogram = if right.ndv as u64 <= 2 { - None - } else { - if matches!(right.min, Datum::Int(_) | Datum::UInt(_) | Datum::Float(_)) - { - right.min = Datum::Float(F64::from(right.min.to_double()?)); - right.max = Datum::Float(F64::from(right.max.to_double()?)); - } - Some(HistogramBuilder::from_ndv( - right.ndv as u64, - max(join_card as u64, right.ndv as u64), - Some((right.min.clone(), right.max.clone())), - DEFAULT_HISTOGRAM_BUCKETS, - )?) - }; + + if let Some(max) = new_statistic.max.as_ref() { + col_stat.max = max.clone(); + } + + if let Some(ndv) = new_statistic.ndv.as_ref() { + col_stat.ndv = *ndv; + } + + if col_stat.histogram.is_some() { + col_stat.histogram = None; + + let Some(min) = new_statistic.min.take() else { + continue; + }; + + let Some(max) = new_statistic.max.take() else { + continue; + }; + + let Some(ndv) = new_statistic.ndv.take() else { + continue; + }; + + if ndv as u64 <= 2 { + continue; } - continue; + + col_stat.histogram = Some(HistogramBuilder::from_ndv( + ndv as u64, + std::cmp::max(join_cardinality as u64, ndv as u64), + Some((min, max)), + DEFAULT_HISTOGRAM_BUCKETS, + )?); } - right.histogram = None; } } - Ok(join_card) + + Ok(join_cardinality) + } + + fn inner_join_non_eq_conditions_cardinality( + &self, + cardinality: f64, + statistics: &mut Statistics, + ) -> Result { + if self.non_equi_conditions.is_empty() || cardinality == 0.0 { + return Ok(cardinality); + } + + let mut selectivity = MAX_SELECTIVITY; + let mut sb = SelectivityEstimator::new(statistics, cardinality, HashSet::new()); + + for expr in &self.non_equi_conditions { + selectivity = selectivity.min(sb.compute_selectivity(expr, true)?); + } + + sb.update_other_statistic_by_selectivity(selectivity); + Ok(cardinality * selectivity) } pub fn has_null_equi_condition(&self) -> bool { @@ -432,22 +463,39 @@ impl Join { left_stat_info: Arc, right_stat_info: Arc, ) -> Result> { - let (mut left_cardinality, mut left_statistics) = ( + let (left_cardinality, mut left_statistics) = ( left_stat_info.cardinality, left_stat_info.statistics.clone(), ); - let (mut right_cardinality, mut right_statistics) = ( + let (right_cardinality, mut right_statistics) = ( right_stat_info.cardinality, right_stat_info.statistics.clone(), ); // Evaluating join cardinality using histograms. // If histogram is None, will evaluate using NDV. - let inner_join_cardinality = self.inner_join_cardinality( - &mut left_cardinality, - &mut right_cardinality, + let inner_join_cardinality = self.inner_join_eq_conditions_cardinality( + left_cardinality, + right_cardinality, &mut left_statistics, &mut right_statistics, )?; + + let mut join_statistics = { + let mut column_stats = HashMap::new(); + column_stats.extend(left_statistics.column_stats); + column_stats.extend(right_statistics.column_stats); + + Statistics { + precise_cardinality: None, + column_stats, + } + }; + + let inner_join_cardinality = self.inner_join_non_eq_conditions_cardinality( + inner_join_cardinality, + &mut join_statistics, + )?; + let cardinality = match self.join_type { JoinType::Inner | JoinType::Asof | JoinType::Cross => inner_join_cardinality, JoinType::Left | JoinType::LeftAsof => { @@ -466,21 +514,15 @@ impl Join { JoinType::LeftSingle | JoinType::RightMark | JoinType::LeftAnti => left_cardinality, JoinType::RightSingle | JoinType::LeftMark | JoinType::RightAnti => right_cardinality, }; + // Derive column statistics - let column_stats = if cardinality == 0.0 { - HashMap::new() - } else { - let mut column_stats = HashMap::new(); - column_stats.extend(left_statistics.column_stats); - column_stats.extend(right_statistics.column_stats); - column_stats - }; + if cardinality == 0.0 { + join_statistics.column_stats = HashMap::new(); + } + Ok(Arc::new(StatInfo { cardinality, - statistics: Statistics { - precise_cardinality: None, - column_stats, - }, + statistics: join_statistics, })) } @@ -811,11 +853,7 @@ impl Operator for Join { } } -fn evaluate_by_histogram( - left_hist: &Histogram, - right_hist: &Histogram, - new_ndv: &mut Option, -) -> Result { +fn evaluate_by_histogram(left_hist: &Histogram, right_hist: &Histogram) -> Result<(f64, f64)> { let mut card = 0.0; let mut all_ndv = 0.0; for left_bucket in left_hist.buckets.iter() { @@ -905,6 +943,8 @@ fn evaluate_by_histogram( if max_ndv > 0.0 { all_ndv += left_ndv.min(right_ndv); card += left_num_rows * right_num_rows / max_ndv; + + // (left_num_rows * right_num_rows / 0.2) + (left_num_rows * right_num_rows / 0.3) } } } else if has_intersection { @@ -912,50 +952,414 @@ fn evaluate_by_histogram( } } } - *new_ndv = Some(all_ndv.ceil()); - Ok(card) + + Ok((card, all_ndv.ceil())) } -fn evaluate_by_ndv( - left_stat: &ColumnStat, - right_stat: &ColumnStat, - left_cardinality: f64, - right_cardinality: f64, - new_ndv: &mut Option, -) -> f64 { - // Update column ndv - *new_ndv = Some(left_stat.ndv.min(right_stat.ndv)); - - let max_ndv = f64::max(left_stat.ndv, right_stat.ndv); - if max_ndv == 0.0 { - 0.0 - } else { - left_cardinality * right_cardinality / max_ndv +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use databend_common_exception::Result; + use databend_common_expression::types::DataType; + use databend_common_expression::types::NumberDataType; + use databend_common_storage::Datum; + use databend_common_storage::DEFAULT_HISTOGRAM_BUCKETS; + + use crate::optimizer::ir::ColumnStat; + use crate::optimizer::ir::HistogramBuilder; + use crate::optimizer::ir::Statistics; + use crate::plans::join::DEFAULT_EQ_SELECTIVITY; + use crate::plans::BoundColumnRef; + use crate::plans::Join; + use crate::plans::JoinEquiCondition; + use crate::plans::JoinType; + use crate::ColumnBindingBuilder; + use crate::IndexType; + use crate::ScalarExpr; + use crate::Visibility; + + fn create_column_stat(min: Datum, max: Datum, ndv: f64, has_histogram: bool) -> ColumnStat { + let histogram = if has_histogram { + Some( + HistogramBuilder::from_ndv( + ndv as u64, + ndv as u64 * 2, + Some((min.clone(), max.clone())), + DEFAULT_HISTOGRAM_BUCKETS, + ) + .unwrap(), + ) + } else { + None + }; + + ColumnStat { + min, + max, + ndv, + histogram, + null_count: 0, + } } -} -fn update_statistic( - left_statistics: &mut Statistics, - right_statistics: &mut Statistics, - left_condition: &ScalarExpr, - right_condition: &ScalarExpr, - new_stat: NewStatistic, -) -> (usize, usize) { - let left_index = *left_condition.used_columns().iter().next().unwrap(); - let right_index = *right_condition.used_columns().iter().next().unwrap(); - let left_col_stat = left_statistics.column_stats.get_mut(&left_index).unwrap(); - let right_col_stat = right_statistics.column_stats.get_mut(&right_index).unwrap(); - if let Some(new_min) = new_stat.min { - left_col_stat.min = new_min.clone(); - right_col_stat.min = new_min; - } - if let Some(new_max) = new_stat.max { - left_col_stat.max = new_max.clone(); - right_col_stat.max = new_max; - } - if let Some(new_ndv) = new_stat.ndv { - left_col_stat.ndv = new_ndv; - right_col_stat.ndv = new_ndv; - } - (left_index, right_index) + fn create_equi_condition(left_col: IndexType, right_col: IndexType) -> JoinEquiCondition { + let left_column = ColumnBindingBuilder::new( + String::from("column_name"), + left_col, + Box::new(DataType::Number(NumberDataType::Float32)), + Visibility::Visible, + ); + + let left_bound_column_ref = BoundColumnRef { + span: None, + column: left_column.build(), + }; + + let right_column = ColumnBindingBuilder::new( + String::from("column_name"), + right_col, + Box::new(DataType::Number(NumberDataType::Float32)), + Visibility::Visible, + ); + + let right_bound_column_ref = BoundColumnRef { + span: None, + column: right_column.build(), + }; + + JoinEquiCondition::new( + ScalarExpr::BoundColumnRef(left_bound_column_ref), + ScalarExpr::BoundColumnRef(right_bound_column_ref), + false, + ) + } + + fn create_join_with_equi_conditions(conditions: Vec<(IndexType, IndexType)>) -> Join { + Join { + equi_conditions: conditions + .into_iter() + .map(|(l, r)| create_equi_condition(l, r)) + .collect(), + non_equi_conditions: vec![], + join_type: JoinType::Cross, + marker_index: None, + from_correlated_subquery: false, + need_hold_hash_table: false, + is_lateral: false, + single_to_inner: None, + build_side_cache_info: None, + } + } + + #[test] + fn test_inner_join_cardinality_basic() -> Result<()> { + let join = create_join_with_equi_conditions(vec![(1, 2)]); + + let mut left_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 1, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, false), + )]), + }; + + let mut right_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 2, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, false), + )]), + }; + + let result = join.inner_join_eq_conditions_cardinality( + 1000.0, // left_cardinality + 2000.0, // right_cardinality + &mut left_stats, + &mut right_stats, + )?; + + assert_eq!((result - 200.0).abs(), 20000.0 - 200.0); // 1000 * 2000 * (1/100) = 20000 + assert_eq!(left_stats.column_stats[&1].ndv, 100.0); + assert_eq!(right_stats.column_stats[&2].ndv, 100.0); + Ok(()) + } + + #[test] + fn test_inner_join_cardinality_zero_ndv() -> Result<()> { + let join = create_join_with_equi_conditions(vec![(1, 2)]); + + let mut left_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 1, + create_column_stat(Datum::Int(1), Datum::Int(100), 0.0, false), + )]), + }; + + let mut right_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 2, + create_column_stat(Datum::Int(1), Datum::Int(100), 0.0, false), + )]), + }; + + let result = join.inner_join_eq_conditions_cardinality( + 1000.0, + 2000.0, + &mut left_stats, + &mut right_stats, + )?; + + assert_eq!(result, 2000000.0); + Ok(()) + } + + #[test] + fn test_inner_join_cardinality_no_overlap() -> Result<()> { + let join = create_join_with_equi_conditions(vec![(1, 2)]); + + let mut left_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 1, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, false), + )]), + }; + + let mut right_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 2, + create_column_stat(Datum::Int(200), Datum::Int(300), 100.0, false), + )]), + }; + + let result = join.inner_join_eq_conditions_cardinality( + 1000.0, + 2000.0, + &mut left_stats, + &mut right_stats, + )?; + + assert_eq!(result, 0.0); + Ok(()) + } + + #[test] + fn test_inner_join_cardinality_with_histogram() -> Result<()> { + let join = create_join_with_equi_conditions(vec![(1, 2)]); + + let mut left_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 1, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, true), + )]), + }; + + let mut right_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 2, + create_column_stat(Datum::Int(50), Datum::Int(150), 100.0, true), + )]), + }; + + let result = join.inner_join_eq_conditions_cardinality( + 1000.0, + 2000.0, + &mut left_stats, + &mut right_stats, + )?; + + assert!(result > 0.0); + assert!(result < 1000.0 * 2000.0); + + assert_eq!(left_stats.column_stats[&1].min, Datum::Int(50)); + assert_eq!(left_stats.column_stats[&1].max, Datum::Int(100)); + assert_eq!(right_stats.column_stats[&2].min, Datum::Int(50)); + assert_eq!(right_stats.column_stats[&2].max, Datum::Int(100)); + Ok(()) + } + + #[test] + fn test_inner_join_cardinality_missing_stats() -> Result<()> { + let join = create_join_with_equi_conditions(vec![(1, 2), (3, 4)]); + + let mut left_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 1, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, false), + )]), + }; + + let mut right_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([ + ( + 2, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, false), + ), + ( + 4, + create_column_stat(Datum::Int(1), Datum::Int(50), 50.0, false), + ), + ]), + }; + + let result = join.inner_join_eq_conditions_cardinality( + 1000.0, + 2000.0, + &mut left_stats, + &mut right_stats, + )?; + + let expected = 1000.0 * 2000.0 * (1.0 / 100.0) * DEFAULT_EQ_SELECTIVITY; + assert_eq!(result, expected); + Ok(()) + } + + #[test] + fn test_inner_join_cardinality_zero_cardinality() -> Result<()> { + let join = create_join_with_equi_conditions(vec![(1, 2)]); + + let mut left_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 1, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, false), + )]), + }; + + let mut right_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 2, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, false), + )]), + }; + + let result = + join.inner_join_eq_conditions_cardinality(0.0, 0.0, &mut left_stats, &mut right_stats)?; + + assert_eq!(result, 0.0); + Ok(()) + } + + #[test] + fn test_inner_join_cardinality_type_incompatible() -> Result<()> { + let join = create_join_with_equi_conditions(vec![(1, 2)]); + + let mut left_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 1, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, false), + )]), + }; + + let mut right_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 2, + create_column_stat( + Datum::Bytes("a".as_bytes().to_vec()), + Datum::Bytes("z".as_bytes().to_vec()), + 26.0, + false, + ), + )]), + }; + + let result = join.inner_join_eq_conditions_cardinality( + 1000.0, + 2000.0, + &mut left_stats, + &mut right_stats, + )?; + + assert!((result - (1000.0 * 2000.0)).abs() < f64::EPSILON); + Ok(()) + } + + #[test] + fn test_inner_join_cardinality_multiple_conditions() -> Result<()> { + let join = create_join_with_equi_conditions(vec![(1, 2), (3, 4)]); + + let mut left_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([ + ( + 1, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, false), + ), + ( + 3, + create_column_stat(Datum::Int(1), Datum::Int(50), 50.0, false), + ), + ]), + }; + + let mut right_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([ + ( + 2, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, false), + ), + ( + 4, + create_column_stat(Datum::Int(1), Datum::Int(50), 50.0, false), + ), + ]), + }; + + let result = join.inner_join_eq_conditions_cardinality( + 1000.0, + 2000.0, + &mut left_stats, + &mut right_stats, + )?; + + let expected = 1000.0 * 2000.0 * (1.0 / 100.0) * (1.0 / 50.0); + assert!((result - expected).abs() < f64::EPSILON); + Ok(()) + } + + #[test] + fn test_inner_join_cardinality_stats_update() -> Result<()> { + let join = create_join_with_equi_conditions(vec![(1, 2)]); + + let mut left_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 1, + create_column_stat(Datum::Int(1), Datum::Int(100), 100.0, true), + )]), + }; + + let mut right_stats = Statistics { + precise_cardinality: None, + column_stats: HashMap::from([( + 2, + create_column_stat(Datum::Int(50), Datum::Int(150), 100.0, true), + )]), + }; + + let _ = join.inner_join_eq_conditions_cardinality( + 1000.0, + 2000.0, + &mut left_stats, + &mut right_stats, + )?; + + assert_eq!(left_stats.column_stats[&1].min, Datum::Int(50)); + assert_eq!(left_stats.column_stats[&1].max, Datum::Int(100)); + assert_eq!(right_stats.column_stats[&2].min, Datum::Int(50)); + assert_eq!(right_stats.column_stats[&2].max, Datum::Int(100)); + Ok(()) + } } diff --git a/tests/sqllogictests/suites/mode/cluster/exchange.test b/tests/sqllogictests/suites/mode/cluster/exchange.test index 01dd974fb468c..8c0e6562c59b6 100644 --- a/tests/sqllogictests/suites/mode/cluster/exchange.test +++ b/tests/sqllogictests/suites/mode/cluster/exchange.test @@ -16,7 +16,7 @@ Exchange ├── filters: [] ├── build join filters: │ └── filter id:0, build key:t.number (#0), probe key:t1.number (#1), filter type:inlist,min_max - ├── estimated rows: 2.00 + ├── estimated rows: 0.01 ├── Exchange(Build) │ ├── output columns: [t.number (#0)] │ ├── exchange type: Broadcast @@ -55,7 +55,7 @@ Exchange ├── filters: [] ├── build join filters: │ └── filter id:1, build key:t.number (#0), probe key:t2.number (#2), filter type:inlist,min_max - ├── estimated rows: 6.00 + ├── estimated rows: 0.00 ├── Exchange(Build) │ ├── output columns: [t1.number (#1), t.number (#0)] │ ├── exchange type: Broadcast @@ -68,7 +68,7 @@ Exchange │ ├── filters: [] │ ├── build join filters: │ │ └── filter id:0, build key:t.number (#0), probe key:t1.number (#1), filter type:inlist,min_max - │ ├── estimated rows: 2.00 + │ ├── estimated rows: 0.01 │ ├── Exchange(Build) │ │ ├── output columns: [t.number (#0)] │ │ ├── exchange type: Broadcast @@ -117,7 +117,7 @@ Exchange ├── filters: [] ├── build join filters: │ └── filter id:1, build key:t.b (#1), probe key:t2.number (#3), filter type:inlist,min_max - ├── estimated rows: 6.00 + ├── estimated rows: 0.00 ├── Exchange(Build) │ ├── output columns: [t1.number (#2), b (#1), numbers.number (#0)] │ ├── exchange type: Broadcast @@ -130,7 +130,7 @@ Exchange │ ├── filters: [] │ ├── build join filters: │ │ └── filter id:0, build key:t.a (#0), probe key:t1.number (#2), filter type:inlist,min_max - │ ├── estimated rows: 2.00 + │ ├── estimated rows: 0.01 │ ├── Exchange(Build) │ │ ├── output columns: [numbers.number (#0), b (#1)] │ │ ├── exchange type: Broadcast @@ -181,7 +181,7 @@ Exchange ├── probe keys: [CAST(t1.number (#2) AS UInt64 NULL)] ├── keys is null equal: [false] ├── filters: [] - ├── estimated rows: 2.00 + ├── estimated rows: 0.01 ├── Exchange(Build) │ ├── output columns: [sum(number) (#1), numbers.number (#0)] │ ├── exchange type: Broadcast @@ -266,7 +266,7 @@ Fragment 2: ├── probe keys: [CAST(t1.number (#2) AS UInt64 NULL)] ├── keys is null equal: [false] ├── filters: [] - ├── estimated rows: 2.00 + ├── estimated rows: 0.01 ├── ExchangeSource(Build) │ ├── output columns: [sum(number) (#1), numbers.number (#0)] │ └── source fragment: [1] @@ -313,7 +313,7 @@ AggregateFinal ├── estimated rows: 1.00 └── UnionAll ├── output columns: [a (#4)] - ├── estimated rows: 10100.00 + ├── estimated rows: 50.50 ├── HashJoin │ ├── output columns: [numbers.number (#0)] │ ├── join type: INNER @@ -323,7 +323,7 @@ AggregateFinal │ ├── filters: [] │ ├── build join filters: │ │ └── filter id:0, build key:t1.a (#0), probe key:t2.a (#1), filter type:inlist,min_max - │ ├── estimated rows: 10000.00 + │ ├── estimated rows: 50.00 │ ├── Exchange(Build) │ │ ├── output columns: [numbers.number (#0)] │ │ ├── exchange type: Broadcast @@ -355,7 +355,7 @@ AggregateFinal ├── filters: [] ├── build join filters: │ └── filter id:1, build key:t3.a (#3), probe key:t1.a (#2), filter type:inlist,min_max - ├── estimated rows: 100.00 + ├── estimated rows: 0.50 ├── Exchange(Build) │ ├── output columns: [numbers.number (#3)] │ ├── exchange type: Broadcast @@ -403,7 +403,7 @@ AggregateFinal ├── estimated rows: 1.00 └── UnionAll ├── output columns: [a (#5)] - ├── estimated rows: 10001.00 + ├── estimated rows: 51.00 ├── Exchange │ ├── output columns: [numbers.number (#0)] │ ├── exchange type: Merge @@ -416,7 +416,7 @@ AggregateFinal │ ├── filters: [] │ ├── build join filters: │ │ └── filter id:0, build key:t1.a (#0), probe key:t2.a (#1), filter type:inlist,min_max - │ ├── estimated rows: 10000.00 + │ ├── estimated rows: 50.00 │ ├── Exchange(Build) │ │ ├── output columns: [numbers.number (#0)] │ │ ├── exchange type: Broadcast @@ -460,7 +460,7 @@ AggregateFinal ├── filters: [] ├── build join filters: │ └── filter id:1, build key:t3.a (#3), probe key:t1.a (#2), filter type:inlist,min_max - ├── estimated rows: 100.00 + ├── estimated rows: 0.50 ├── Exchange(Build) │ ├── output columns: [numbers.number (#3)] │ ├── exchange type: Broadcast @@ -510,7 +510,7 @@ AggregateFinal ├── estimated rows: 1.00 └── UnionAll ├── output columns: [a (#3)] - ├── estimated rows: 10010.00 + ├── estimated rows: 60.00 ├── HashJoin │ ├── output columns: [numbers.number (#0)] │ ├── join type: INNER @@ -520,7 +520,7 @@ AggregateFinal │ ├── filters: [] │ ├── build join filters: │ │ └── filter id:0, build key:t1.a (#0), probe key:t2.a (#1), filter type:inlist,min_max - │ ├── estimated rows: 10000.00 + │ ├── estimated rows: 50.00 │ ├── Exchange(Build) │ │ ├── output columns: [numbers.number (#0)] │ │ ├── exchange type: Broadcast @@ -632,7 +632,7 @@ Exchange ├── filters: [] ├── build join filters(distributed): │ └── filter id:0, build key:t2.number (#1), probe key:t1.number (#0), filter type:inlist,min_max - ├── estimated rows: 200.00 + ├── estimated rows: 1.00 ├── Exchange(Build) │ ├── output columns: [t2.number (#1)] │ ├── exchange type: Hash(t2.number (#1)) diff --git a/tests/sqllogictests/suites/mode/cluster/explain_v2.test b/tests/sqllogictests/suites/mode/cluster/explain_v2.test index 5024eb471c94d..5171c18915114 100644 --- a/tests/sqllogictests/suites/mode/cluster/explain_v2.test +++ b/tests/sqllogictests/suites/mode/cluster/explain_v2.test @@ -357,7 +357,7 @@ AggregateFinal ├── probe keys: [x.a (#2)] ├── keys is null equal: [false] ├── filters: [] - ├── estimated rows: 50.00 + ├── estimated rows: 5.00 ├── Exchange(Build) │ ├── output columns: [numbers.number (#1)] │ ├── exchange type: Hash(CAST(y.a (#1) AS UInt64 NULL)) diff --git a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test index 087095317314d..2e1c2083f3306 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test @@ -63,22 +63,22 @@ Memo │ └── #0 Join [#0, #3] ├── Group #5 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 4420.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #1, cost: 7920.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 4410.010, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #1, cost: 4413.510, children: [{ dist: Any }] │ ├── #0 EvalScalar [#4] │ └── #1 Exchange: (Merge) [#5] ├── Group #6 │ ├── Best properties -│ │ └── { dist: Serial }: expr: #0, cost: 7970.000, children: [{ dist: Serial }] +│ │ └── { dist: Serial }: expr: #0, cost: 4413.560, children: [{ dist: Serial }] │ ├── #0 Aggregate [#5] │ └── #1 Exchange: (Merge) [#6] ├── Group #7 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 7975.000, children: [{ dist: Serial }] +│ │ └── { dist: Any }: expr: #0, cost: 4418.560, children: [{ dist: Serial }] │ └── #0 Aggregate [#6] └── Group #8 ├── Best properties - │ └── { dist: Serial }: expr: #0, cost: 7976.000, children: [{ dist: Any }] + │ └── { dist: Serial }: expr: #0, cost: 4419.560, children: [{ dist: Any }] └── #0 EvalScalar [#7] query T @@ -126,22 +126,22 @@ Memo │ └── #0 Join [#0, #3] ├── Group #5 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 4420.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 4930.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 4410.010, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 4410.520, children: [{ dist: Any }] │ ├── #0 EvalScalar [#4] │ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#5] ├── Group #6 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 4980.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 4410.570, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] │ └── #0 Aggregate [#5] ├── Group #7 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 5030.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 4410.620, children: [{ dist: Any }] │ └── #0 Aggregate [#6] ├── Group #8 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 5040.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #4, cost: 8540.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 4410.630, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #4, cost: 4414.130, children: [{ dist: Any }] │ ├── #0 EvalScalar [#7] │ ├── #1 EvalScalar [#14] │ ├── #2 EvalScalar [#20] @@ -166,16 +166,16 @@ Memo ├── Group #12 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 66410.000, children: [{ dist: Any }, { dist: Broadcast }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 66920.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 66410.510, children: [{ dist: Any }] │ ├── #0 Join [#11, #3] │ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#12] ├── Group #13 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 66970.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 66410.560, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] │ └── #0 Aggregate [#12] ├── Group #14 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 67020.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 66410.610, children: [{ dist: Any }] │ └── #0 Aggregate [#13] ├── Group #15 │ ├── Best properties @@ -197,17 +197,17 @@ Memo │ └── #0 Join [#0, #16] ├── Group #18 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 5030.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 5540.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 5020.010, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 5020.520, children: [{ dist: Any }] │ ├── #0 EvalScalar [#17] │ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#18] ├── Group #19 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 5590.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 5020.570, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] │ └── #0 Aggregate [#18] ├── Group #20 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 5640.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 5020.620, children: [{ dist: Any }] │ └── #0 Aggregate [#19] ├── Group #21 │ ├── Best properties @@ -215,17 +215,17 @@ Memo │ └── #0 Join [#11, #16] ├── Group #22 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 67030.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 67540.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 67020.010, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 67020.520, children: [{ dist: Any }] │ ├── #0 EvalScalar [#21] │ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#22] ├── Group #23 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 67590.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 67020.570, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] │ └── #0 Aggregate [#22] └── Group #24 ├── Best properties - │ └── { dist: Any }: expr: #0, cost: 67640.000, children: [{ dist: Any }] + │ └── { dist: Any }: expr: #0, cost: 67020.620, children: [{ dist: Any }] └── #0 Aggregate [#23] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test index 3d49805b0577b..d652138ae34bf 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test @@ -62,8 +62,8 @@ Memo │ └── #0 Join [#0, #3] └── Group #5 ├── Best properties - │ ├── { dist: Any }: expr: #0, cost: 4420.000, children: [{ dist: Any }] - │ └── { dist: Serial }: expr: #1, cost: 7920.000, children: [{ dist: Any }] + │ ├── { dist: Any }: expr: #0, cost: 4410.010, children: [{ dist: Any }] + │ └── { dist: Serial }: expr: #1, cost: 4413.510, children: [{ dist: Any }] ├── #0 EvalScalar [#4] └── #1 Exchange: (Merge) [#5] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test index 15274e1c00eed..a26c2262a8d2c 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test @@ -66,22 +66,22 @@ Memo │ └── #0 Join [#0, #3] ├── Group #5 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 4420.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 4930.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 4410.010, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 4410.520, children: [{ dist: Any }] │ ├── #0 EvalScalar [#4] │ └── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#5] ├── Group #6 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 4980.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 4410.570, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] │ └── #0 Aggregate [#5] ├── Group #7 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 5030.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 4410.620, children: [{ dist: Any }] │ └── #0 Aggregate [#6] ├── Group #8 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 5040.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #4, cost: 8540.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 4410.630, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #4, cost: 4414.130, children: [{ dist: Any }] │ ├── #0 EvalScalar [#7] │ ├── #1 EvalScalar [#16] │ ├── #2 EvalScalar [#22] @@ -89,11 +89,11 @@ Memo │ └── #4 Exchange: (Merge) [#8] ├── Group #9 │ ├── Best properties -│ │ └── { dist: Serial }: expr: #0, cost: 8550.000, children: [{ dist: Serial }] +│ │ └── { dist: Serial }: expr: #0, cost: 4414.140, children: [{ dist: Serial }] │ └── #0 Sort [#8] ├── Group #10 │ ├── Best properties -│ │ └── { dist: Serial }: expr: #0, cost: 8560.000, children: [{ dist: Serial }] +│ │ └── { dist: Serial }: expr: #0, cost: 4414.150, children: [{ dist: Serial }] │ └── #0 Limit [#9] ├── Group #11 │ ├── Best properties @@ -114,16 +114,16 @@ Memo ├── Group #14 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 66410.000, children: [{ dist: Any }, { dist: Broadcast }] -│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 66920.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 66410.510, children: [{ dist: Any }] │ ├── #0 Join [#13, #3] │ └── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#14] ├── Group #15 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 66970.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 66410.560, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] │ └── #0 Aggregate [#14] ├── Group #16 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 67020.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 66410.610, children: [{ dist: Any }] │ └── #0 Aggregate [#15] ├── Group #17 │ ├── Best properties @@ -145,17 +145,17 @@ Memo │ └── #0 Join [#0, #18] ├── Group #20 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 9130.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 9640.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 9120.010, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 9120.520, children: [{ dist: Any }] │ ├── #0 EvalScalar [#19] │ └── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#20] ├── Group #21 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 9690.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 9120.570, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] │ └── #0 Aggregate [#20] ├── Group #22 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 9740.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 9120.620, children: [{ dist: Any }] │ └── #0 Aggregate [#21] ├── Group #23 │ ├── Best properties @@ -163,17 +163,17 @@ Memo │ └── #0 Join [#13, #18] ├── Group #24 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 71130.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 71640.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 71120.010, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 71120.520, children: [{ dist: Any }] │ ├── #0 EvalScalar [#23] │ └── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#24] ├── Group #25 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 71690.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 71120.570, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] │ └── #0 Aggregate [#24] └── Group #26 ├── Best properties - │ └── { dist: Any }: expr: #0, cost: 71740.000, children: [{ dist: Any }] + │ └── { dist: Any }: expr: #0, cost: 71120.620, children: [{ dist: Any }] └── #0 Aggregate [#25] diff --git a/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test b/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test index 9c39208142a1e..455452edfa12b 100644 --- a/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test +++ b/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test @@ -111,7 +111,7 @@ CommitSink ├── probe keys: [] ├── keys is null equal: [] ├── filters: [t1.a (#1) > t.a (#0)] - ├── estimated rows: 15.00 + ├── estimated rows: 3.00 ├── TableScan(Build) │ ├── table: default.default.t2 │ ├── output columns: [a (#0)] @@ -149,7 +149,7 @@ CommitSink ├── probe keys: [] ├── keys is null equal: [] ├── filters: [t1.a (#1) < t2.a (#0)] - ├── estimated rows: 15.00 + ├── estimated rows: 3.00 ├── TableScan(Build) │ ├── table: default.default.t2 │ ├── output columns: [a (#0)] diff --git a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test index 65d0a19b01881..3255d98bb4cd2 100644 --- a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test +++ b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test @@ -377,11 +377,11 @@ AggregateFinal ├── output columns: [sum(t1.a) (#9), t2.v['b']['c'] (#5)] ├── group by: [v['b']['c']] ├── aggregate functions: [sum(a)] -├── estimated rows: 1.00 +├── estimated rows: 0.01 └── AggregatePartial ├── group by: [v['b']['c']] ├── aggregate functions: [sum(a)] - ├── estimated rows: 1.00 + ├── estimated rows: 0.01 └── HashJoin ├── output columns: [t2.v['b']['c'] (#5), t1.a (#7)] ├── join type: INNER @@ -389,7 +389,7 @@ AggregateFinal ├── probe keys: [CAST(t2.v['b']['c'] (#5) AS Int32 NULL)] ├── keys is null equal: [false] ├── filters: [] - ├── estimated rows: 1.00 + ├── estimated rows: 0.01 ├── TableScan(Build) │ ├── table: default.test_virtual_db.t1 │ ├── output columns: [a (#7)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/aggregate.test b/tests/sqllogictests/suites/mode/standalone/explain/aggregate.test index 3000316916869..7b6c40d38b598 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/aggregate.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/aggregate.test @@ -407,7 +407,7 @@ AggregateFinal ├── join type: INNER ├── range join conditions: [t1.a (#0) "gt" t2.a (#1)] ├── other conditions: [] - ├── estimated rows: 1000.00 + ├── estimated rows: 200.00 ├── TableScan(Left) │ ├── table: default.default.t1 │ ├── output columns: [a (#0)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain.test b/tests/sqllogictests/suites/mode/standalone/explain/explain.test index 8cf9b3161543e..8503584e2152c 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/explain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/explain.test @@ -421,7 +421,7 @@ HashJoin ├── probe keys: [] ├── keys is null equal: [] ├── filters: [(((t1.a (#0) > 1 AND t2.a (#2) > 2) OR (t1.b (#1) < 3 AND t2.b (#3) < 4)) OR t3.a (#4) = 2)] -├── estimated rows: 50.00 +├── estimated rows: 41.00 ├── HashJoin(Build) │ ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] │ ├── join type: CROSS @@ -944,7 +944,7 @@ HashJoin ├── build join filters: │ ├── filter id:0, build key:scalar_subquery_2 (#2), probe key:a.id (#0), filter type:inlist,min_max │ └── filter id:1, build key:id (#2), probe key:a.id (#0), filter type:inlist,min_max -├── estimated rows: 0.40 +├── estimated rows: 0.13 ├── Filter(Build) │ ├── output columns: [b.id (#2)] │ ├── filters: [is_true(b.id (#2) = b.id (#2))] @@ -1677,7 +1677,7 @@ HashJoin ├── filters: [] ├── build join filters: │ └── filter id:0, build key:numbers.number (#1), probe key:a.number (#0), filter type:inlist,min_max -├── estimated rows: 100.00 +├── estimated rows: 0.50 ├── Filter(Build) │ ├── output columns: [numbers.number (#1)] │ ├── filters: [NOT CAST(numbers.number (#1) AS Boolean)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/expression_scan.test b/tests/sqllogictests/suites/mode/standalone/explain/expression_scan.test index 601d48e2c6035..5ea9c09bb1b50 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/expression_scan.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/expression_scan.test @@ -28,7 +28,7 @@ HashJoin ├── filters: [] ├── cache index: 0 ├── cache columns: [1] -├── estimated rows: 3.00 +├── estimated rows: 0.00 ├── TableScan(Build) │ ├── table: default.default.t1 │ ├── output columns: [a (#0), b (#1)] @@ -68,7 +68,7 @@ HashJoin ├── filters: [] ├── cache index: 0 ├── cache columns: [1, 2] -├── estimated rows: 6.00 +├── estimated rows: 0.00 ├── TableScan(Build) │ ├── table: default.default.t1 │ ├── output columns: [a (#0), b (#1), c (#2)] @@ -109,7 +109,7 @@ HashJoin ├── filters: [] ├── cache index: 0 ├── cache columns: [1, 2] -├── estimated rows: 6.00 +├── estimated rows: 3.00 ├── TableScan(Build) │ ├── table: default.default.t1 │ ├── output columns: [a (#0), b (#1), c (#2)] @@ -154,7 +154,7 @@ HashJoin ├── filters: [] ├── cache index: 1 ├── cache columns: [1, 2] -├── estimated rows: 12.00 +├── estimated rows: 3.00 ├── HashJoin(Build) │ ├── output columns: [c2 (#4), c1 (#8), t1.a (#0), t1.b (#1), t1.c (#2)] │ ├── join type: RIGHT OUTER @@ -164,7 +164,7 @@ HashJoin │ ├── filters: [] │ ├── cache index: 0 │ ├── cache columns: [1, 2] -│ ├── estimated rows: 6.00 +│ ├── estimated rows: 3.00 │ ├── TableScan(Build) │ │ ├── table: default.default.t1 │ │ ├── output columns: [a (#0), b (#1), c (#2)] @@ -226,7 +226,7 @@ HashJoin ├── filters: [] ├── cache index: 1 ├── cache columns: [1, 3] -├── estimated rows: 12.00 +├── estimated rows: 0.00 ├── HashJoin(Build) │ ├── output columns: [c1 (#3), c2 (#4), t1.a (#0), t1.b (#1)] │ ├── join type: INNER @@ -236,7 +236,7 @@ HashJoin │ ├── filters: [] │ ├── cache index: 0 │ ├── cache columns: [1, 2] -│ ├── estimated rows: 6.00 +│ ├── estimated rows: 0.00 │ ├── TableScan(Build) │ │ ├── table: default.default.t1 │ │ ├── output columns: [a (#0), b (#1), c (#2)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join.test b/tests/sqllogictests/suites/mode/standalone/explain/join.test index 91c9489931e69..8678e01b89074 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join.test @@ -63,7 +63,7 @@ HashJoin ├── filters: [] ├── build join filters: │ └── filter id:0, build key:t.number (#0), probe key:t1.number (#1), filter type:inlist,min_max -├── estimated rows: 1.00 +├── estimated rows: 0.50 ├── TableScan(Build) │ ├── table: default.default.t │ ├── output columns: [number (#0)] @@ -850,7 +850,7 @@ HashJoin ├── filters: [] ├── build join filters: │ └── filter id:0, build key:t2.number (#1), probe key:t1.number (#0), filter type:inlist,min_max -├── estimated rows: 200.00 +├── estimated rows: 1.00 ├── TableScan(Build) │ ├── table: default.system.numbers │ ├── output columns: [number (#1)] @@ -1036,7 +1036,7 @@ HashJoin ├── probe keys: [] ├── keys is null equal: [] ├── filters: [t1.a (#0) >= scalar_subquery_2 (#2)] -├── estimated rows: 4.00 +├── estimated rows: 0.80 ├── EvalScalar(Build) │ ├── output columns: [MAX(a) (#2)] │ ├── expressions: [1] @@ -1063,7 +1063,7 @@ HashJoin ├── probe keys: [] ├── keys is null equal: [] ├── filters: [t1.a (#0) >= scalar_subquery_1 (#1)] -├── estimated rows: 4.00 +├── estimated rows: 0.80 ├── Filter(Build) │ ├── output columns: [t2.a (#1)] │ ├── filters: [is_true(t2.a (#1) > 0)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/chain.test b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/chain.test index 111394ff82c38..9dfb367850e43 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/chain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/chain.test @@ -144,24 +144,24 @@ query T explain select * from t, t2, t1 where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#2), t2.a (#1), t.a (#0)] +├── output columns: [t2.a (#1), t1.a (#2), t.a (#0)] ├── join type: INNER ├── build keys: [t.a (#0)] -├── probe keys: [t1.a (#2)] +├── probe keys: [t2.a (#1)] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: -│ └── filter id:1, build key:t.a (#0), probe key:t1.a (#2), filter type:inlist,min_max +│ └── filter id:1, build key:t.a (#0), probe key:t2.a (#1), filter type:inlist,min_max ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#1), t.a (#0)] +│ ├── output columns: [t1.a (#2), t.a (#0)] │ ├── join type: INNER │ ├── build keys: [t.a (#0)] -│ ├── probe keys: [t2.a (#1)] +│ ├── probe keys: [t1.a (#2)] │ ├── keys is null equal: [false] │ ├── filters: [] │ ├── build join filters: -│ │ └── filter id:0, build key:t.a (#0), probe key:t2.a (#1), filter type:inlist,min_max +│ │ └── filter id:0, build key:t.a (#0), probe key:t1.a (#2), filter type:inlist,min_max │ ├── estimated rows: 1.00 │ ├── TableScan(Build) │ │ ├── table: default.join_reorder.t @@ -174,50 +174,50 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#1)] -│ ├── read rows: 100 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#2)] +│ ├── read rows: 10 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] │ ├── apply join filters: [#0] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#2)] - ├── read rows: 10 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#1)] + ├── read rows: 100 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] ├── apply join filters: [#1] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t1, t, t2 where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#0), t.a (#1), t2.a (#2)] +├── output columns: [t2.a (#2), t.a (#1), t1.a (#0)] ├── join type: INNER -├── build keys: [t2.a (#2)] -├── probe keys: [t1.a (#0)] +├── build keys: [t1.a (#0)] +├── probe keys: [t2.a (#2)] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: -│ └── filter id:1, build key:t2.a (#2), probe key:t1.a (#0), filter type:inlist,min_max +│ └── filter id:1, build key:t1.a (#0), probe key:t2.a (#2), filter type:inlist,min_max ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#2), t.a (#1)] +│ ├── output columns: [t1.a (#0), t.a (#1)] │ ├── join type: INNER │ ├── build keys: [t.a (#1)] -│ ├── probe keys: [t2.a (#2)] +│ ├── probe keys: [t1.a (#0)] │ ├── keys is null equal: [false] │ ├── filters: [] │ ├── build join filters: -│ │ └── filter id:0, build key:t.a (#1), probe key:t2.a (#2), filter type:inlist,min_max +│ │ └── filter id:0, build key:t.a (#1), probe key:t1.a (#0), filter type:inlist,min_max │ ├── estimated rows: 1.00 │ ├── TableScan(Build) │ │ ├── table: default.join_reorder.t @@ -230,50 +230,50 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#2)] -│ ├── read rows: 100 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#0)] +│ ├── read rows: 10 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] │ ├── apply join filters: [#0] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#0)] - ├── read rows: 10 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#2)] + ├── read rows: 100 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] ├── apply join filters: [#1] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t1, t2, t where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#0), t2.a (#1), t.a (#2)] +├── output columns: [t2.a (#1), t.a (#2), t1.a (#0)] ├── join type: INNER -├── build keys: [t.a (#2)] -├── probe keys: [t1.a (#0)] +├── build keys: [t1.a (#0)] +├── probe keys: [t2.a (#1)] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: -│ └── filter id:1, build key:t.a (#2), probe key:t1.a (#0), filter type:inlist,min_max +│ └── filter id:1, build key:t1.a (#0), probe key:t2.a (#1), filter type:inlist,min_max ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#1), t.a (#2)] +│ ├── output columns: [t1.a (#0), t.a (#2)] │ ├── join type: INNER │ ├── build keys: [t.a (#2)] -│ ├── probe keys: [t2.a (#1)] +│ ├── probe keys: [t1.a (#0)] │ ├── keys is null equal: [false] │ ├── filters: [] │ ├── build join filters: -│ │ └── filter id:0, build key:t.a (#2), probe key:t2.a (#1), filter type:inlist,min_max +│ │ └── filter id:0, build key:t.a (#2), probe key:t1.a (#0), filter type:inlist,min_max │ ├── estimated rows: 1.00 │ ├── TableScan(Build) │ │ ├── table: default.join_reorder.t @@ -286,27 +286,27 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#1)] -│ ├── read rows: 100 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#0)] +│ ├── read rows: 10 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] │ ├── apply join filters: [#0] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#0)] - ├── read rows: 10 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#1)] + ├── read rows: 100 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] ├── apply join filters: [#1] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t2, t1, t where t.a = t1.a and t1.a = t2.a @@ -430,7 +430,7 @@ HashJoin ├── probe keys: [t1.a (#2)] ├── keys is null equal: [false] ├── filters: [] -├── estimated rows: 10.00 +├── estimated rows: 1.00 ├── TableScan(Build) │ ├── table: default.join_reorder.t │ ├── output columns: [a (#0)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/cycles.test b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/cycles.test index e46e983860c78..7d154436c6a2e 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/cycles.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/cycles.test @@ -76,24 +76,24 @@ query T explain select * from t, t2, t1 where t.a = t1.a and t1.a = t2.a and t2.a = t.a ---- HashJoin -├── output columns: [t1.a (#2), t2.a (#1), t.a (#0)] +├── output columns: [t2.a (#1), t1.a (#2), t.a (#0)] ├── join type: INNER ├── build keys: [t.a (#0)] -├── probe keys: [t1.a (#2)] +├── probe keys: [t2.a (#1)] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: -│ └── filter id:1, build key:t.a (#0), probe key:t1.a (#2), filter type:inlist,min_max +│ └── filter id:1, build key:t.a (#0), probe key:t2.a (#1), filter type:inlist,min_max ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#1), t.a (#0)] +│ ├── output columns: [t1.a (#2), t.a (#0)] │ ├── join type: INNER │ ├── build keys: [t.a (#0)] -│ ├── probe keys: [t2.a (#1)] +│ ├── probe keys: [t1.a (#2)] │ ├── keys is null equal: [false] │ ├── filters: [] │ ├── build join filters: -│ │ └── filter id:0, build key:t.a (#0), probe key:t2.a (#1), filter type:inlist,min_max +│ │ └── filter id:0, build key:t.a (#0), probe key:t1.a (#2), filter type:inlist,min_max │ ├── estimated rows: 1.00 │ ├── TableScan(Build) │ │ ├── table: default.join_reorder.t @@ -106,50 +106,50 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#1)] -│ ├── read rows: 100 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#2)] +│ ├── read rows: 10 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] │ ├── apply join filters: [#0] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#2)] - ├── read rows: 10 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#1)] + ├── read rows: 100 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] ├── apply join filters: [#1] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t1, t, t2 where t.a = t1.a and t1.a = t2.a and t2.a = t.a ---- HashJoin -├── output columns: [t1.a (#0), t.a (#1), t2.a (#2)] +├── output columns: [t2.a (#2), t.a (#1), t1.a (#0)] ├── join type: INNER -├── build keys: [t2.a (#2)] -├── probe keys: [t1.a (#0)] +├── build keys: [t1.a (#0)] +├── probe keys: [t2.a (#2)] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: -│ └── filter id:1, build key:t2.a (#2), probe key:t1.a (#0), filter type:inlist,min_max +│ └── filter id:1, build key:t1.a (#0), probe key:t2.a (#2), filter type:inlist,min_max ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#2), t.a (#1)] +│ ├── output columns: [t1.a (#0), t.a (#1)] │ ├── join type: INNER │ ├── build keys: [t.a (#1)] -│ ├── probe keys: [t2.a (#2)] +│ ├── probe keys: [t1.a (#0)] │ ├── keys is null equal: [false] │ ├── filters: [] │ ├── build join filters: -│ │ └── filter id:0, build key:t.a (#1), probe key:t2.a (#2), filter type:inlist,min_max +│ │ └── filter id:0, build key:t.a (#1), probe key:t1.a (#0), filter type:inlist,min_max │ ├── estimated rows: 1.00 │ ├── TableScan(Build) │ │ ├── table: default.join_reorder.t @@ -162,50 +162,50 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#2)] -│ ├── read rows: 100 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#0)] +│ ├── read rows: 10 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] │ ├── apply join filters: [#0] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#0)] - ├── read rows: 10 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#2)] + ├── read rows: 100 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] ├── apply join filters: [#1] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t1, t2, t where t.a = t1.a and t1.a = t2.a and t2.a = t.a ---- HashJoin -├── output columns: [t1.a (#0), t2.a (#1), t.a (#2)] +├── output columns: [t2.a (#1), t.a (#2), t1.a (#0)] ├── join type: INNER -├── build keys: [t.a (#2)] -├── probe keys: [t1.a (#0)] +├── build keys: [t1.a (#0)] +├── probe keys: [t2.a (#1)] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: -│ └── filter id:1, build key:t.a (#2), probe key:t1.a (#0), filter type:inlist,min_max +│ └── filter id:1, build key:t1.a (#0), probe key:t2.a (#1), filter type:inlist,min_max ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#1), t.a (#2)] +│ ├── output columns: [t1.a (#0), t.a (#2)] │ ├── join type: INNER │ ├── build keys: [t.a (#2)] -│ ├── probe keys: [t2.a (#1)] +│ ├── probe keys: [t1.a (#0)] │ ├── keys is null equal: [false] │ ├── filters: [] │ ├── build join filters: -│ │ └── filter id:0, build key:t.a (#2), probe key:t2.a (#1), filter type:inlist,min_max +│ │ └── filter id:0, build key:t.a (#2), probe key:t1.a (#0), filter type:inlist,min_max │ ├── estimated rows: 1.00 │ ├── TableScan(Build) │ │ ├── table: default.join_reorder.t @@ -218,27 +218,27 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#1)] -│ ├── read rows: 100 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#0)] +│ ├── read rows: 10 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] │ ├── apply join filters: [#0] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#0)] - ├── read rows: 10 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#1)] + ├── read rows: 100 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] ├── apply join filters: [#1] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t2, t1, t where t.a = t1.a and t1.a = t2.a and t2.a = t.a diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/star.test b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/star.test index f02aff118b009..7d143de9b3c5b 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/star.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/star.test @@ -76,24 +76,24 @@ query T explain select * from t, t2, t1 where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#2), t2.a (#1), t.a (#0)] +├── output columns: [t2.a (#1), t1.a (#2), t.a (#0)] ├── join type: INNER ├── build keys: [t.a (#0)] -├── probe keys: [t1.a (#2)] +├── probe keys: [t2.a (#1)] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: -│ └── filter id:1, build key:t.a (#0), probe key:t1.a (#2), filter type:inlist,min_max +│ └── filter id:1, build key:t.a (#0), probe key:t2.a (#1), filter type:inlist,min_max ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#1), t.a (#0)] +│ ├── output columns: [t1.a (#2), t.a (#0)] │ ├── join type: INNER │ ├── build keys: [t.a (#0)] -│ ├── probe keys: [t2.a (#1)] +│ ├── probe keys: [t1.a (#2)] │ ├── keys is null equal: [false] │ ├── filters: [] │ ├── build join filters: -│ │ └── filter id:0, build key:t.a (#0), probe key:t2.a (#1), filter type:inlist,min_max +│ │ └── filter id:0, build key:t.a (#0), probe key:t1.a (#2), filter type:inlist,min_max │ ├── estimated rows: 1.00 │ ├── TableScan(Build) │ │ ├── table: default.join_reorder.t @@ -106,50 +106,50 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#1)] -│ ├── read rows: 100 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#2)] +│ ├── read rows: 10 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] │ ├── apply join filters: [#0] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#2)] - ├── read rows: 10 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#1)] + ├── read rows: 100 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] ├── apply join filters: [#1] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t1, t, t2 where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#0), t.a (#1), t2.a (#2)] +├── output columns: [t2.a (#2), t.a (#1), t1.a (#0)] ├── join type: INNER -├── build keys: [t2.a (#2)] -├── probe keys: [t1.a (#0)] +├── build keys: [t1.a (#0)] +├── probe keys: [t2.a (#2)] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: -│ └── filter id:1, build key:t2.a (#2), probe key:t1.a (#0), filter type:inlist,min_max +│ └── filter id:1, build key:t1.a (#0), probe key:t2.a (#2), filter type:inlist,min_max ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#2), t.a (#1)] +│ ├── output columns: [t1.a (#0), t.a (#1)] │ ├── join type: INNER │ ├── build keys: [t.a (#1)] -│ ├── probe keys: [t2.a (#2)] +│ ├── probe keys: [t1.a (#0)] │ ├── keys is null equal: [false] │ ├── filters: [] │ ├── build join filters: -│ │ └── filter id:0, build key:t.a (#1), probe key:t2.a (#2), filter type:inlist,min_max +│ │ └── filter id:0, build key:t.a (#1), probe key:t1.a (#0), filter type:inlist,min_max │ ├── estimated rows: 1.00 │ ├── TableScan(Build) │ │ ├── table: default.join_reorder.t @@ -162,50 +162,50 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#2)] -│ ├── read rows: 100 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#0)] +│ ├── read rows: 10 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] │ ├── apply join filters: [#0] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#0)] - ├── read rows: 10 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#2)] + ├── read rows: 100 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] ├── apply join filters: [#1] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t1, t2, t where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#0), t2.a (#1), t.a (#2)] +├── output columns: [t2.a (#1), t.a (#2), t1.a (#0)] ├── join type: INNER -├── build keys: [t.a (#2)] -├── probe keys: [t1.a (#0)] +├── build keys: [t1.a (#0)] +├── probe keys: [t2.a (#1)] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: -│ └── filter id:1, build key:t.a (#2), probe key:t1.a (#0), filter type:inlist,min_max +│ └── filter id:1, build key:t1.a (#0), probe key:t2.a (#1), filter type:inlist,min_max ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#1), t.a (#2)] +│ ├── output columns: [t1.a (#0), t.a (#2)] │ ├── join type: INNER │ ├── build keys: [t.a (#2)] -│ ├── probe keys: [t2.a (#1)] +│ ├── probe keys: [t1.a (#0)] │ ├── keys is null equal: [false] │ ├── filters: [] │ ├── build join filters: -│ │ └── filter id:0, build key:t.a (#2), probe key:t2.a (#1), filter type:inlist,min_max +│ │ └── filter id:0, build key:t.a (#2), probe key:t1.a (#0), filter type:inlist,min_max │ ├── estimated rows: 1.00 │ ├── TableScan(Build) │ │ ├── table: default.join_reorder.t @@ -218,27 +218,27 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#1)] -│ ├── read rows: 100 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#0)] +│ ├── read rows: 10 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] │ ├── apply join filters: [#0] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#0)] - ├── read rows: 10 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#1)] + ├── read rows: 100 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] ├── apply join filters: [#1] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t2, t1, t where t.a = t2.a and t1.a = t2.a diff --git a/tests/sqllogictests/suites/mode/standalone/explain/lateral.test b/tests/sqllogictests/suites/mode/standalone/explain/lateral.test index e2739c3545db4..6621d4664401e 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/lateral.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/lateral.test @@ -10,7 +10,7 @@ HashJoin ├── filters: [] ├── build join filters: │ └── filter id:0, build key:number (#1), probe key:number (#0), filter type:inlist,min_max -├── estimated rows: 100.00 +├── estimated rows: 0.50 ├── TableScan(Build) │ ├── table: default.system.numbers │ ├── output columns: [number (#1)] @@ -41,7 +41,7 @@ HashJoin ├── probe keys: [number (#3)] ├── keys is null equal: [false] ├── filters: [] -├── estimated rows: 1000.00 +├── estimated rows: 5.00 ├── TableScan(Build) │ ├── table: default.system.numbers │ ├── output columns: [number (#0)] @@ -95,62 +95,62 @@ query T explain select * from numbers(10) t(a) left join lateral(select t.a + t1.a as b from numbers(10) t1(a) where t.a = t1.a) t1 on t.a = t1.b ---- HashJoin -├── output columns: [b (#3), t.number (#0)] -├── join type: RIGHT OUTER -├── build keys: [CAST(t.a (#0) AS UInt64 NULL), CAST(number (#0) AS UInt64 NULL)] -├── probe keys: [t1.b (#3), number (#4)] +├── output columns: [t.number (#0), b (#3)] +├── join type: LEFT OUTER +├── build keys: [t1.b (#3), number (#4)] +├── probe keys: [CAST(t.a (#0) AS UInt64 NULL), CAST(number (#0) AS UInt64 NULL)] ├── keys is null equal: [false, false] ├── filters: [] -├── estimated rows: 1000.00 -├── TableScan(Build) -│ ├── table: default.system.numbers -│ ├── output columns: [number (#0)] -│ ├── read rows: 10 -│ ├── read size: < 1 KiB -│ ├── partitions total: 1 -│ ├── partitions scanned: 1 -│ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 10.00 -└── EvalScalar(Probe) - ├── output columns: [number (#4), b (#3)] - ├── expressions: [CAST(t1.b (#2) AS UInt64 NULL)] - ├── estimated rows: 100.00 - └── EvalScalar - ├── output columns: [number (#4), b (#2)] - ├── expressions: [number (#4) + t1.a (#1)] - ├── estimated rows: 100.00 - └── HashJoin - ├── output columns: [number (#4), t1.number (#1)] - ├── join type: INNER - ├── build keys: [t1.a (#1)] - ├── probe keys: [outer.number (#4)] - ├── keys is null equal: [false] - ├── filters: [] - ├── estimated rows: 100.00 - ├── TableScan(Build) - │ ├── table: default.system.numbers - │ ├── output columns: [number (#1)] - │ ├── read rows: 10 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 10.00 - └── AggregateFinal(Probe) - ├── output columns: [number (#4)] - ├── group by: [number] - ├── aggregate functions: [] - ├── estimated rows: 10.00 - └── AggregatePartial - ├── group by: [number] - ├── aggregate functions: [] - ├── estimated rows: 10.00 - └── TableScan - ├── table: default.system.numbers - ├── output columns: [number (#4)] - ├── read rows: 10 - ├── read size: < 1 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 +├── estimated rows: 10.00 +├── EvalScalar(Build) +│ ├── output columns: [number (#4), b (#3)] +│ ├── expressions: [CAST(t1.b (#2) AS UInt64 NULL)] +│ ├── estimated rows: 0.50 +│ └── EvalScalar +│ ├── output columns: [number (#4), b (#2)] +│ ├── expressions: [number (#4) + t1.a (#1)] +│ ├── estimated rows: 0.50 +│ └── HashJoin +│ ├── output columns: [number (#4), t1.number (#1)] +│ ├── join type: INNER +│ ├── build keys: [t1.a (#1)] +│ ├── probe keys: [outer.number (#4)] +│ ├── keys is null equal: [false] +│ ├── filters: [] +│ ├── estimated rows: 0.50 +│ ├── TableScan(Build) +│ │ ├── table: default.system.numbers +│ │ ├── output columns: [number (#1)] +│ │ ├── read rows: 10 +│ │ ├── read size: < 1 KiB +│ │ ├── partitions total: 1 +│ │ ├── partitions scanned: 1 +│ │ ├── push downs: [filters: [], limit: NONE] +│ │ └── estimated rows: 10.00 +│ └── AggregateFinal(Probe) +│ ├── output columns: [number (#4)] +│ ├── group by: [number] +│ ├── aggregate functions: [] +│ ├── estimated rows: 10.00 +│ └── AggregatePartial +│ ├── group by: [number] +│ ├── aggregate functions: [] +│ ├── estimated rows: 10.00 +│ └── TableScan +│ ├── table: default.system.numbers +│ ├── output columns: [number (#4)] +│ ├── read rows: 10 +│ ├── read size: < 1 KiB +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 10.00 +└── TableScan(Probe) + ├── table: default.system.numbers + ├── output columns: [number (#0)] + ├── read rows: 10 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 10.00 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/limit.test b/tests/sqllogictests/suites/mode/standalone/explain/limit.test index a77e114390b49..6eb01662b4692 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/limit.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/limit.test @@ -180,7 +180,7 @@ Limit └── Sort(Single) ├── output columns: [c (#4), count(t1.number) (#1)] ├── sort keys: [count(t1.number) ASC NULLS LAST] - ├── estimated rows: 2.00 + ├── estimated rows: 1.00 └── HashJoin ├── output columns: [c (#4), count(t1.number) (#1)] ├── join type: RIGHT OUTER @@ -188,7 +188,7 @@ Limit ├── probe keys: [t4.c (#4)] ├── keys is null equal: [false] ├── filters: [] - ├── estimated rows: 2.00 + ├── estimated rows: 1.00 ├── AggregateFinal(Build) │ ├── output columns: [count(t1.number) (#1), t1.number (#0)] │ ├── group by: [number] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/materialized_cte.test b/tests/sqllogictests/suites/mode/standalone/explain/materialized_cte.test index 6776a69cfc4dd..69c2f869f3449 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/materialized_cte.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/materialized_cte.test @@ -21,7 +21,7 @@ Sequence ├── filters: [] ├── build join filters: │ └── filter id:0, build key:t2.b (#1), probe key:t1.a (#0), filter type:inlist,min_max - ├── estimated rows: 100.00 + ├── estimated rows: 0.50 ├── MaterializeCTERef(Build) │ ├── cte_name: t1 │ ├── cte_schema: [number (#1)] @@ -60,7 +60,7 @@ Sequence ├── filters: [] ├── build join filters: │ └── filter id:0, build key:t2.b (#1), probe key:t1.a (#0), filter type:inlist,min_max - ├── estimated rows: 100.00 + ├── estimated rows: 0.50 ├── MaterializeCTERef(Build) │ ├── cte_name: t2 │ ├── cte_schema: [number (#1)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/project_set.test b/tests/sqllogictests/suites/mode/standalone/explain/project_set.test index ab6f6d1a5ae17..34fb48ca4fdef 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/project_set.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/project_set.test @@ -166,7 +166,7 @@ HashJoin ├── probe keys: [CAST(t1.a (#2) AS UInt64 NULL)] ├── keys is null equal: [false] ├── filters: [] -├── estimated rows: 450.00 +├── estimated rows: 2.25 ├── TableScan(Build) │ ├── table: default.system.numbers │ ├── output columns: [number (#3)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/prune_column.test b/tests/sqllogictests/suites/mode/standalone/explain/prune_column.test index 67543ec537e2c..cd8e53caa2b3e 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/prune_column.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/prune_column.test @@ -79,7 +79,7 @@ HashJoin ├── probe keys: [t2.b (#7)] ├── keys is null equal: [false] ├── filters: [] -├── estimated rows: 0.20 +├── estimated rows: 0.00 ├── EvalScalar(Build) │ ├── output columns: [a (#1), b (#2)] │ ├── expressions: [numbers.number (#0) + 1, numbers.number (#0) + 1] @@ -121,7 +121,7 @@ HashJoin ├── probe keys: [t1.a (#1)] ├── keys is null equal: [false] ├── filters: [] -├── estimated rows: 2.00 +├── estimated rows: 0.01 ├── AggregateFinal(Build) │ ├── output columns: [COUNT(*) (#12)] │ ├── group by: [] @@ -138,7 +138,7 @@ HashJoin │ ├── probe keys: [t3.b (#10)] │ ├── keys is null equal: [false] │ ├── filters: [] -│ ├── estimated rows: 0.20 +│ ├── estimated rows: 0.00 │ ├── EvalScalar(Build) │ │ ├── output columns: [b (#5)] │ │ ├── expressions: [numbers.number (#3) + 1] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_self_join.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_self_join.test index ddd365e14102c..0183d46eb5860 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_self_join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_self_join.test @@ -49,7 +49,7 @@ Sequence ├── probe keys: [b1.a (#4)] ├── keys is null equal: [false] ├── filters: [d.b (#5) < d.b (#11)] - ├── estimated rows: 400.00 + ├── estimated rows: 0.40 ├── MaterializeCTERef(Build) │ ├── cte_name: b │ ├── cte_schema: [a (#10), b (#11)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/select_limit_offset.test b/tests/sqllogictests/suites/mode/standalone/explain/select_limit_offset.test index 314a118a96bf4..6e16ec0003070 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/select_limit_offset.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/select_limit_offset.test @@ -130,7 +130,7 @@ Limit ├── output columns: [t1.a (#1), t.a (#0)] ├── limit: 2 ├── offset: 0 -├── estimated rows: 2.00 +├── estimated rows: 1.00 └── HashJoin ├── output columns: [t1.a (#1), t.a (#0)] ├── join type: RIGHT OUTER @@ -140,7 +140,7 @@ Limit ├── filters: [] ├── build join filters: │ └── filter id:0, build key:t.a (#0), probe key:t1.a (#1), filter type:inlist,min_max - ├── estimated rows: 2.00 + ├── estimated rows: 1.00 ├── Limit(Build) │ ├── output columns: [t.a (#0)] │ ├── limit: 2 @@ -175,7 +175,7 @@ Limit ├── output columns: [t1.a (#0), t.a (#1)] ├── limit: 2 ├── offset: 0 -├── estimated rows: 2.00 +├── estimated rows: 1.00 └── HashJoin ├── output columns: [t1.a (#0), t.a (#1)] ├── join type: RIGHT OUTER @@ -185,7 +185,7 @@ Limit ├── filters: [] ├── build join filters: │ └── filter id:0, build key:t.a (#1), probe key:t1.a (#0), filter type:inlist,min_max - ├── estimated rows: 2.00 + ├── estimated rows: 1.00 ├── Limit(Build) │ ├── output columns: [t.a (#1)] │ ├── limit: 2 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/subquery.test b/tests/sqllogictests/suites/mode/standalone/explain/subquery.test index 819a2c3b760b9..f6f15d641701a 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/subquery.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/subquery.test @@ -208,7 +208,7 @@ HashJoin ├── probe keys: [t.number (#0)] ├── keys is null equal: [false] ├── filters: [] -├── estimated rows: 1.00 +├── estimated rows: 0.01 ├── TableScan(Build) │ ├── table: default.system.numbers │ ├── output columns: [number (#1)] @@ -268,7 +268,7 @@ HashJoin ├── probe keys: [t.number (#0)] ├── keys is null equal: [false] ├── filters: [] -├── estimated rows: 1.00 +├── estimated rows: 0.01 ├── TableScan(Build) │ ├── table: default.system.numbers │ ├── output columns: [number (#1)] @@ -361,7 +361,7 @@ HashJoin ├── probe keys: [t.number (#0)] ├── keys is null equal: [false] ├── filters: [t.number (#0) < numbers.number (#1)] -├── estimated rows: 1.00 +├── estimated rows: 0.00 ├── TableScan(Build) │ ├── table: default.system.numbers │ ├── output columns: [number (#1)] @@ -391,7 +391,7 @@ HashJoin ├── probe keys: [t.number (#0)] ├── keys is null equal: [false] ├── filters: [] -├── estimated rows: 1.00 +├── estimated rows: 0.01 ├── TableScan(Build) │ ├── table: default.system.numbers │ ├── output columns: [number (#1)] @@ -423,7 +423,7 @@ HashJoin ├── filters: [] ├── build join filters: │ └── filter id:1, build key:number (#2), probe key:number (#0), filter type:inlist,min_max -├── estimated rows: 0.20 +├── estimated rows: 0.00 ├── HashJoin(Build) │ ├── output columns: [numbers.number (#2)] │ ├── join type: INNER @@ -433,7 +433,7 @@ HashJoin │ ├── filters: [] │ ├── build join filters: │ │ └── filter id:0, build key:number (#2), probe key:t1.number (#1), filter type:inlist,min_max -│ ├── estimated rows: 0.20 +│ ├── estimated rows: 0.00 │ ├── Filter(Build) │ │ ├── output columns: [numbers.number (#2)] │ │ ├── filters: [is_true(TRY_CAST(if(true, TRY_CAST(COUNT(*) (#3) = 1 AS UInt64 NULL), 0) AS Boolean NULL))] @@ -482,47 +482,47 @@ explain select t.number from numbers(1) as t where exists(select * from numbers( ---- HashJoin ├── output columns: [t.number (#0)] -├── join type: LEFT ANTI +├── join type: RIGHT ANTI ├── build keys: [] ├── probe keys: [] ├── keys is null equal: [] ├── filters: [t.number (#0) < t1.number (#2)] -├── estimated rows: 1.00 -├── TableScan(Build) -│ ├── table: default.system.numbers -│ ├── output columns: [number (#2)] -│ ├── read rows: 1 -│ ├── read size: < 1 KiB -│ ├── partitions total: 1 -│ ├── partitions scanned: 1 -│ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 1.00 -└── HashJoin(Probe) - ├── output columns: [t.number (#0)] - ├── join type: LEFT SEMI - ├── build keys: [] - ├── probe keys: [] - ├── keys is null equal: [] - ├── filters: [t.number (#0) > t1.number (#1)] - ├── estimated rows: 1.00 - ├── TableScan(Build) - │ ├── table: default.system.numbers - │ ├── output columns: [number (#1)] - │ ├── read rows: 1 - │ ├── read size: < 1 KiB - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 1.00 - └── TableScan(Probe) - ├── table: default.system.numbers - ├── output columns: [number (#0)] - ├── read rows: 1 - ├── read size: < 1 KiB - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 1.00 +├── estimated rows: 0.20 +├── HashJoin(Build) +│ ├── output columns: [t.number (#0)] +│ ├── join type: LEFT SEMI +│ ├── build keys: [] +│ ├── probe keys: [] +│ ├── keys is null equal: [] +│ ├── filters: [t.number (#0) > t1.number (#1)] +│ ├── estimated rows: 0.20 +│ ├── TableScan(Build) +│ │ ├── table: default.system.numbers +│ │ ├── output columns: [number (#1)] +│ │ ├── read rows: 1 +│ │ ├── read size: < 1 KiB +│ │ ├── partitions total: 1 +│ │ ├── partitions scanned: 1 +│ │ ├── push downs: [filters: [], limit: NONE] +│ │ └── estimated rows: 1.00 +│ └── TableScan(Probe) +│ ├── table: default.system.numbers +│ ├── output columns: [number (#0)] +│ ├── read rows: 1 +│ ├── read size: < 1 KiB +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 1.00 +└── TableScan(Probe) + ├── table: default.system.numbers + ├── output columns: [number (#2)] + ├── read rows: 1 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 1.00 query T explain select * from (select number as a from numbers(10)) as t(b) where t.b > 5 @@ -781,7 +781,7 @@ HashJoin ├── probe keys: [] ├── keys is null equal: [] ├── filters: [like(t.i (#0), CAST(subquery_1 (#1) AS String NULL))] -├── estimated rows: 3.00 +├── estimated rows: 0.60 ├── ConstantTableScan(Build) │ ├── output columns: [c1 (#1)] │ └── column 0: ['data%'] @@ -834,7 +834,7 @@ HashJoin ├── probe keys: [] ├── keys is null equal: [] ├── filters: [like(t.i (#0), CAST(subquery_1 (#1) AS String NULL), '$')] -├── estimated rows: 3.00 +├── estimated rows: 0.60 ├── ConstantTableScan(Build) │ ├── output columns: [c1 (#1)] │ └── column 0: ['data$%%'] diff --git a/tests/sqllogictests/suites/mode/standalone/explain/update.test b/tests/sqllogictests/suites/mode/standalone/explain/update.test index 3132881b6cb48..a3a95eb36edd7 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/update.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/update.test @@ -205,7 +205,7 @@ CommitSink │ ├── filter id:0, build key:p.c_code (#9), probe key:c.c_code (#1), filter type:inlist,min_max │ ├── filter id:1, build key:p.id (#8), probe key:c.id (#0), filter type:inlist,min_max │ └── filter id:2, build key:p.me_id (#10), probe key:c.me_id (#2), filter type:inlist,min_max - ├── estimated rows: 3.20 + ├── estimated rows: 0.16 ├── TableScan(Build) │ ├── table: default.default.t2 │ ├── output columns: [id (#8), c_code (#9), me_id (#10), p_id (#11), m_p_id (#12), lkp_id (#13), l_p_id (#14), l_m_p_id (#15)]