@@ -154,13 +154,15 @@ mod test {
154154 // - null_count = 0 (partition values from paths are never null)
155155 // - min/max are the merged partition values across files in the group
156156 // - byte_size = num_rows * 4 (Date32 is 4 bytes per row)
157+ // - distinct_count = Inexact(1): each partition file holds a single partition value, and
158+ // merging stats across partitions takes max(), so the result is a lower bound on the NDV
157159 let date32_byte_size = num_rows * 4 ;
158160 column_stats. push ( ColumnStatistics {
159161 null_count : Precision :: Exact ( 0 ) ,
160162 max_value : Precision :: Exact ( ScalarValue :: Date32 ( Some ( max_date) ) ) ,
161163 min_value : Precision :: Exact ( ScalarValue :: Date32 ( Some ( min_date) ) ) ,
162164 sum_value : Precision :: Absent ,
163- distinct_count : Precision :: Absent ,
165+ distinct_count : Precision :: Inexact ( 1 ) ,
164166 byte_size : Precision :: Exact ( date32_byte_size) ,
165167 } ) ;
166168 }
@@ -581,7 +583,7 @@ mod test {
581583 max_value: Precision :: Exact ( ScalarValue :: Date32 ( Some ( 20151 ) ) ) ,
582584 min_value: Precision :: Exact ( ScalarValue :: Date32 ( Some ( 20148 ) ) ) ,
583585 sum_value: Precision :: Absent ,
584- distinct_count: Precision :: Absent ,
586+ distinct_count: Precision :: Inexact ( 1 ) ,
585587 byte_size: Precision :: Absent ,
586588 } ,
587589 // column 2: right.id (Int32, file column from t2) - right partition 0: ids [3,4]
@@ -615,7 +617,7 @@ mod test {
615617 max_value: Precision :: Exact ( ScalarValue :: Date32 ( Some ( 20151 ) ) ) ,
616618 min_value: Precision :: Exact ( ScalarValue :: Date32 ( Some ( 20148 ) ) ) ,
617619 sum_value: Precision :: Absent ,
618- distinct_count: Precision :: Absent ,
620+ distinct_count: Precision :: Inexact ( 1 ) ,
619621 byte_size: Precision :: Absent ,
620622 } ,
621623 // column 2: right.id (Int32, file column from t2) - right partition 1: ids [1,2]
@@ -1251,7 +1253,7 @@ mod test {
12511253 DATE_2025_03_01 ,
12521254 ) ) ) ,
12531255 sum_value: Precision :: Absent ,
1254- distinct_count: Precision :: Absent ,
1256+ distinct_count: Precision :: Inexact ( 1 ) ,
12551257 byte_size: Precision :: Exact ( 8 ) ,
12561258 } ,
12571259 ColumnStatistics :: new_unknown( ) , // window column
@@ -1279,7 +1281,7 @@ mod test {
12791281 DATE_2025_03_03 ,
12801282 ) ) ) ,
12811283 sum_value: Precision :: Absent ,
1282- distinct_count: Precision :: Absent ,
1284+ distinct_count: Precision :: Inexact ( 1 ) ,
12831285 byte_size: Precision :: Exact ( 8 ) ,
12841286 } ,
12851287 ColumnStatistics :: new_unknown( ) , // window column
@@ -1416,6 +1418,8 @@ mod test {
14161418 byte_size: Precision :: Exact ( 16 ) ,
14171419 } ,
14181420 // Left date column: all partitions (2025-03-01..2025-03-04)
1421+ // NDV is Inexact(1): each Hive partition has exactly 1 distinct date value, and merging
1422+ // takes the max of those counts, which is a conservative lower bound (not the true NDV)
14191423 ColumnStatistics {
14201424 null_count: Precision :: Exact ( 0 ) ,
14211425 max_value: Precision :: Exact ( ScalarValue :: Date32 ( Some (
@@ -1425,7 +1429,7 @@ mod test {
14251429 DATE_2025_03_01 ,
14261430 ) ) ) ,
14271431 sum_value: Precision :: Absent ,
1428- distinct_count: Precision :: Absent ,
1432+ distinct_count: Precision :: Inexact ( 1 ) ,
14291433 byte_size: Precision :: Exact ( 16 ) ,
14301434 } ,
14311435 // Right id column: partition 0 only (id 3..4)
@@ -1438,6 +1442,7 @@ mod test {
14381442 byte_size: Precision :: Exact ( 8 ) ,
14391443 } ,
14401444 // Right date column: partition 0 only (2025-03-01..2025-03-02)
1445+ // NDV is Inexact(1): one distinct date value per Hive partition, max-merged (a lower bound)
14411446 ColumnStatistics {
14421447 null_count: Precision :: Exact ( 0 ) ,
14431448 max_value: Precision :: Exact ( ScalarValue :: Date32 ( Some (
@@ -1447,7 +1452,7 @@ mod test {
14471452 DATE_2025_03_01 ,
14481453 ) ) ) ,
14491454 sum_value: Precision :: Absent ,
1450- distinct_count: Precision :: Absent ,
1455+ distinct_count: Precision :: Inexact ( 1 ) ,
14511456 byte_size: Precision :: Exact ( 8 ) ,
14521457 } ,
14531458 ] ,
@@ -1499,7 +1504,7 @@ mod test {
14991504 DATE_2025_03_01 ,
15001505 ) ) ) ,
15011506 sum_value: Precision :: Absent ,
1502- distinct_count: Precision :: Absent ,
1507+ distinct_count: Precision :: Inexact ( 1 ) ,
15031508 byte_size: Precision :: Exact ( 8 ) ,
15041509 } ,
15051510 // Right id column: partition 0 only (id 3..4)
@@ -1521,7 +1526,7 @@ mod test {
15211526 DATE_2025_03_01 ,
15221527 ) ) ) ,
15231528 sum_value: Precision :: Absent ,
1524- distinct_count: Precision :: Absent ,
1529+ distinct_count: Precision :: Inexact ( 1 ) ,
15251530 byte_size: Precision :: Exact ( 8 ) ,
15261531 } ,
15271532 ] ,
@@ -1573,7 +1578,7 @@ mod test {
15731578 DATE_2025_03_01 ,
15741579 ) ) ) ,
15751580 sum_value: Precision :: Absent ,
1576- distinct_count: Precision :: Absent ,
1581+ distinct_count: Precision :: Inexact ( 1 ) ,
15771582 byte_size: Precision :: Exact ( 16 ) ,
15781583 } ,
15791584 // Right id column: all partitions (id 1..4)
@@ -1595,7 +1600,7 @@ mod test {
15951600 DATE_2025_03_01 ,
15961601 ) ) ) ,
15971602 sum_value: Precision :: Absent ,
1598- distinct_count: Precision :: Absent ,
1603+ distinct_count: Precision :: Inexact ( 1 ) ,
15991604 byte_size: Precision :: Exact ( 16 ) ,
16001605 } ,
16011606 ] ,
0 commit comments