Skip to content

Commit 8d91cf9

Browse files
committed
Merge remote-tracking branch 'apache/main' into alamb/morsel_api
2 parents 31e161c + 6ef4cef commit 8d91cf9

File tree

105 files changed

+1773
-700
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

105 files changed

+1773
-700
lines changed

datafusion/catalog/src/table.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ pub trait TableProviderFactory: Debug + Sync + Send {
486486
}
487487

488488
/// A trait for table function implementations
489-
pub trait TableFunctionImpl: Debug + Sync + Send {
489+
pub trait TableFunctionImpl: Debug + Sync + Send + Any {
490490
/// Create a table provider
491491
fn call(&self, args: &[Expr]) -> Result<Arc<dyn TableProvider>>;
492492
}

datafusion/common/src/stats.rs

Lines changed: 357 additions & 6 deletions
Large diffs are not rendered by default.

datafusion/core/src/datasource/file_format/csv.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ mod tests {
114114
let len = bytes.len() as u64;
115115
let range = 0..len * self.max_iterations;
116116
let arc = self.iterations_detected.clone();
117+
#[expect(clippy::result_large_err)]
118+
// closure only ever returns Ok; Err type is never constructed
117119
let stream = futures::stream::repeat_with(move || {
118120
let arc_inner = arc.clone();
119121
*arc_inner.lock().unwrap() += 1;

datafusion/core/tests/physical_optimizer/partition_statistics.rs

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -154,13 +154,15 @@ mod test {
154154
// - null_count = 0 (partition values from paths are never null)
155155
// - min/max are the merged partition values across files in the group
156156
// - byte_size = num_rows * 4 (Date32 is 4 bytes per row)
157+
// - distinct_count = Inexact(1) per partition file (single partition value per file),
158+
// preserved via max() when merging stats across partitions
157159
let date32_byte_size = num_rows * 4;
158160
column_stats.push(ColumnStatistics {
159161
null_count: Precision::Exact(0),
160162
max_value: Precision::Exact(ScalarValue::Date32(Some(max_date))),
161163
min_value: Precision::Exact(ScalarValue::Date32(Some(min_date))),
162164
sum_value: Precision::Absent,
163-
distinct_count: Precision::Absent,
165+
distinct_count: Precision::Inexact(1),
164166
byte_size: Precision::Exact(date32_byte_size),
165167
});
166168
}
@@ -581,7 +583,7 @@ mod test {
581583
max_value: Precision::Exact(ScalarValue::Date32(Some(20151))),
582584
min_value: Precision::Exact(ScalarValue::Date32(Some(20148))),
583585
sum_value: Precision::Absent,
584-
distinct_count: Precision::Absent,
586+
distinct_count: Precision::Inexact(1),
585587
byte_size: Precision::Absent,
586588
},
587589
// column 2: right.id (Int32, file column from t2) - right partition 0: ids [3,4]
@@ -615,7 +617,7 @@ mod test {
615617
max_value: Precision::Exact(ScalarValue::Date32(Some(20151))),
616618
min_value: Precision::Exact(ScalarValue::Date32(Some(20148))),
617619
sum_value: Precision::Absent,
618-
distinct_count: Precision::Absent,
620+
distinct_count: Precision::Inexact(1),
619621
byte_size: Precision::Absent,
620622
},
621623
// column 2: right.id (Int32, file column from t2) - right partition 1: ids [1,2]
@@ -1251,7 +1253,7 @@ mod test {
12511253
DATE_2025_03_01,
12521254
))),
12531255
sum_value: Precision::Absent,
1254-
distinct_count: Precision::Absent,
1256+
distinct_count: Precision::Inexact(1),
12551257
byte_size: Precision::Exact(8),
12561258
},
12571259
ColumnStatistics::new_unknown(), // window column
@@ -1279,7 +1281,7 @@ mod test {
12791281
DATE_2025_03_03,
12801282
))),
12811283
sum_value: Precision::Absent,
1282-
distinct_count: Precision::Absent,
1284+
distinct_count: Precision::Inexact(1),
12831285
byte_size: Precision::Exact(8),
12841286
},
12851287
ColumnStatistics::new_unknown(), // window column
@@ -1416,6 +1418,8 @@ mod test {
14161418
byte_size: Precision::Exact(16),
14171419
},
14181420
// Left date column: all partitions (2025-03-01..2025-03-04)
1421+
// NDV is Inexact(1) because each Hive partition has exactly 1 distinct date value,
1422+
// and merging takes max as a conservative lower bound
14191423
ColumnStatistics {
14201424
null_count: Precision::Exact(0),
14211425
max_value: Precision::Exact(ScalarValue::Date32(Some(
@@ -1425,7 +1429,7 @@ mod test {
14251429
DATE_2025_03_01,
14261430
))),
14271431
sum_value: Precision::Absent,
1428-
distinct_count: Precision::Absent,
1432+
distinct_count: Precision::Inexact(1),
14291433
byte_size: Precision::Exact(16),
14301434
},
14311435
// Right id column: partition 0 only (id 3..4)
@@ -1438,6 +1442,7 @@ mod test {
14381442
byte_size: Precision::Exact(8),
14391443
},
14401444
// Right date column: partition 0 only (2025-03-01..2025-03-02)
1445+
// NDV is Inexact(1) from the single Hive partition's date value
14411446
ColumnStatistics {
14421447
null_count: Precision::Exact(0),
14431448
max_value: Precision::Exact(ScalarValue::Date32(Some(
@@ -1447,7 +1452,7 @@ mod test {
14471452
DATE_2025_03_01,
14481453
))),
14491454
sum_value: Precision::Absent,
1450-
distinct_count: Precision::Absent,
1455+
distinct_count: Precision::Inexact(1),
14511456
byte_size: Precision::Exact(8),
14521457
},
14531458
],
@@ -1499,7 +1504,7 @@ mod test {
14991504
DATE_2025_03_01,
15001505
))),
15011506
sum_value: Precision::Absent,
1502-
distinct_count: Precision::Absent,
1507+
distinct_count: Precision::Inexact(1),
15031508
byte_size: Precision::Exact(8),
15041509
},
15051510
// Right id column: partition 0 only (id 3..4)
@@ -1521,7 +1526,7 @@ mod test {
15211526
DATE_2025_03_01,
15221527
))),
15231528
sum_value: Precision::Absent,
1524-
distinct_count: Precision::Absent,
1529+
distinct_count: Precision::Inexact(1),
15251530
byte_size: Precision::Exact(8),
15261531
},
15271532
],
@@ -1573,7 +1578,7 @@ mod test {
15731578
DATE_2025_03_01,
15741579
))),
15751580
sum_value: Precision::Absent,
1576-
distinct_count: Precision::Absent,
1581+
distinct_count: Precision::Inexact(1),
15771582
byte_size: Precision::Exact(16),
15781583
},
15791584
// Right id column: all partitions (id 1..4)
@@ -1595,7 +1600,7 @@ mod test {
15951600
DATE_2025_03_01,
15961601
))),
15971602
sum_value: Precision::Absent,
1598-
distinct_count: Precision::Absent,
1603+
distinct_count: Precision::Inexact(1),
15991604
byte_size: Precision::Exact(16),
16001605
},
16011606
],

datafusion/core/tests/sql/path_partition.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,8 @@ impl ObjectStore for MirroringObjectStore {
735735
.map(|mut x| x.next().is_some())
736736
.unwrap_or(false);
737737

738+
#[expect(clippy::result_large_err)]
739+
// closure only ever returns Ok; Err type is never constructed
738740
filter.then(|| {
739741
Ok(ObjectMeta {
740742
location,

0 commit comments

Comments
 (0)