Skip to content

Commit 1d31dfe

Browse files
committed
Merge branch 'main' into udaf-schema-16997
2 parents d7a886c + f0630fb commit 1d31dfe

File tree

77 files changed

+972
-1260
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+972
-1260
lines changed

Cargo.lock

Lines changed: 11 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/common/src/config.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -559,12 +559,6 @@ config_namespace! {
559559
/// (reading) Use any available bloom filters when reading parquet files
560560
pub bloom_filter_on_read: bool, default = true
561561

562-
/// (reading) Whether or not to enable the caching of embedded metadata of Parquet files
563-
/// (footer and page metadata). Enabling it can offer substantial performance improvements
564-
/// for repeated queries over large files. By default, the cache is automatically
565-
/// invalidated when the underlying file is modified.
566-
pub cache_metadata: bool, default = false
567-
568562
// The following options affect writing to parquet files
569563
// and map to parquet::file::properties::WriterProperties
570564

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,6 @@ impl ParquetOptions {
234234
binary_as_string: _, // not used for writer props
235235
coerce_int96: _, // not used for writer props
236236
skip_arrow_metadata: _,
237-
cache_metadata: _,
238237
} = self;
239238

240239
let mut builder = WriterProperties::builder()
@@ -502,7 +501,6 @@ mod tests {
502501
binary_as_string: defaults.binary_as_string,
503502
skip_arrow_metadata: defaults.skip_arrow_metadata,
504503
coerce_int96: None,
505-
cache_metadata: defaults.cache_metadata,
506504
}
507505
}
508506

@@ -613,7 +611,6 @@ mod tests {
613611
binary_as_string: global_options_defaults.binary_as_string,
614612
skip_arrow_metadata: global_options_defaults.skip_arrow_metadata,
615613
coerce_int96: None,
616-
cache_metadata: global_options_defaults.cache_metadata,
617614
},
618615
column_specific_options,
619616
key_value_metadata,

datafusion/common/src/utils/mod.rs

Lines changed: 0 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -444,94 +444,6 @@ impl SingleRowListArrayBuilder {
444444
}
445445
}
446446

447-
/// Wrap an array into a single element `ListArray`.
448-
/// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]`
449-
/// The field in the list array is nullable.
450-
#[deprecated(
451-
since = "44.0.0",
452-
note = "please use `SingleRowListArrayBuilder` instead"
453-
)]
454-
pub fn array_into_list_array_nullable(arr: ArrayRef) -> ListArray {
455-
SingleRowListArrayBuilder::new(arr)
456-
.with_nullable(true)
457-
.build_list_array()
458-
}
459-
460-
/// Wrap an array into a single element `ListArray`.
461-
/// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]`
462-
#[deprecated(
463-
since = "44.0.0",
464-
note = "please use `SingleRowListArrayBuilder` instead"
465-
)]
466-
pub fn array_into_list_array(arr: ArrayRef, nullable: bool) -> ListArray {
467-
SingleRowListArrayBuilder::new(arr)
468-
.with_nullable(nullable)
469-
.build_list_array()
470-
}
471-
472-
#[deprecated(
473-
since = "44.0.0",
474-
note = "please use `SingleRowListArrayBuilder` instead"
475-
)]
476-
pub fn array_into_list_array_with_field_name(
477-
arr: ArrayRef,
478-
nullable: bool,
479-
field_name: &str,
480-
) -> ListArray {
481-
SingleRowListArrayBuilder::new(arr)
482-
.with_nullable(nullable)
483-
.with_field_name(Some(field_name.to_string()))
484-
.build_list_array()
485-
}
486-
487-
/// Wrap an array into a single element `LargeListArray`.
488-
/// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]`
489-
#[deprecated(
490-
since = "44.0.0",
491-
note = "please use `SingleRowListArrayBuilder` instead"
492-
)]
493-
pub fn array_into_large_list_array(arr: ArrayRef) -> LargeListArray {
494-
SingleRowListArrayBuilder::new(arr).build_large_list_array()
495-
}
496-
497-
#[deprecated(
498-
since = "44.0.0",
499-
note = "please use `SingleRowListArrayBuilder` instead"
500-
)]
501-
pub fn array_into_large_list_array_with_field_name(
502-
arr: ArrayRef,
503-
field_name: &str,
504-
) -> LargeListArray {
505-
SingleRowListArrayBuilder::new(arr)
506-
.with_field_name(Some(field_name.to_string()))
507-
.build_large_list_array()
508-
}
509-
510-
#[deprecated(
511-
since = "44.0.0",
512-
note = "please use `SingleRowListArrayBuilder` instead"
513-
)]
514-
pub fn array_into_fixed_size_list_array(
515-
arr: ArrayRef,
516-
list_size: usize,
517-
) -> FixedSizeListArray {
518-
SingleRowListArrayBuilder::new(arr).build_fixed_size_list_array(list_size)
519-
}
520-
521-
#[deprecated(
522-
since = "44.0.0",
523-
note = "please use `SingleRowListArrayBuilder` instead"
524-
)]
525-
pub fn array_into_fixed_size_list_array_with_field_name(
526-
arr: ArrayRef,
527-
list_size: usize,
528-
field_name: &str,
529-
) -> FixedSizeListArray {
530-
SingleRowListArrayBuilder::new(arr)
531-
.with_field_name(Some(field_name.to_string()))
532-
.build_fixed_size_list_array(list_size)
533-
}
534-
535447
/// Wrap arrays into a single element `ListArray`.
536448
///
537449
/// Example:
@@ -832,21 +744,6 @@ pub fn set_difference<T: Borrow<usize>, S: Borrow<usize>>(
832744
.collect()
833745
}
834746

835-
/// Checks whether the given index sequence is monotonically non-decreasing.
836-
#[deprecated(since = "45.0.0", note = "Use std::Iterator::is_sorted instead")]
837-
pub fn is_sorted<T: Borrow<usize>>(sequence: impl IntoIterator<Item = T>) -> bool {
838-
// TODO: Remove this function when `is_sorted` graduates from Rust nightly.
839-
let mut previous = 0;
840-
for item in sequence.into_iter() {
841-
let current = *item.borrow();
842-
if current < previous {
843-
return false;
844-
}
845-
previous = current;
846-
}
847-
true
848-
}
849-
850747
/// Find indices of each element in `targets` inside `items`. If one of the
851748
/// elements is absent in `items`, returns an error.
852749
pub fn find_indices<T: PartialEq, S: Borrow<T>>(
@@ -1274,19 +1171,6 @@ mod tests {
12741171
assert_eq!(set_difference([3, 4, 0], [4, 1, 2]), vec![3, 0]);
12751172
}
12761173

1277-
#[test]
1278-
#[expect(deprecated)]
1279-
fn test_is_sorted() {
1280-
assert!(is_sorted::<usize>([]));
1281-
assert!(is_sorted([0]));
1282-
assert!(is_sorted([0, 3, 4]));
1283-
assert!(is_sorted([0, 1, 2]));
1284-
assert!(is_sorted([0, 1, 4]));
1285-
assert!(is_sorted([0usize; 0]));
1286-
assert!(is_sorted([1, 2]));
1287-
assert!(!is_sorted([3, 2]));
1288-
}
1289-
12901174
#[test]
12911175
fn test_find_indices() -> Result<()> {
12921176
assert_eq!(find_indices(&[0, 3, 4], [0, 3, 4])?, vec![0, 1, 2]);

datafusion/core/src/datasource/file_format/options.rs

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -254,11 +254,6 @@ pub struct ParquetReadOptions<'a> {
254254
pub file_sort_order: Vec<Vec<SortExpr>>,
255255
/// Properties for decryption of Parquet files that use modular encryption
256256
pub file_decryption_properties: Option<ConfigFileDecryptionProperties>,
257-
/// Whether or not to enable the caching of embedded metadata of this Parquet file (footer and
258-
/// page metadata). Enabling it can offer substantial performance improvements for repeated
259-
/// queries over large files. By default, the cache is automatically invalidated when the
260-
/// underlying file is modified.
261-
pub cache_metadata: Option<bool>,
262257
}
263258

264259
impl Default for ParquetReadOptions<'_> {
@@ -271,7 +266,6 @@ impl Default for ParquetReadOptions<'_> {
271266
schema: None,
272267
file_sort_order: vec![],
273268
file_decryption_properties: None,
274-
cache_metadata: None,
275269
}
276270
}
277271
}
@@ -331,12 +325,6 @@ impl<'a> ParquetReadOptions<'a> {
331325
self.file_decryption_properties = Some(file_decryption_properties);
332326
self
333327
}
334-
335-
/// Specify whether to enable or not metadata caching
336-
pub fn cache_metadata(mut self, cache_metadata: bool) -> Self {
337-
self.cache_metadata = Some(cache_metadata);
338-
self
339-
}
340328
}
341329

342330
/// Options that control the reading of ARROW files.
@@ -602,9 +590,6 @@ impl ReadOptions<'_> for ParquetReadOptions<'_> {
602590
if let Some(file_decryption_properties) = &self.file_decryption_properties {
603591
options.crypto.file_decryption = Some(file_decryption_properties.clone());
604592
}
605-
if let Some(cache_metadata) = self.cache_metadata {
606-
options.global.cache_metadata = cache_metadata;
607-
}
608593
let mut file_format = ParquetFormat::new().with_options(options);
609594

610595
if let Some(parquet_pruning) = self.parquet_pruning {

datafusion/core/src/execution/session_state.rs

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -274,17 +274,6 @@ impl Session for SessionState {
274274
}
275275

276276
impl SessionState {
277-
/// Returns new [`SessionState`] using the provided
278-
/// [`SessionConfig`] and [`RuntimeEnv`].
279-
#[deprecated(since = "41.0.0", note = "Use SessionStateBuilder")]
280-
pub fn new_with_config_rt(config: SessionConfig, runtime: Arc<RuntimeEnv>) -> Self {
281-
SessionStateBuilder::new()
282-
.with_config(config)
283-
.with_runtime_env(runtime)
284-
.with_default_features()
285-
.build()
286-
}
287-
288277
pub(crate) fn resolve_table_ref(
289278
&self,
290279
table_ref: impl Into<TableReference>,

datafusion/core/src/lib.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -744,8 +744,10 @@ pub mod physical_planner;
744744
pub mod prelude;
745745
pub mod scalar;
746746

747-
// re-export dependencies from arrow-rs to minimize version maintenance for crate users
747+
// Re-export dependencies that are part of DataFusion's public API (e.g. via DataFusionError)
748748
pub use arrow;
749+
pub use object_store;
750+
749751
#[cfg(feature = "parquet")]
750752
pub use parquet;
751753

@@ -828,13 +830,6 @@ pub mod functions_nested {
828830
pub use datafusion_functions_nested::*;
829831
}
830832

831-
/// re-export of [`datafusion_functions_nested`] crate as [`functions_array`] for backward compatibility, if "nested_expressions" feature is enabled
832-
#[deprecated(since = "41.0.0", note = "use datafusion-functions-nested instead")]
833-
pub mod functions_array {
834-
#[cfg(feature = "nested_expressions")]
835-
pub use datafusion_functions_nested::*;
836-
}
837-
838833
/// re-export of [`datafusion_functions_aggregate`] crate
839834
pub mod functions_aggregate {
840835
pub use datafusion_functions_aggregate::*;

datafusion/core/src/test/object_store.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,20 @@
1717

1818
//! Object store implementation used for testing
1919
20-
use crate::execution::context::SessionState;
21-
use crate::execution::session_state::SessionStateBuilder;
22-
use crate::prelude::SessionContext;
23-
use futures::stream::BoxStream;
24-
use futures::FutureExt;
25-
use object_store::{
26-
memory::InMemory, path::Path, Error, GetOptions, GetResult, ListResult,
27-
MultipartUpload, ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions,
28-
PutPayload, PutResult,
20+
use crate::{
21+
execution::{context::SessionState, session_state::SessionStateBuilder},
22+
object_store::{
23+
memory::InMemory, path::Path, Error, GetOptions, GetResult, ListResult,
24+
MultipartUpload, ObjectMeta, ObjectStore, PutMultipartOptions, PutOptions,
25+
PutPayload, PutResult,
26+
},
27+
prelude::SessionContext,
28+
};
29+
use futures::{stream::BoxStream, FutureExt};
30+
use std::{
31+
fmt::{Debug, Display, Formatter},
32+
sync::Arc,
2933
};
30-
use std::fmt::{Debug, Display, Formatter};
31-
use std::sync::Arc;
3234
use tokio::{
3335
sync::Barrier,
3436
time::{timeout, Duration},

datafusion/core/tests/parquet/page_pruning.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -903,8 +903,8 @@ async fn without_pushdown_filter() {
903903
)
904904
.unwrap();
905905

906-
// Without filter will not read pageIndex.
907-
assert!(bytes_scanned_with_filter > bytes_scanned_without_filter);
906+
// The same number of bytes is scanned when Parquet metadata caching is the default
907+
assert!(bytes_scanned_with_filter == bytes_scanned_without_filter);
908908
}
909909

910910
#[tokio::test]

0 commit comments

Comments
 (0)