Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
0aeb7ea
Update delta-rs to add FileFormatOptions
corwinjoy Sep 27, 2025
c081278
Update optimize to use zstd compression by default. Fix clippy warnings.
corwinjoy Sep 29, 2025
9059d8e
Merge branch 'main' into file_format_options_squashed
corwinjoy Sep 29, 2025
a41d3d3
Merge branch 'main' into file_format_options_squashed
corwinjoy Oct 1, 2025
cd88ef8
Fix build error created by merging main
corwinjoy Oct 1, 2025
e5915e0
Merge branch 'main' into file_format_options_squashed
corwinjoy Oct 2, 2025
56ec968
Update optimize.rs to fix merge conflicts from main
corwinjoy Oct 2, 2025
1b7d6c2
cargo format
corwinjoy Oct 2, 2025
4a89682
Merge branch 'main' into file_format_options_squashed
corwinjoy Oct 2, 2025
56a0cb5
Merge latest from main
corwinjoy Oct 2, 2025
73cccbe
Undo refactorization of optimize routines to make diffs clearer
corwinjoy Oct 7, 2025
5359a9e
Move file_format_options into DeltaTableConfig
corwinjoy Oct 13, 2025
2d29203
Merge branch 'main' into file_format_options_squashed
corwinjoy Oct 13, 2025
b9376bd
Fix build errors from merge with main
corwinjoy Oct 13, 2025
4ae8443
Update comment for with_table_config in CreateBuilder
corwinjoy Oct 14, 2025
c7281de
Refined optimize.rs, cargo fmt, comment on builder
corwinjoy Oct 14, 2025
24fba72
Inline and Remove build_writer_properties_factory_ffo Function.
corwinjoy Oct 29, 2025
d71bda5
Inline and Remove to_table_parquet_options_from_ffo Function.
corwinjoy Oct 29, 2025
2c2a7fd
Remove file_format_options argument from the find_files and find_file…
corwinjoy Oct 29, 2025
a0921c4
cargo fmt
corwinjoy Oct 29, 2025
a8aba72
Work in progress. Begin removal of file_format_options from DeltaScan…
corwinjoy Oct 29, 2025
31569a6
Finish removal of file_format_options from DeltaScanBuilder
corwinjoy Oct 29, 2025
762b6b5
cargo fmt
corwinjoy Oct 29, 2025
9d48c94
Remove dead code flagged by clippy
corwinjoy Oct 30, 2025
6913ec8
Merge branch 'main' into file_format_options_squashed
corwinjoy Oct 30, 2025
46097ee
Fix errors introduced by merge from main.
corwinjoy Oct 30, 2025
9495f13
Cargo clippy + fmt
corwinjoy Oct 30, 2025
020d409
Remove extension to create WriterPropertiesBuilder from WriterPropert…
corwinjoy Oct 30, 2025
2d14fbf
Change try_with_config to not be async to match main.
corwinjoy Nov 3, 2025
cd21447
Remove build_writer_properties_factory_wp and convert to trait.
corwinjoy Nov 3, 2025
ac71c65
Convert build_writer_properties_factory_or_default_ffo to trait.
corwinjoy Nov 3, 2025
8047afe
cargo fmt
corwinjoy Nov 3, 2025
a036d7f
Update parquet-key-management to version 0.4.1 to avoid build depende…
corwinjoy Nov 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ object_store = { version = "0.12.1" }
parquet = { version = "56.0.0" }

# datafusion
datafusion = "50.0.0"
datafusion = { version = "50.0.0" , features = ["parquet_encryption", "default"]}
datafusion-ffi = "50.0.0"
datafusion-proto = "50.0.0"

Expand All @@ -69,7 +69,7 @@ percent-encoding-rfc3986 = { version = "0.1.3" }
uuid = { version = "1" }

# runtime / async
async-trait = { version = "0.1" }
async-trait = { version = "0.1.89" }
futures = { version = "0.3" }
tokio = { version = "1" }
num_cpus = { version = "1" }
Expand Down
5 changes: 5 additions & 0 deletions crates/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ datatest-stable = "0.2"
deltalake-test = { path = "../test" }
dotenvy = "0"
fs_extra = "1.2.0"
parquet-key-management = { version = "0.4.0", features = ["_test_utils", "datafusion"] }
pretty_assertions = "1.2.1"
pretty_env_logger = "0.5.0"
rstest = { version = "0.26.1" }
Expand Down Expand Up @@ -143,6 +144,10 @@ required-features = ["datafusion"]
name = "command_vacuum"
required-features = ["datafusion"]

[[test]]
name = "commands_with_encryption"
required-features = ["datafusion"]

[[test]]
name = "commit_info_format"
required-features = ["datafusion"]
Expand Down
16 changes: 14 additions & 2 deletions crates/core/src/delta_datafusion/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ pub mod planner;
mod schema_adapter;
mod table_provider;

use crate::table::file_format_options::{to_table_parquet_options_from_ffo, FileFormatRef};
pub use cdf::scan::DeltaCdfTableProvider;
pub(crate) use table_provider::DeltaScanBuilder;
pub use table_provider::{DeltaScan, DeltaScanConfig, DeltaScanConfigBuilder, DeltaTableProvider};
Expand Down Expand Up @@ -940,6 +941,7 @@ pub(crate) async fn find_files_scan(
snapshot: &DeltaTableState,
log_store: LogStoreRef,
state: &SessionState,
file_format_options: Option<&FileFormatRef>,
expression: Expr,
) -> DeltaResult<Vec<Add>> {
let candidate_map: HashMap<String, Add> = snapshot
Expand All @@ -965,10 +967,13 @@ pub(crate) async fn find_files_scan(
// Add path column
used_columns.push(logical_schema.index_of(scan_config.file_column_name.as_ref().unwrap())?);

let table_parquet_options = to_table_parquet_options_from_ffo(file_format_options);

let scan = DeltaScanBuilder::new(snapshot, log_store, state)
.with_filter(Some(expression.clone()))
.with_projection(Some(&used_columns))
.with_scan_config(scan_config)
.with_parquet_options(table_parquet_options)
.build()
.await?;
let scan = Arc::new(scan);
Expand Down Expand Up @@ -1055,6 +1060,7 @@ pub async fn find_files(
snapshot: &DeltaTableState,
log_store: LogStoreRef,
state: &SessionState,
file_format_options: Option<&FileFormatRef>,
predicate: Option<Expr>,
) -> DeltaResult<FindFiles> {
let current_metadata = snapshot.metadata();
Expand All @@ -1078,8 +1084,14 @@ pub async fn find_files(
partition_scan: true,
})
} else {
let candidates =
find_files_scan(snapshot, log_store, state, predicate.to_owned()).await?;
let candidates = find_files_scan(
snapshot,
log_store,
state,
file_format_options,
predicate.to_owned(),
)
.await?;

Ok(FindFiles {
candidates,
Expand Down
64 changes: 56 additions & 8 deletions crates/core/src/delta_datafusion/table_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ use crate::kernel::{Action, Add, Remove};
use crate::operations::write::writer::{DeltaWriter, WriterConfig};
use crate::operations::write::WriterStatsConfig;
use crate::protocol::{DeltaOperation, SaveMode};
use crate::table::file_format_options::{to_table_parquet_options_from_ffo, FileFormatRef};
use crate::{ensure_table_uri, DeltaTable};
use crate::{logstore::LogStoreRef, table::state::DeltaTableState, DeltaResult, DeltaTableError};
use delta_kernel::table_properties::DataSkippingNumIndexedCols;
Expand Down Expand Up @@ -107,7 +108,7 @@ impl DeltaDataSink {
log_store: LogStoreRef,
snapshot: DeltaTableState,
save_mode: SaveMode,
session_state: Arc<SessionState>,
_session_state: Arc<SessionState>,
) -> datafusion::common::Result<Self> {
let schema = snapshot
.arrow_schema()
Expand Down Expand Up @@ -405,6 +406,7 @@ pub(crate) struct DeltaScanBuilder<'a> {
limit: Option<usize>,
files: Option<&'a [Add]>,
config: Option<DeltaScanConfig>,
parquet_options: Option<TableParquetOptions>,
}

impl<'a> DeltaScanBuilder<'a> {
Expand All @@ -422,9 +424,15 @@ impl<'a> DeltaScanBuilder<'a> {
limit: None,
files: None,
config: None,
parquet_options: None,
}
}

/// Override the `TableParquetOptions` used when building this scan.
///
/// When `Some`, these options take precedence over the session's table options
/// (the build step falls back to `self.session.table_options().parquet` when
/// this is `None`). The options' `crypto.factory_id`, if set, is later used to
/// resolve a Parquet encryption factory from the session's runtime environment.
pub fn with_parquet_options(mut self, parquet_options: Option<TableParquetOptions>) -> Self {
self.parquet_options = parquet_options;
self
}

pub fn with_filter(mut self, filter: Option<Expr>) -> Self {
self.filter = filter;
self
Expand Down Expand Up @@ -657,13 +665,30 @@ impl<'a> DeltaScanBuilder<'a> {

let stats = stats.unwrap_or(Statistics::new_unknown(&schema));

let parquet_options = TableParquetOptions {
global: self.session.config().options().execution.parquet.clone(),
..Default::default()
};
let parquet_options = self
.parquet_options
.unwrap_or_else(|| self.session.table_options().parquet.clone());

// We have to set the encryption factory on the ParquetSource based on the Parquet options,
// as this is usually handled by the ParquetFormat type in DataFusion,
// which is not used in delta-rs.
let encryption_factory = parquet_options
.crypto
.factory_id
.as_ref()
.map(|factory_id| {
self.session
.runtime_env()
.parquet_encryption_factory(factory_id)
})
.transpose()?;

let mut file_source = ParquetSource::new(parquet_options);

if let Some(encryption_factory) = encryption_factory {
file_source = file_source.with_encryption_factory(encryption_factory);
}

// Sometimes (i.e Merge) we want to prune files that don't make the
// filter and read the entire contents for files that do match the
// filter
Expand Down Expand Up @@ -743,9 +768,17 @@ impl TableProvider for DeltaTable {
limit: Option<usize>,
) -> Result<Arc<dyn ExecutionPlan>> {
register_store(self.log_store(), session.runtime_env().clone());
if let Some(format_options) = &self.file_format_options {
format_options.update_session(session)?;
}
let filter_expr = conjunction(filters.iter().cloned());

let scan = DeltaScanBuilder::new(self.snapshot()?, self.log_store(), session)
.with_parquet_options(
crate::table::file_format_options::to_table_parquet_options_from_ffo(
self.file_format_options.as_ref(),
),
)
.with_projection(projection)
.with_limit(limit)
.with_filter(filter_expr)
Expand Down Expand Up @@ -776,6 +809,7 @@ pub struct DeltaTableProvider {
config: DeltaScanConfig,
schema: Arc<Schema>,
files: Option<Vec<Add>>,
file_format_options: Option<FileFormatRef>,
}

impl DeltaTableProvider {
Expand All @@ -791,9 +825,15 @@ impl DeltaTableProvider {
log_store,
config,
files: None,
file_format_options: None,
})
}

/// Set the file format options to apply when scanning this table.
///
/// When present, the options are used in `scan` to update the session and to
/// derive the `TableParquetOptions` passed to the underlying `DeltaScanBuilder`.
/// Passing `None` clears any previously configured options.
pub fn with_file_format_options(mut self, file_format_options: Option<FileFormatRef>) -> Self {
self.file_format_options = file_format_options;
self
}

/// Define which files to consider while building a scan, for advanced usecases
pub fn with_files(mut self, files: Vec<Add>) -> DeltaTableProvider {
self.files = Some(files);
Expand Down Expand Up @@ -831,9 +871,17 @@ impl TableProvider for DeltaTableProvider {
limit: Option<usize>,
) -> Result<Arc<dyn ExecutionPlan>> {
register_store(self.log_store.clone(), session.runtime_env().clone());
if let Some(format_options) = &self.file_format_options {
format_options.update_session(session)?;
}

let filter_expr = conjunction(filters.iter().cloned());

let table_parquet_options =
to_table_parquet_options_from_ffo(self.file_format_options.as_ref());

let mut scan = DeltaScanBuilder::new(&self.snapshot, self.log_store.clone(), session)
.with_parquet_options(table_parquet_options)
.with_projection(projection)
.with_limit(limit)
.with_filter(filter_expr)
Expand Down Expand Up @@ -875,9 +923,9 @@ impl TableProvider for DeltaTableProvider {
InsertOp::Append => SaveMode::Append,
InsertOp::Overwrite => SaveMode::Overwrite,
InsertOp::Replace => {
return Err(DataFusionError::Plan(format!(
"Replace operation is not supported for DeltaTableProvider"
)))
return Err(DataFusionError::Plan(
"Replace operation is not supported for DeltaTableProvider".to_string(),
))
}
};

Expand Down
4 changes: 2 additions & 2 deletions crates/core/src/operations/add_column.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
//! Add a new column to a table

use std::sync::Arc;

use delta_kernel::schema::StructType;
use futures::future::BoxFuture;
use itertools::Itertools;
use std::sync::Arc;

use super::{CustomExecuteHandler, Operation};
use crate::kernel::schema::merge_delta_struct;
Expand Down Expand Up @@ -130,6 +129,7 @@ impl std::future::IntoFuture for AddColumnBuilder {
Ok(DeltaTable::new_with_state(
this.log_store,
commit.snapshot(),
None,
))
})
}
Expand Down
1 change: 1 addition & 0 deletions crates/core/src/operations/add_feature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ impl std::future::IntoFuture for AddTableFeatureBuilder {
Ok(DeltaTable::new_with_state(
this.log_store,
commit.snapshot(),
None,
))
})
}
Expand Down
1 change: 1 addition & 0 deletions crates/core/src/operations/constraints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ impl std::future::IntoFuture for ConstraintBuilder {
Ok(DeltaTable::new_with_state(
this.log_store,
commit.snapshot(),
None,
))
})
}
Expand Down
20 changes: 16 additions & 4 deletions crates/core/src/operations/create.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
//! Command for creating a new delta table
// https://github.com/delta-io/delta/blob/master/core/src/main/scala/org/apache/spark/sql/delta/commands/CreateDeltaTableCommand.scala

use std::collections::HashMap;
use std::sync::Arc;

use delta_kernel::schema::MetadataValue;
use futures::future::BoxFuture;
use serde_json::Value;
use std::collections::HashMap;
use std::sync::Arc;
use tracing::log::*;
use uuid::Uuid;

Expand All @@ -21,6 +20,7 @@ use crate::logstore::LogStoreRef;
use crate::protocol::{DeltaOperation, SaveMode};
use crate::table::builder::ensure_table_uri;
use crate::table::config::TableProperty;
use crate::table::file_format_options::FileFormatRef;
use crate::{DeltaTable, DeltaTableBuilder};

#[derive(thiserror::Error, Debug)]
Expand Down Expand Up @@ -61,6 +61,7 @@ pub struct CreateBuilder {
storage_options: Option<HashMap<String, String>>,
actions: Vec<Action>,
log_store: Option<LogStoreRef>,
file_format_options: Option<FileFormatRef>,
configuration: HashMap<String, Option<String>>,
/// Additional information to add to the commit
commit_properties: CommitProperties,
Expand Down Expand Up @@ -98,6 +99,7 @@ impl CreateBuilder {
storage_options: None,
actions: Default::default(),
log_store: None,
file_format_options: None,
configuration: Default::default(),
commit_properties: CommitProperties::default(),
raise_if_key_not_exists: true,
Expand Down Expand Up @@ -238,6 +240,12 @@ impl CreateBuilder {
self
}

/// Set format options for the underlying table files.
///
/// The options are forwarded to the `DeltaTable` constructed when the create
/// operation runs.
pub fn with_file_format_options(mut self, file_format_options: FileFormatRef) -> Self {
self.file_format_options = Some(file_format_options);
self
}

/// Set a custom execute handler, for pre and post execution
pub fn with_custom_execute_handler(mut self, handler: Arc<dyn CustomExecuteHandler>) -> Self {
self.custom_execute_handler = Some(handler);
Expand All @@ -262,7 +270,11 @@ impl CreateBuilder {
let (storage_url, table) = if let Some(log_store) = self.log_store {
(
ensure_table_uri(log_store.root_uri())?.as_str().to_string(),
DeltaTable::new(log_store, Default::default()),
DeltaTable::new(
log_store,
Default::default(),
self.file_format_options.clone(),
),
)
} else {
let storage_url =
Expand Down
Loading
Loading