Skip to content

Commit 75d2473

Browse files
authored
Remove SchemaAdapter (#19345)
Closes #16800. We could leave some of these methods around as deprecated and make them no-ops, but I'd be afraid that would create a false sense of security (compiles but behaves wrong at runtime).
1 parent 8550010 commit 75d2473

File tree

26 files changed

+531
-2299
lines changed

26 files changed

+531
-2299
lines changed

datafusion-examples/examples/custom_data_source/default_column_values.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,8 @@ const DEFAULT_VALUE_METADATA_KEY: &str = "example.default_value";
6363
/// 4. Use the DefaultPhysicalExprAdapter as a fallback for standard schema adaptation
6464
/// 5. Convert string default values to proper types using `ScalarValue::cast_to()` at planning time
6565
///
66-
/// Important: PhysicalExprAdapter is specifically designed for rewriting filter predicates
67-
/// that get pushed down to file scans. For handling missing columns in projections,
68-
/// other mechanisms in DataFusion are used (like SchemaAdapter).
66+
/// Important: PhysicalExprAdapter handles rewriting both filter predicates and projection
67+
/// expressions for file scans, including handling missing columns.
6968
///
7069
/// The metadata-based approach provides a flexible way to store default values as strings
7170
/// and cast them to the appropriate types at planning time, avoiding runtime overhead.
@@ -144,8 +143,6 @@ pub async fn default_column_values() -> Result<()> {
144143
);
145144
println!("4. Default values from metadata are cast to proper types at planning time");
146145
println!("5. The DefaultPhysicalExprAdapter handles other schema adaptations");
147-
println!("\nNote: PhysicalExprAdapter is specifically for filter predicates.");
148-
println!("For projection columns, different mechanisms handle missing columns.");
149146

150147
Ok(())
151148
}

datafusion-examples/examples/data_io/parquet_index.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ impl ParquetMetadataIndexBuilder {
511511

512512
// Get the schema of the file. A real system might have to handle the
513513
// case where the schema of the file is not the same as the schema of
514-
// the other files e.g. using SchemaAdapter.
514+
// the other files e.g. using PhysicalExprAdapterFactory.
515515
if self.file_schema.is_none() {
516516
self.file_schema = Some(reader.schema().clone());
517517
}

datafusion/catalog-listing/src/config.rs

Lines changed: 23 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use datafusion_catalog::Session;
2121
use datafusion_common::{config_err, internal_err};
2222
use datafusion_datasource::ListingTableUrl;
2323
use datafusion_datasource::file_compression_type::FileCompressionType;
24+
#[expect(deprecated)]
2425
use datafusion_datasource::schema_adapter::SchemaAdapterFactory;
2526
use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory;
2627
use std::str::FromStr;
@@ -44,15 +45,12 @@ pub enum SchemaSource {
4445
/// # Schema Evolution Support
4546
///
4647
/// This configuration supports schema evolution through the optional
47-
/// [`SchemaAdapterFactory`]. You might want to override the default factory when you need:
48+
/// [`PhysicalExprAdapterFactory`]. You might want to override the default factory when you need:
4849
///
4950
/// - **Type coercion requirements**: When you need custom logic for converting between
5051
/// different Arrow data types (e.g., Int32 ↔ Int64, Utf8 ↔ LargeUtf8)
5152
/// - **Column mapping**: You need to map columns with a legacy name to a new name
5253
/// - **Custom handling of missing columns**: By default they are filled in with nulls, but you may e.g. want to fill them in with `0` or `""`.
53-
///
54-
/// If not specified, a [`datafusion_datasource::schema_adapter::DefaultSchemaAdapterFactory`]
55-
/// will be used, which handles basic schema compatibility cases.
5654
#[derive(Debug, Clone, Default)]
5755
pub struct ListingTableConfig {
5856
/// Paths on the `ObjectStore` for creating [`crate::ListingTable`].
@@ -68,8 +66,6 @@ pub struct ListingTableConfig {
6866
pub options: Option<ListingOptions>,
6967
/// Tracks the source of the schema information
7068
pub(crate) schema_source: SchemaSource,
71-
/// Optional [`SchemaAdapterFactory`] for creating schema adapters
72-
pub(crate) schema_adapter_factory: Option<Arc<dyn SchemaAdapterFactory>>,
7369
/// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters
7470
pub(crate) expr_adapter_factory: Option<Arc<dyn PhysicalExprAdapterFactory>>,
7571
}
@@ -218,8 +214,7 @@ impl ListingTableConfig {
218214
file_schema,
219215
options: _,
220216
schema_source,
221-
schema_adapter_factory,
222-
expr_adapter_factory: physical_expr_adapter_factory,
217+
expr_adapter_factory,
223218
} = self;
224219

225220
let (schema, new_schema_source) = match file_schema {
@@ -241,8 +236,7 @@ impl ListingTableConfig {
241236
file_schema: Some(schema),
242237
options: Some(options),
243238
schema_source: new_schema_source,
244-
schema_adapter_factory,
245-
expr_adapter_factory: physical_expr_adapter_factory,
239+
expr_adapter_factory,
246240
})
247241
}
248242
None => internal_err!("No `ListingOptions` set for inferring schema"),
@@ -282,71 +276,18 @@ impl ListingTableConfig {
282276
file_schema: self.file_schema,
283277
options: Some(options),
284278
schema_source: self.schema_source,
285-
schema_adapter_factory: self.schema_adapter_factory,
286279
expr_adapter_factory: self.expr_adapter_factory,
287280
})
288281
}
289282
None => config_err!("No `ListingOptions` set for inferring schema"),
290283
}
291284
}
292285

293-
/// Set the [`SchemaAdapterFactory`] for the [`crate::ListingTable`]
294-
///
295-
/// The schema adapter factory is used to create schema adapters that can
296-
/// handle schema evolution and type conversions when reading files with
297-
/// different schemas than the table schema.
298-
///
299-
/// If not provided, a default schema adapter factory will be used.
300-
///
301-
/// # Example: Custom Schema Adapter for Type Coercion
302-
/// ```rust
303-
/// # use std::sync::Arc;
304-
/// # use datafusion_catalog_listing::{ListingTableConfig, ListingOptions};
305-
/// # use datafusion_datasource::schema_adapter::{SchemaAdapterFactory, SchemaAdapter};
306-
/// # use datafusion_datasource::ListingTableUrl;
307-
/// # use datafusion_datasource_parquet::file_format::ParquetFormat;
308-
/// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType};
309-
/// #
310-
/// # #[derive(Debug)]
311-
/// # struct MySchemaAdapterFactory;
312-
/// # impl SchemaAdapterFactory for MySchemaAdapterFactory {
313-
/// # fn create(&self, _projected_table_schema: SchemaRef, _file_schema: SchemaRef) -> Box<dyn SchemaAdapter> {
314-
/// # unimplemented!()
315-
/// # }
316-
/// # }
317-
/// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap();
318-
/// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()));
319-
/// # let table_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)]));
320-
/// let config = ListingTableConfig::new(table_paths)
321-
/// .with_listing_options(listing_options)
322-
/// .with_schema(table_schema)
323-
/// .with_schema_adapter_factory(Arc::new(MySchemaAdapterFactory));
324-
/// ```
325-
pub fn with_schema_adapter_factory(
326-
self,
327-
schema_adapter_factory: Arc<dyn SchemaAdapterFactory>,
328-
) -> Self {
329-
Self {
330-
schema_adapter_factory: Some(schema_adapter_factory),
331-
..self
332-
}
333-
}
334-
335-
/// Get the [`SchemaAdapterFactory`] for this configuration
336-
pub fn schema_adapter_factory(&self) -> Option<&Arc<dyn SchemaAdapterFactory>> {
337-
self.schema_adapter_factory.as_ref()
338-
}
339-
340286
/// Set the [`PhysicalExprAdapterFactory`] for the [`crate::ListingTable`]
341287
///
342288
/// The expression adapter factory is used to create physical expression adapters that can
343289
/// handle schema evolution and type conversions when evaluating expressions
344290
/// with different schemas than the table schema.
345-
///
346-
/// If not provided, a default physical expression adapter factory will be used unless a custom
347-
/// `SchemaAdapterFactory` is set, in which case only the `SchemaAdapterFactory` will be used.
348-
///
349-
/// See <https://github.com/apache/datafusion/issues/16800> for details on this transition.
350291
pub fn with_expr_adapter_factory(
351292
self,
352293
expr_adapter_factory: Arc<dyn PhysicalExprAdapterFactory>,
@@ -356,4 +297,23 @@ impl ListingTableConfig {
356297
..self
357298
}
358299
}
300+
301+
/// Deprecated: Set the [`SchemaAdapterFactory`] for the [`crate::ListingTable`]
302+
///
303+
/// `SchemaAdapterFactory` has been removed. Use [`Self::with_expr_adapter_factory`]
304+
/// and `PhysicalExprAdapterFactory` instead. See `upgrading.md` for more details.
305+
///
306+
/// This method is a no-op and returns `self` unchanged.
307+
#[deprecated(
308+
since = "52.0.0",
309+
note = "SchemaAdapterFactory has been removed. Use with_expr_adapter_factory and PhysicalExprAdapterFactory instead. See upgrading.md for more details."
310+
)]
311+
#[expect(deprecated)]
312+
pub fn with_schema_adapter_factory(
313+
self,
314+
_schema_adapter_factory: Arc<dyn SchemaAdapterFactory>,
315+
) -> Self {
316+
// No-op - just return self unchanged
317+
self
318+
}
359319
}

datafusion/catalog-listing/src/table.rs

Lines changed: 30 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ use datafusion_datasource::file::FileSource;
2929
use datafusion_datasource::file_groups::FileGroup;
3030
use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder};
3131
use datafusion_datasource::file_sink_config::FileSinkConfig;
32+
#[expect(deprecated)]
3233
use datafusion_datasource::schema_adapter::SchemaAdapterFactory;
3334
use datafusion_datasource::{
3435
ListingTableUrl, PartitionedFile, TableSchema, compute_all_files_statistics,
@@ -191,8 +192,6 @@ pub struct ListingTable {
191192
constraints: Constraints,
192193
/// Column default expressions for columns that are not physically present in the data files
193194
column_defaults: HashMap<String, Expr>,
194-
/// Optional [`SchemaAdapterFactory`] for creating schema adapters
195-
schema_adapter_factory: Option<Arc<dyn SchemaAdapterFactory>>,
196195
/// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters
197196
expr_adapter_factory: Option<Arc<dyn PhysicalExprAdapterFactory>>,
198197
}
@@ -235,7 +234,6 @@ impl ListingTable {
235234
collected_statistics: Arc::new(DefaultFileStatisticsCache::default()),
236235
constraints: Constraints::default(),
237236
column_defaults: HashMap::new(),
238-
schema_adapter_factory: config.schema_adapter_factory,
239237
expr_adapter_factory: config.expr_adapter_factory,
240238
};
241239

@@ -290,48 +288,42 @@ impl ListingTable {
290288
self.schema_source
291289
}
292290

293-
/// Set the [`SchemaAdapterFactory`] for this [`ListingTable`]
291+
/// Deprecated: Set the [`SchemaAdapterFactory`] for this [`ListingTable`]
294292
///
295-
/// The schema adapter factory is used to create schema adapters that can
296-
/// handle schema evolution and type conversions when reading files with
297-
/// different schemas than the table schema.
293+
/// `SchemaAdapterFactory` has been removed. Use [`ListingTableConfig::with_expr_adapter_factory`]
294+
/// and `PhysicalExprAdapterFactory` instead. See `upgrading.md` for more details.
298295
///
299-
/// # Example: Adding Schema Evolution Support
300-
/// ```rust
301-
/// # use std::sync::Arc;
302-
/// # use datafusion_catalog_listing::{ListingTable, ListingTableConfig, ListingOptions};
303-
/// # use datafusion_datasource::ListingTableUrl;
304-
/// # use datafusion_datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter};
305-
/// # use datafusion_datasource_parquet::file_format::ParquetFormat;
306-
/// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType};
307-
/// # let table_path = ListingTableUrl::parse("file:///path/to/data").unwrap();
308-
/// # let options = ListingOptions::new(Arc::new(ParquetFormat::default()));
309-
/// # let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)]));
310-
/// # let config = ListingTableConfig::new(table_path).with_listing_options(options).with_schema(schema);
311-
/// # let table = ListingTable::try_new(config).unwrap();
312-
/// let table_with_evolution = table
313-
/// .with_schema_adapter_factory(Arc::new(DefaultSchemaAdapterFactory));
314-
/// ```
315-
/// See [`ListingTableConfig::with_schema_adapter_factory`] for an example of custom SchemaAdapterFactory.
296+
/// This method is a no-op and returns `self` unchanged.
297+
#[deprecated(
298+
since = "52.0.0",
299+
note = "SchemaAdapterFactory has been removed. Use ListingTableConfig::with_expr_adapter_factory and PhysicalExprAdapterFactory instead. See upgrading.md for more details."
300+
)]
301+
#[expect(deprecated)]
316302
pub fn with_schema_adapter_factory(
317303
self,
318-
schema_adapter_factory: Arc<dyn SchemaAdapterFactory>,
304+
_schema_adapter_factory: Arc<dyn SchemaAdapterFactory>,
319305
) -> Self {
320-
Self {
321-
schema_adapter_factory: Some(schema_adapter_factory),
322-
..self
323-
}
306+
// No-op - just return self unchanged
307+
self
324308
}
325309

326-
/// Get the [`SchemaAdapterFactory`] for this table
327-
pub fn schema_adapter_factory(&self) -> Option<&Arc<dyn SchemaAdapterFactory>> {
328-
self.schema_adapter_factory.as_ref()
310+
/// Deprecated: Returns the [`SchemaAdapterFactory`] used by this [`ListingTable`].
311+
///
312+
/// `SchemaAdapterFactory` has been removed. Use `PhysicalExprAdapterFactory` instead.
313+
/// See `upgrading.md` for more details.
314+
///
315+
/// Always returns `None`.
316+
#[deprecated(
317+
since = "52.0.0",
318+
note = "SchemaAdapterFactory has been removed. Use PhysicalExprAdapterFactory instead. See upgrading.md for more details."
319+
)]
320+
#[expect(deprecated)]
321+
pub fn schema_adapter_factory(&self) -> Option<Arc<dyn SchemaAdapterFactory>> {
322+
None
329323
}
330324

331-
/// Creates a file source and applies schema adapter factory if available
332-
fn create_file_source_with_schema_adapter(
333-
&self,
334-
) -> datafusion_common::Result<Arc<dyn FileSource>> {
325+
/// Creates a file source for this table
326+
fn create_file_source(&self) -> Arc<dyn FileSource> {
335327
let table_schema = TableSchema::new(
336328
Arc::clone(&self.file_schema),
337329
self.options
@@ -341,13 +333,7 @@ impl ListingTable {
341333
.collect(),
342334
);
343335

344-
let mut source = self.options.format.file_source(table_schema);
345-
// Apply schema adapter to source if available.
346-
// The source will use this SchemaAdapter to adapt data batches as they flow up the plan.
347-
if let Some(factory) = &self.schema_adapter_factory {
348-
source = source.with_schema_adapter_factory(Arc::clone(factory))?;
349-
}
350-
Ok(source)
336+
self.options.format.file_source(table_schema)
351337
}
352338

353339
/// If file_sort_order is specified, creates the appropriate physical expressions
@@ -490,7 +476,7 @@ impl TableProvider for ListingTable {
490476
)))));
491477
};
492478

493-
let file_source = self.create_file_source_with_schema_adapter()?;
479+
let file_source = self.create_file_source();
494480

495481
// create the execution plan
496482
let plan = self

datafusion/core/src/datasource/listing/table.rs

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1453,11 +1453,10 @@ mod tests {
14531453
}
14541454

14551455
#[tokio::test]
1456-
async fn test_statistics_mapping_with_default_factory() -> Result<()> {
1456+
async fn test_basic_table_scan() -> Result<()> {
14571457
let ctx = SessionContext::new();
14581458

1459-
// Create a table without providing a custom schema adapter factory
1460-
// This should fall back to using DefaultSchemaAdapterFactory
1459+
// Test basic table creation and scanning
14611460
let path = "table/file.json";
14621461
register_test_store(&ctx, &[(path, 10)]);
14631462

@@ -1469,25 +1468,18 @@ mod tests {
14691468
let config = ListingTableConfig::new(table_path)
14701469
.with_listing_options(opt)
14711470
.with_schema(Arc::new(schema));
1472-
// Note: NOT calling .with_schema_adapter_factory() to test default behavior
14731471

14741472
let table = ListingTable::try_new(config)?;
14751473

1476-
// Verify that no custom schema adapter factory is set
1477-
assert!(table.schema_adapter_factory().is_none());
1478-
1479-
// The scan should work correctly with the default schema adapter
1474+
// The scan should work correctly
14801475
let scan_result = table.scan(&ctx.state(), None, &[], None).await;
1481-
assert!(
1482-
scan_result.is_ok(),
1483-
"Scan should succeed with default schema adapter"
1484-
);
1476+
assert!(scan_result.is_ok(), "Scan should succeed");
14851477

1486-
// Verify that the default adapter handles basic schema compatibility
1478+
// Verify file listing works
14871479
let result = table.list_files_for_scan(&ctx.state(), &[], None).await?;
14881480
assert!(
14891481
!result.file_groups.is_empty(),
1490-
"Should list files successfully with default adapter"
1482+
"Should list files successfully"
14911483
);
14921484

14931485
Ok(())

0 commit comments

Comments
 (0)