Skip to content

Commit 3c6481a

Browse files
committed
Refactor format and casting options handling
Remove OwnedFormatOptions and OwnedCastOptions from format.rs, and eliminate public re-exports from lib.rs. Refactor CastColumnExpr to directly accept Arrow options (Option<CastOptions<'_>>), using internal non-public structs in cast_column.rs to store owned values. Update proto serialization/deserialization to utilize Arrow CastOptions/FormatOptions borrows, replacing DataFusion-owned wrapper types in from_proto.rs and to_proto.rs.
1 parent 458e38b commit 3c6481a

File tree

5 files changed

+125
-250
lines changed

5 files changed

+125
-250
lines changed

datafusion/common/src/format.rs

Lines changed: 0 additions & 200 deletions
Original file line numberDiff line numberDiff line change
@@ -35,206 +35,6 @@ pub const DEFAULT_CAST_OPTIONS: CastOptions<'static> = CastOptions {
3535
format_options: DEFAULT_FORMAT_OPTIONS,
3636
};
3737

38-
/// Owned version of Arrow's `FormatOptions` with all `String` values instead of `&str`.
39-
///
40-
/// While Arrow's `FormatOptions<'a>` accepts generic lifetimes, the default constants and
41-
/// public APIs are designed around `'static` strings (e.g., hardcoded format strings).
42-
/// This struct uses `String` values, allowing format options to be created and owned at
43-
/// runtime without lifetime constraints.
44-
///
45-
/// # Conversion to Arrow Types
46-
///
47-
/// Use the `as_arrow_options()` method to temporarily convert to `FormatOptions<'a>`
48-
/// with borrowed references for passing to Arrow compute kernels:
49-
///
50-
/// ```ignore
51-
/// let owned_options = OwnedFormatOptions::default();
52-
/// let arrow_options = owned_options.as_arrow_options(); // borrows owned strings
53-
/// ```
54-
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
55-
pub struct OwnedFormatOptions {
56-
/// String representation of null values
57-
pub null: String,
58-
/// Date format string
59-
pub date_format: Option<String>,
60-
/// Datetime format string
61-
pub datetime_format: Option<String>,
62-
/// Timestamp format string
63-
pub timestamp_format: Option<String>,
64-
/// Timestamp with timezone format string
65-
pub timestamp_tz_format: Option<String>,
66-
/// Time format string
67-
pub time_format: Option<String>,
68-
/// Duration format (owned, since DurationFormat is a simple enum)
69-
pub duration_format: DurationFormat,
70-
/// Include type information in formatted output
71-
pub types_info: bool,
72-
}
73-
74-
impl OwnedFormatOptions {
75-
/// Create a new `OwnedFormatOptions` with default values.
76-
pub fn new() -> Self {
77-
Self::default()
78-
}
79-
80-
/// Set the null string.
81-
pub fn with_null(mut self, null: String) -> Self {
82-
self.null = null;
83-
self
84-
}
85-
86-
/// Set the date format.
87-
pub fn with_date_format(mut self, date_format: Option<String>) -> Self {
88-
self.date_format = date_format;
89-
self
90-
}
91-
92-
/// Set the datetime format.
93-
pub fn with_datetime_format(mut self, datetime_format: Option<String>) -> Self {
94-
self.datetime_format = datetime_format;
95-
self
96-
}
97-
98-
/// Set the timestamp format.
99-
pub fn with_timestamp_format(mut self, timestamp_format: Option<String>) -> Self {
100-
self.timestamp_format = timestamp_format;
101-
self
102-
}
103-
104-
/// Set the timestamp with timezone format.
105-
pub fn with_timestamp_tz_format(
106-
mut self,
107-
timestamp_tz_format: Option<String>,
108-
) -> Self {
109-
self.timestamp_tz_format = timestamp_tz_format;
110-
self
111-
}
112-
113-
/// Set the time format.
114-
pub fn with_time_format(mut self, time_format: Option<String>) -> Self {
115-
self.time_format = time_format;
116-
self
117-
}
118-
119-
/// Set the duration format.
120-
pub fn with_duration_format(mut self, duration_format: DurationFormat) -> Self {
121-
self.duration_format = duration_format;
122-
self
123-
}
124-
125-
/// Set whether to include type information in formatted output.
126-
pub fn with_types_info(mut self, types_info: bool) -> Self {
127-
self.types_info = types_info;
128-
self
129-
}
130-
131-
/// Convert to Arrow's `FormatOptions<'a>` with borrowed references.
132-
///
133-
/// This creates a temporary `FormatOptions` with borrowed `&str` references
134-
/// to the owned strings. The returned options can be passed to Arrow compute
135-
/// kernels. The borrowed references are valid only as long as `self` is alive.
136-
pub fn as_arrow_options<'a>(&'a self) -> FormatOptions<'a> {
137-
FormatOptions::new()
138-
.with_null(self.null.as_str())
139-
.with_date_format(self.date_format.as_deref())
140-
.with_datetime_format(self.datetime_format.as_deref())
141-
.with_timestamp_format(self.timestamp_format.as_deref())
142-
.with_timestamp_tz_format(self.timestamp_tz_format.as_deref())
143-
.with_time_format(self.time_format.as_deref())
144-
.with_duration_format(self.duration_format)
145-
.with_display_error(false)
146-
.with_types_info(self.types_info)
147-
}
148-
}
149-
150-
impl Default for OwnedFormatOptions {
151-
fn default() -> Self {
152-
Self {
153-
null: "NULL".to_string(),
154-
date_format: None,
155-
datetime_format: None,
156-
timestamp_format: None,
157-
timestamp_tz_format: None,
158-
time_format: None,
159-
duration_format: DurationFormat::Pretty,
160-
types_info: false,
161-
}
162-
}
163-
}
164-
165-
/// Owned version of Arrow's `CastOptions` with `OwnedFormatOptions` instead of `FormatOptions<'a>`.
166-
///
167-
/// While Arrow's `CastOptions<'a>` accepts generic lifetimes, the default constants and
168-
/// public APIs are designed around `'static` strings (e.g., hardcoded format strings).
169-
/// This struct uses `OwnedFormatOptions` with `String` values, allowing dynamic cast options
170-
/// to be created and owned at runtime without lifetime constraints.
171-
///
172-
/// # Conversion to Arrow Types
173-
///
174-
/// Use the `as_arrow_options()` method to temporarily convert to `CastOptions<'a>`
175-
/// with borrowed references for passing to Arrow compute kernels:
176-
///
177-
/// ```ignore
178-
/// let owned_options = OwnedCastOptions { ... };
179-
/// let arrow_options = owned_options.as_arrow_options(); // borrows owned strings
180-
/// arrow::compute::cast(&array, &data_type, Some(&arrow_options))?;
181-
/// ```
182-
#[derive(Debug, Clone, Default, Eq, PartialEq, Hash)]
183-
pub struct OwnedCastOptions {
184-
/// Whether to use safe casting (return null instead of an error when a cast fails)
185-
pub safe: bool,
186-
/// Format options for string output
187-
pub format_options: OwnedFormatOptions,
188-
}
189-
190-
impl OwnedCastOptions {
191-
/// Create a new `OwnedCastOptions` with default values.
192-
pub fn new(safe: bool) -> Self {
193-
Self {
194-
safe,
195-
format_options: OwnedFormatOptions::default(),
196-
}
197-
}
198-
199-
/// Create a new `OwnedCastOptions` from an Arrow `CastOptions`.
200-
pub fn from_arrow_options(options: &CastOptions<'_>) -> Self {
201-
Self {
202-
safe: options.safe,
203-
format_options: OwnedFormatOptions {
204-
null: options.format_options.null().to_string(),
205-
date_format: options.format_options.date_format().map(|s| s.to_string()),
206-
datetime_format: options
207-
.format_options
208-
.datetime_format()
209-
.map(|s| s.to_string()),
210-
timestamp_format: options
211-
.format_options
212-
.timestamp_format()
213-
.map(|s| s.to_string()),
214-
timestamp_tz_format: options
215-
.format_options
216-
.timestamp_tz_format()
217-
.map(|s| s.to_string()),
218-
time_format: options.format_options.time_format().map(|s| s.to_string()),
219-
duration_format: options.format_options.duration_format(),
220-
types_info: options.format_options.types_info(),
221-
},
222-
}
223-
}
224-
225-
/// Convert to Arrow's `CastOptions<'a>` with borrowed references.
226-
///
227-
/// This creates a temporary `CastOptions` with borrowed `&str` references
228-
/// to the owned strings. The returned options can be passed to Arrow compute
229-
/// kernels. The borrowed references are valid only as long as `self` is alive.
230-
pub fn as_arrow_options<'a>(&'a self) -> CastOptions<'a> {
231-
CastOptions {
232-
safe: self.safe,
233-
format_options: self.format_options.as_arrow_options(),
234-
}
235-
}
236-
}
237-
23838
/// Output formats for Explain plans
23939
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
24040
pub enum ExplainFormat {

datafusion/common/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ pub use file_options::file_type::{
7979
DEFAULT_ARROW_EXTENSION, DEFAULT_AVRO_EXTENSION, DEFAULT_CSV_EXTENSION,
8080
DEFAULT_JSON_EXTENSION, DEFAULT_PARQUET_EXTENSION, GetExt,
8181
};
82-
pub use format::{OwnedCastOptions, OwnedFormatOptions};
8382
pub use functional_dependencies::{
8483
Constraint, Constraints, Dependency, FunctionalDependence, FunctionalDependencies,
8584
aggregate_functional_dependencies, get_required_group_by_exprs_indices,

datafusion/physical-expr/src/expressions/cast_column.rs

Lines changed: 92 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@
1919
2020
use crate::{expressions::Column, physical_expr::PhysicalExpr};
2121
use arrow::{
22-
compute::can_cast_types,
22+
compute::{CastOptions, can_cast_types},
2323
datatypes::{DataType, FieldRef, Schema},
2424
record_batch::RecordBatch,
25+
util::display::{DurationFormat, FormatOptions},
2526
};
2627
use datafusion_common::{
2728
Result, ScalarValue,
28-
format::OwnedCastOptions,
2929
nested_struct::{
3030
cast_column, validate_field_compatibility, validate_struct_compatibility,
3131
},
@@ -60,7 +60,7 @@ pub struct CastColumnExpr {
6060
/// The field metadata describing the desired output column.
6161
target_field: FieldRef,
6262
/// Options forwarded to [`cast_column`] (owned, allowing dynamic format strings).
63-
cast_options: OwnedCastOptions,
63+
cast_options: CastOptionsStore,
6464
/// Schema used to resolve expression data types during construction.
6565
input_schema: Arc<Schema>,
6666
}
@@ -85,8 +85,89 @@ impl Hash for CastColumnExpr {
8585
}
8686
}
8787

88-
fn normalize_cast_options(cast_options: Option<OwnedCastOptions>) -> OwnedCastOptions {
89-
cast_options.unwrap_or_default()
88+
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
89+
struct FormatOptionsStore {
90+
null: String,
91+
date_format: Option<String>,
92+
datetime_format: Option<String>,
93+
timestamp_format: Option<String>,
94+
timestamp_tz_format: Option<String>,
95+
time_format: Option<String>,
96+
duration_format: DurationFormat,
97+
types_info: bool,
98+
}
99+
100+
impl Default for FormatOptionsStore {
101+
fn default() -> Self {
102+
Self {
103+
null: "NULL".to_string(),
104+
date_format: None,
105+
datetime_format: None,
106+
timestamp_format: None,
107+
timestamp_tz_format: None,
108+
time_format: None,
109+
duration_format: DurationFormat::Pretty,
110+
types_info: false,
111+
}
112+
}
113+
}
114+
115+
impl FormatOptionsStore {
116+
fn from_arrow_options(options: &FormatOptions<'_>) -> Self {
117+
Self {
118+
null: options.null().to_string(),
119+
date_format: options.date_format().map(str::to_owned),
120+
datetime_format: options.datetime_format().map(str::to_owned),
121+
timestamp_format: options.timestamp_format().map(str::to_owned),
122+
timestamp_tz_format: options.timestamp_tz_format().map(str::to_owned),
123+
time_format: options.time_format().map(str::to_owned),
124+
duration_format: options.duration_format(),
125+
types_info: options.types_info(),
126+
}
127+
}
128+
129+
fn as_arrow_options(&self) -> FormatOptions<'_> {
130+
FormatOptions::new()
131+
.with_null(self.null.as_str())
132+
.with_date_format(self.date_format.as_deref())
133+
.with_datetime_format(self.datetime_format.as_deref())
134+
.with_timestamp_format(self.timestamp_format.as_deref())
135+
.with_timestamp_tz_format(self.timestamp_tz_format.as_deref())
136+
.with_time_format(self.time_format.as_deref())
137+
.with_duration_format(self.duration_format)
138+
.with_display_error(false)
139+
.with_types_info(self.types_info)
140+
}
141+
}
142+
143+
#[derive(Debug, Clone, Eq, PartialEq, Hash, Default)]
144+
struct CastOptionsStore {
145+
safe: bool,
146+
format_options: FormatOptionsStore,
147+
}
148+
149+
impl CastOptionsStore {
150+
fn from_arrow_options(options: &CastOptions<'_>) -> Self {
151+
Self {
152+
safe: options.safe,
153+
format_options: FormatOptionsStore::from_arrow_options(
154+
&options.format_options,
155+
),
156+
}
157+
}
158+
159+
fn as_arrow_options(&self) -> CastOptions<'_> {
160+
CastOptions {
161+
safe: self.safe,
162+
format_options: self.format_options.as_arrow_options(),
163+
}
164+
}
165+
}
166+
167+
fn normalize_cast_options(cast_options: Option<CastOptions<'_>>) -> CastOptionsStore {
168+
cast_options
169+
.map(|options| CastOptionsStore::from_arrow_options(&options))
170+
.unwrap_or_default()
90171
}
91172

92173
/// Validates that a Column expression matches the input field's data type.
@@ -217,7 +298,7 @@ impl CastColumnExpr {
217298
expr: Arc<dyn PhysicalExpr>,
218299
input_field: FieldRef,
219300
target_field: FieldRef,
220-
cast_options: Option<OwnedCastOptions>,
301+
cast_options: Option<CastOptions<'_>>,
221302
input_schema: Arc<Schema>,
222303
) -> Result<Self> {
223304
let cast_options = normalize_cast_options(cast_options);
@@ -245,7 +326,7 @@ impl CastColumnExpr {
245326
expr: Arc<dyn PhysicalExpr>,
246327
input_field: FieldRef,
247328
target_field: FieldRef,
248-
cast_options: Option<OwnedCastOptions>,
329+
cast_options: Option<CastOptions<'_>>,
249330
) -> Result<Self> {
250331
let input_schema = Arc::new(Schema::new(vec![Arc::unwrap_or_clone(Arc::clone(
251332
&input_field,
@@ -269,7 +350,7 @@ impl CastColumnExpr {
269350
expr: Arc<dyn PhysicalExpr>,
270351
input_field: FieldRef,
271352
target_field: FieldRef,
272-
cast_options: Option<OwnedCastOptions>,
353+
cast_options: Option<CastOptions<'_>>,
273354
input_schema: Arc<Schema>,
274355
) -> Result<Self> {
275356
Self::build(expr, input_field, target_field, cast_options, input_schema)
@@ -291,8 +372,8 @@ impl CastColumnExpr {
291372
}
292373

293374
/// Casting options forwarded to [`cast_column`].
294-
pub fn cast_options(&self) -> &OwnedCastOptions {
295-
&self.cast_options
375+
pub fn cast_options(&self) -> CastOptions<'_> {
376+
self.cast_options.as_arrow_options()
296377
}
297378
}
298379

@@ -357,7 +438,7 @@ impl PhysicalExpr for CastColumnExpr {
357438
child,
358439
Arc::clone(&self.input_field),
359440
Arc::clone(&self.target_field),
360-
Some(self.cast_options.clone()),
441+
Some(self.cast_options.as_arrow_options()),
361442
Arc::clone(&self.input_schema),
362443
)?))
363444
}

0 commit comments

Comments
 (0)