Skip to content

Commit 131f7dd

Browse files
authored
Merge branch 'main' into alamb/clickbenchmaxxing_2
2 parents 1f85927 + 44dfa7b commit 131f7dd

File tree

18 files changed

+490
-123
lines changed

18 files changed

+490
-123
lines changed

datafusion/core/src/execution/context/mod.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2218,7 +2218,9 @@ mod tests {
22182218
use crate::test;
22192219
use crate::test_util::{plan_and_collect, populate_csv_partitions};
22202220
use arrow::datatypes::{DataType, TimeUnit};
2221+
use arrow_schema::FieldRef;
22212222
use datafusion_common::DataFusionError;
2223+
use datafusion_common::datatype::DataTypeExt;
22222224
use std::error::Error;
22232225
use std::path::PathBuf;
22242226

@@ -2735,7 +2737,7 @@ mod tests {
27352737
struct MyTypePlanner {}
27362738

27372739
impl TypePlanner for MyTypePlanner {
2738-
fn plan_type(&self, sql_type: &ast::DataType) -> Result<Option<DataType>> {
2740+
fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> {
27392741
match sql_type {
27402742
ast::DataType::Datetime(precision) => {
27412743
let precision = match precision {
@@ -2745,7 +2747,9 @@ mod tests {
27452747
None | Some(9) => TimeUnit::Nanosecond,
27462748
_ => unreachable!(),
27472749
};
2748-
Ok(Some(DataType::Timestamp(precision, None)))
2750+
Ok(Some(
2751+
DataType::Timestamp(precision, None).into_nullable_field_ref(),
2752+
))
27492753
}
27502754
_ => Ok(None),
27512755
}

datafusion/expr/src/planner.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,10 +434,28 @@ pub trait TypePlanner: Debug + Send + Sync {
434434
/// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`DataType`]
435435
///
436436
/// Returns None if not possible
437+
#[deprecated(since = "53.0.0", note = "Use plan_type_field()")]
437438
fn plan_type(
438439
&self,
439440
_sql_type: &sqlparser::ast::DataType,
440441
) -> Result<Option<DataType>> {
441442
Ok(None)
442443
}
444+
445+
/// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`FieldRef`]
446+
///
447+
/// Returns None if not possible. Unlike [`Self::plan_type`], `plan_type_field()`
448+
/// makes it possible to express extension types (e.g., `arrow.uuid`) or otherwise
449+
/// insert metadata into the DataFusion type representation. The default implementation
450+
/// falls back on [`Self::plan_type`] for backward compatibility and wraps the result
451+
/// in a nullable field reference.
452+
fn plan_type_field(
453+
&self,
454+
sql_type: &sqlparser::ast::DataType,
455+
) -> Result<Option<FieldRef>> {
456+
#[expect(deprecated)]
457+
Ok(self
458+
.plan_type(sql_type)?
459+
.map(|data_type| data_type.into_nullable_field_ref()))
460+
}
443461
}

datafusion/functions/src/datetime/date_part.rs

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@ use arrow::datatypes::DataType::{
2828
};
2929
use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
3030
use arrow::datatypes::{
31-
DataType, Date32Type, Date64Type, Field, FieldRef, IntervalUnit as ArrowIntervalUnit,
32-
TimeUnit,
31+
ArrowTimestampType, DataType, Date32Type, Date64Type, Field, FieldRef,
32+
IntervalUnit as ArrowIntervalUnit, TimeUnit, TimestampMicrosecondType,
33+
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
3334
};
34-
use chrono::{Datelike, NaiveDate, TimeZone, Utc};
35+
use chrono::{Datelike, NaiveDate};
3536
use datafusion_common::types::{NativeType, logical_date};
3637

3738
use datafusion_common::{
@@ -337,37 +338,32 @@ fn date_to_scalar(date: NaiveDate, target_type: &DataType) -> Option<ScalarValue
337338

338339
Timestamp(unit, tz_opt) => {
339340
let naive_midnight = date.and_hms_opt(0, 0, 0)?;
340-
341-
let utc_dt = if let Some(tz_str) = tz_opt {
342-
let tz: Tz = tz_str.parse().ok()?;
343-
344-
let local = tz.from_local_datetime(&naive_midnight);
345-
346-
let local_dt = match local {
347-
chrono::offset::LocalResult::Single(dt) => dt,
348-
chrono::offset::LocalResult::Ambiguous(dt1, _dt2) => dt1,
349-
chrono::offset::LocalResult::None => local.earliest()?,
350-
};
351-
352-
local_dt.with_timezone(&Utc)
353-
} else {
354-
Utc.from_utc_datetime(&naive_midnight)
355-
};
341+
let tz: Option<Tz> = tz_opt.clone().and_then(|s| s.parse().ok());
356342

357343
match unit {
358-
Second => {
359-
ScalarValue::TimestampSecond(Some(utc_dt.timestamp()), tz_opt.clone())
360-
}
344+
Second => ScalarValue::TimestampSecond(
345+
TimestampSecondType::from_naive_datetime(naive_midnight, tz.as_ref()),
346+
tz_opt.clone(),
347+
),
361348
Millisecond => ScalarValue::TimestampMillisecond(
362-
Some(utc_dt.timestamp_millis()),
349+
TimestampMillisecondType::from_naive_datetime(
350+
naive_midnight,
351+
tz.as_ref(),
352+
),
363353
tz_opt.clone(),
364354
),
365355
Microsecond => ScalarValue::TimestampMicrosecond(
366-
Some(utc_dt.timestamp_micros()),
356+
TimestampMicrosecondType::from_naive_datetime(
357+
naive_midnight,
358+
tz.as_ref(),
359+
),
367360
tz_opt.clone(),
368361
),
369362
Nanosecond => ScalarValue::TimestampNanosecond(
370-
Some(utc_dt.timestamp_nanos_opt()?),
363+
TimestampNanosecondType::from_naive_datetime(
364+
naive_midnight,
365+
tz.as_ref(),
366+
),
371367
tz_opt.clone(),
372368
),
373369
}

datafusion/functions/src/string/common.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use std::sync::Arc;
2222
use crate::strings::make_and_append_view;
2323
use arrow::array::{
2424
Array, ArrayRef, GenericStringArray, GenericStringBuilder, NullBufferBuilder,
25-
OffsetSizeTrait, StringBuilder, StringViewArray, new_null_array,
25+
OffsetSizeTrait, StringViewArray, StringViewBuilder, new_null_array,
2626
};
2727
use arrow::buffer::{Buffer, ScalarBuffer};
2828
use arrow::datatypes::DataType;
@@ -358,10 +358,8 @@ where
358358
>(array, op)?)),
359359
DataType::Utf8View => {
360360
let string_array = as_string_view_array(array)?;
361-
let mut string_builder = StringBuilder::with_capacity(
362-
string_array.len(),
363-
string_array.get_array_memory_size(),
364-
);
361+
let mut string_builder =
362+
StringViewBuilder::with_capacity(string_array.len());
365363

366364
for str in string_array.iter() {
367365
if let Some(str) = str {
@@ -386,7 +384,7 @@ where
386384
}
387385
ScalarValue::Utf8View(a) => {
388386
let result = a.as_ref().map(|x| op(x));
389-
Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result)))
387+
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(result)))
390388
}
391389
other => exec_err!("Unsupported data type {other:?} for function {name}"),
392390
},

datafusion/functions/src/string/lower.rs

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ use arrow::datatypes::DataType;
1919
use std::any::Any;
2020

2121
use crate::string::common::to_lower;
22-
use crate::utils::utf8_to_str_type;
2322
use datafusion_common::Result;
2423
use datafusion_common::types::logical_string;
2524
use datafusion_expr::{
@@ -82,7 +81,7 @@ impl ScalarUDFImpl for LowerFunc {
8281
}
8382

8483
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
85-
utf8_to_str_type(&arg_types[0], "lower")
84+
Ok(arg_types[0].clone())
8685
}
8786

8887
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
@@ -97,8 +96,7 @@ impl ScalarUDFImpl for LowerFunc {
9796
#[cfg(test)]
9897
mod tests {
9998
use super::*;
100-
use arrow::array::{Array, ArrayRef, StringArray};
101-
use arrow::datatypes::DataType::Utf8;
99+
use arrow::array::{Array, ArrayRef, StringArray, StringViewArray};
102100
use arrow::datatypes::Field;
103101
use datafusion_common::config::ConfigOptions;
104102
use std::sync::Arc;
@@ -111,7 +109,7 @@ mod tests {
111109
number_rows: input.len(),
112110
args: vec![ColumnarValue::Array(input)],
113111
arg_fields,
114-
return_field: Field::new("f", Utf8, true).into(),
112+
return_field: Field::new("f", expected.data_type().clone(), true).into(),
115113
config_options: Arc::new(ConfigOptions::default()),
116114
};
117115

@@ -197,4 +195,21 @@ mod tests {
197195

198196
to_lower(input, expected)
199197
}
198+
199+
#[test]
200+
fn lower_utf8view() -> Result<()> {
201+
let input = Arc::new(StringViewArray::from(vec![
202+
Some("ARROW"),
203+
None,
204+
Some("TSCHÜSS"),
205+
])) as ArrayRef;
206+
207+
let expected = Arc::new(StringViewArray::from(vec![
208+
Some("arrow"),
209+
None,
210+
Some("tschüss"),
211+
])) as ArrayRef;
212+
213+
to_lower(input, expected)
214+
}
200215
}

datafusion/functions/src/string/upper.rs

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
// under the License.
1717

1818
use crate::string::common::to_upper;
19-
use crate::utils::utf8_to_str_type;
2019
use arrow::datatypes::DataType;
2120
use datafusion_common::Result;
2221
use datafusion_common::types::logical_string;
@@ -81,7 +80,7 @@ impl ScalarUDFImpl for UpperFunc {
8180
}
8281

8382
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
84-
utf8_to_str_type(&arg_types[0], "upper")
83+
Ok(arg_types[0].clone())
8584
}
8685

8786
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
@@ -96,8 +95,7 @@ impl ScalarUDFImpl for UpperFunc {
9695
#[cfg(test)]
9796
mod tests {
9897
use super::*;
99-
use arrow::array::{Array, ArrayRef, StringArray};
100-
use arrow::datatypes::DataType::Utf8;
98+
use arrow::array::{Array, ArrayRef, StringArray, StringViewArray};
10199
use arrow::datatypes::Field;
102100
use datafusion_common::config::ConfigOptions;
103101
use std::sync::Arc;
@@ -110,7 +108,7 @@ mod tests {
110108
number_rows: input.len(),
111109
args: vec![ColumnarValue::Array(input)],
112110
arg_fields: vec![arg_field],
113-
return_field: Field::new("f", Utf8, true).into(),
111+
return_field: Field::new("f", expected.data_type().clone(), true).into(),
114112
config_options: Arc::new(ConfigOptions::default()),
115113
};
116114

@@ -196,4 +194,21 @@ mod tests {
196194

197195
to_upper(input, expected)
198196
}
197+
198+
#[test]
199+
fn upper_utf8view() -> Result<()> {
200+
let input = Arc::new(StringViewArray::from(vec![
201+
Some("arrow"),
202+
None,
203+
Some("tschüß"),
204+
])) as ArrayRef;
205+
206+
let expected = Arc::new(StringViewArray::from(vec![
207+
Some("ARROW"),
208+
None,
209+
Some("TSCHÜSS"),
210+
])) as ArrayRef;
211+
212+
to_upper(input, expected)
213+
}
199214
}

0 commit comments

Comments
 (0)