Skip to content

Commit 444a673

Browse files
authored
feat: Support Substrait's IntervalCompound type/literal instead of interval-month-day-nano UDT (#12112)
* feat(substrait): use IntervalCompound instead of interval-month-day-nano UDT * clippy * more clippy * even more clippy * fix precision exponent * add a test * update deprecation version * update deprecation comments
1 parent d764c4a commit 444a673

File tree

4 files changed

+153
-171
lines changed

4 files changed

+153
-171
lines changed

datafusion/substrait/src/logical_plan/consumer.rs

Lines changed: 101 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,17 +42,18 @@ use crate::variation_const::{
4242
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
4343
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
4444
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
45-
INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF,
46-
UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
45+
LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
46+
VIEW_CONTAINER_TYPE_VARIATION_REF,
4747
};
4848
#[allow(deprecated)]
4949
use crate::variation_const::{
50-
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF,
51-
INTERVAL_YEAR_MONTH_TYPE_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
52-
TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF,
53-
TIMESTAMP_SECOND_TYPE_VARIATION_REF,
50+
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME,
51+
INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF,
52+
TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF,
53+
TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF,
5454
};
5555
use datafusion::arrow::array::{new_empty_array, AsArray};
56+
use datafusion::arrow::temporal_conversions::NANOSECONDS;
5657
use datafusion::common::scalar::ScalarStructBuilder;
5758
use datafusion::dataframe::DataFrame;
5859
use datafusion::logical_expr::expr::InList;
@@ -71,10 +72,10 @@ use datafusion::{
7172
use std::collections::HashSet;
7273
use std::sync::Arc;
7374
use substrait::proto::exchange_rel::ExchangeKind;
74-
use substrait::proto::expression::literal::interval_day_to_second::PrecisionMode;
7575
use substrait::proto::expression::literal::user_defined::Val;
7676
use substrait::proto::expression::literal::{
77-
IntervalDayToSecond, IntervalYearToMonth, UserDefined,
77+
interval_day_to_second, IntervalCompound, IntervalDayToSecond, IntervalYearToMonth,
78+
UserDefined,
7879
};
7980
use substrait::proto::expression::subquery::SubqueryType;
8081
use substrait::proto::expression::{FieldReference, Literal, ScalarFunction};
@@ -1845,9 +1846,14 @@ fn from_substrait_type(
18451846
Ok(DataType::Interval(IntervalUnit::YearMonth))
18461847
}
18471848
r#type::Kind::IntervalDay(_) => Ok(DataType::Interval(IntervalUnit::DayTime)),
1849+
r#type::Kind::IntervalCompound(_) => {
1850+
Ok(DataType::Interval(IntervalUnit::MonthDayNano))
1851+
}
18481852
r#type::Kind::UserDefined(u) => {
18491853
if let Some(name) = extensions.types.get(&u.type_reference) {
1854+
#[allow(deprecated)]
18501855
match name.as_ref() {
1856+
// Kept for backwards compatibility, producers should use IntervalCompound instead
18511857
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
18521858
_ => not_impl_err!(
18531859
"Unsupported Substrait user defined type with ref {} and variation {}",
@@ -1856,18 +1862,17 @@ fn from_substrait_type(
18561862
),
18571863
}
18581864
} else {
1859-
// Kept for backwards compatibility, new plans should include the extension instead
18601865
#[allow(deprecated)]
18611866
match u.type_reference {
1862-
// Kept for backwards compatibility, use IntervalYear instead
1867+
// Kept for backwards compatibility, producers should use IntervalYear instead
18631868
INTERVAL_YEAR_MONTH_TYPE_REF => {
18641869
Ok(DataType::Interval(IntervalUnit::YearMonth))
18651870
}
1866-
// Kept for backwards compatibility, use IntervalDay instead
1871+
// Kept for backwards compatibility, producers should use IntervalDay instead
18671872
INTERVAL_DAY_TIME_TYPE_REF => {
18681873
Ok(DataType::Interval(IntervalUnit::DayTime))
18691874
}
1870-
// Not supported yet by Substrait
1875+
// Kept for backwards compatibility, producers should use IntervalCompound instead
18711876
INTERVAL_MONTH_DAY_NANO_TYPE_REF => {
18721877
Ok(DataType::Interval(IntervalUnit::MonthDayNano))
18731878
}
@@ -2285,6 +2290,7 @@ fn from_substrait_literal(
22852290
subseconds,
22862291
precision_mode,
22872292
})) => {
2293+
use interval_day_to_second::PrecisionMode;
22882294
// DF only supports millisecond precision, so for any more granular type we lose precision
22892295
let milliseconds = match precision_mode {
22902296
Some(PrecisionMode::Microseconds(ms)) => ms / 1000,
@@ -2309,6 +2315,35 @@ fn from_substrait_literal(
23092315
Some(LiteralType::IntervalYearToMonth(IntervalYearToMonth { years, months })) => {
23102316
ScalarValue::new_interval_ym(*years, *months)
23112317
}
2318+
Some(LiteralType::IntervalCompound(IntervalCompound {
2319+
interval_year_to_month,
2320+
interval_day_to_second,
2321+
})) => match (interval_year_to_month, interval_day_to_second) {
2322+
(
2323+
Some(IntervalYearToMonth { years, months }),
2324+
Some(IntervalDayToSecond {
2325+
days,
2326+
seconds,
2327+
subseconds,
2328+
precision_mode:
2329+
Some(interval_day_to_second::PrecisionMode::Precision(p)),
2330+
}),
2331+
) => {
2332+
if *p < 0 || *p > 9 {
2333+
return plan_err!(
2334+
"Unsupported Substrait interval day to second precision: {}",
2335+
p
2336+
);
2337+
}
2338+
let nanos = *subseconds * i64::pow(10, (9 - p) as u32);
2339+
ScalarValue::new_interval_mdn(
2340+
*years * 12 + months,
2341+
*days,
2342+
*seconds as i64 * NANOSECONDS + nanos,
2343+
)
2344+
}
2345+
_ => return plan_err!("Substrait compound interval missing components"),
2346+
},
23122347
Some(LiteralType::FixedChar(c)) => ScalarValue::Utf8(Some(c.clone())),
23132348
Some(LiteralType::UserDefined(user_defined)) => {
23142349
// Helper function to prevent duplicating this code - can be inlined once the non-extension path is removed
@@ -2339,6 +2374,8 @@ fn from_substrait_literal(
23392374

23402375
if let Some(name) = extensions.types.get(&user_defined.type_reference) {
23412376
match name.as_ref() {
2377+
// Kept for backwards compatibility - producers should use IntervalCompound instead
2378+
#[allow(deprecated)]
23422379
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => {
23432380
interval_month_day_nano(user_defined)?
23442381
}
@@ -2351,10 +2388,9 @@ fn from_substrait_literal(
23512388
}
23522389
}
23532390
} else {
2354-
// Kept for backwards compatibility - new plans should include extension instead
23552391
#[allow(deprecated)]
23562392
match user_defined.type_reference {
2357-
// Kept for backwards compatibility, use IntervalYearToMonth instead
2393+
// Kept for backwards compatibility, producers should use IntervalYearToMonth instead
23582394
INTERVAL_YEAR_MONTH_TYPE_REF => {
23592395
let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else {
23602396
return substrait_err!("Interval year month value is empty");
@@ -2369,7 +2405,7 @@ fn from_substrait_literal(
23692405
value_slice,
23702406
)))
23712407
}
2372-
// Kept for backwards compatibility, use IntervalDayToSecond instead
2408+
// Kept for backwards compatibility, producers should use IntervalDayToSecond instead
23732409
INTERVAL_DAY_TIME_TYPE_REF => {
23742410
let Some(Val::Value(raw_val)) = user_defined.val.as_ref() else {
23752411
return substrait_err!("Interval day time value is empty");
@@ -2389,6 +2425,7 @@ fn from_substrait_literal(
23892425
milliseconds,
23902426
}))
23912427
}
2428+
// Kept for backwards compatibility, producers should use IntervalCompound instead
23922429
INTERVAL_MONTH_DAY_NANO_TYPE_REF => {
23932430
interval_month_day_nano(user_defined)?
23942431
}
@@ -2738,3 +2775,52 @@ impl BuiltinExprBuilder {
27382775
}))
27392776
}
27402777
}
2778+
2779+
#[cfg(test)]
2780+
mod test {
2781+
use crate::extensions::Extensions;
2782+
use crate::logical_plan::consumer::from_substrait_literal_without_names;
2783+
use arrow_buffer::IntervalMonthDayNano;
2784+
use datafusion::error::Result;
2785+
use datafusion::scalar::ScalarValue;
2786+
use substrait::proto::expression::literal::{
2787+
interval_day_to_second, IntervalCompound, IntervalDayToSecond,
2788+
IntervalYearToMonth, LiteralType,
2789+
};
2790+
use substrait::proto::expression::Literal;
2791+
2792+
#[test]
2793+
fn interval_compound_different_precision() -> Result<()> {
2794+
// DF producer (and thus roundtrip) always uses precision = 9,
2795+
// this test exists to test with some other value.
2796+
let substrait = Literal {
2797+
nullable: false,
2798+
type_variation_reference: 0,
2799+
literal_type: Some(LiteralType::IntervalCompound(IntervalCompound {
2800+
interval_year_to_month: Some(IntervalYearToMonth {
2801+
years: 1,
2802+
months: 2,
2803+
}),
2804+
interval_day_to_second: Some(IntervalDayToSecond {
2805+
days: 3,
2806+
seconds: 4,
2807+
subseconds: 5,
2808+
precision_mode: Some(
2809+
interval_day_to_second::PrecisionMode::Precision(6),
2810+
),
2811+
}),
2812+
})),
2813+
};
2814+
2815+
assert_eq!(
2816+
from_substrait_literal_without_names(&substrait, &Extensions::default())?,
2817+
ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
2818+
months: 14,
2819+
days: 3,
2820+
nanoseconds: 4_000_005_000
2821+
}))
2822+
);
2823+
2824+
Ok(())
2825+
}
2826+
}

0 commit comments

Comments
 (0)