Skip to content

Commit b5075a4

Browse files
authored
refactor(cubesql): Use arrow interval packing (#8471)
- Bump arrow-datafusion to f99263552906b5b9fb22d679cb466876057d95e9
- Use `to_parts` in date +- interval
- Use `IntervalDayTimeType::make_value` in one_day rewrite
- Use `make_value` and `to_parts` in `Decomposed*`
1 parent 59d74fe commit b5075a4

File tree

6 files changed: 75 additions (+75) and 63 deletions (-63).

packages/cubejs-backend-native/Cargo.lock

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/cubesql/Cargo.lock

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/cubesql/cubesql/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ homepage = "https://cube.dev"
1010

1111
[dependencies]
1212
arc-swap = "1"
13-
datafusion = { git = 'https://github.com/cube-js/arrow-datafusion.git', rev = "8d98b8587052b35888d29d70ba94618bd913ad39", default-features = false, features = ["regex_expressions", "unicode_expressions"] }
13+
datafusion = { git = 'https://github.com/cube-js/arrow-datafusion.git', rev = "f99263552906b5b9fb22d679cb466876057d95e9", default-features = false, features = ["regex_expressions", "unicode_expressions"] }
1414
anyhow = "1.0"
1515
thiserror = "1.0.50"
1616
cubeclient = { path = "../cubeclient" }

rust/cubesql/cubesql/src/compile/engine/udf/common.rs

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ use datafusion::{
1313
},
1414
compute::{cast, concat},
1515
datatypes::{
16-
DataType, Date32Type, Field, Float64Type, Int32Type, Int64Type, IntervalDayTimeType,
17-
IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, TimeUnit,
18-
TimestampNanosecondType, UInt32Type,
16+
ArrowPrimitiveType, DataType, Date32Type, Field, Float64Type, Int32Type, Int64Type,
17+
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType,
18+
TimeUnit, TimestampNanosecondType, UInt32Type,
1919
},
2020
},
2121
error::{DataFusionError, Result},
@@ -46,6 +46,9 @@ use crate::{
4646
sql::SessionState,
4747
};
4848

49+
type IntervalDayTime = <IntervalDayTimeType as ArrowPrimitiveType>::Native;
50+
type IntervalMonthDayNano = <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native;
51+
4952
pub type ReturnTypeFunction = Arc<dyn Fn(&[DataType]) -> Result<Arc<DataType>> + Send + Sync>;
5053
pub type ScalarFunctionImplementation =
5154
Arc<dyn Fn(&[ColumnarValue]) -> Result<ColumnarValue> + Send + Sync>;
@@ -1424,10 +1427,12 @@ fn date_addsub_year_month(t: NaiveDateTime, i: i32, is_add: bool) -> Result<Naiv
14241427
};
14251428
}
14261429

1427-
fn date_addsub_month_day_nano(t: NaiveDateTime, i: i128, is_add: bool) -> Result<NaiveDateTime> {
1428-
let month = (i >> (64 + 32)) & 0xFFFFFFFF;
1429-
let day = (i >> 64) & 0xFFFFFFFF;
1430-
let nano = i & 0xFFFFFFFFFFFFFFFF;
1430+
fn date_addsub_month_day_nano(
1431+
t: NaiveDateTime,
1432+
i: IntervalMonthDayNano,
1433+
is_add: bool,
1434+
) -> Result<NaiveDateTime> {
1435+
let (month, day, nano) = IntervalMonthDayNanoType::to_parts(i);
14311436

14321437
let result = if month > 0 && is_add || month < 0 && !is_add {
14331438
t.checked_add_months(Months::new(month as u32))
@@ -1442,9 +1447,7 @@ fn date_addsub_month_day_nano(t: NaiveDateTime, i: i128, is_add: bool) -> Result
14421447
};
14431448

14441449
let result = result.and_then(|t| {
1445-
t.checked_add_signed(Duration::nanoseconds(
1446-
(nano as i64) * (if !is_add { -1 } else { 1 }),
1447-
))
1450+
t.checked_add_signed(Duration::nanoseconds(nano * (if !is_add { -1 } else { 1 })))
14481451
});
14491452
result.ok_or_else(|| {
14501453
DataFusionError::Execution(format!(
@@ -1454,15 +1457,30 @@ fn date_addsub_month_day_nano(t: NaiveDateTime, i: i128, is_add: bool) -> Result
14541457
})
14551458
}
14561459

1457-
fn date_addsub_day_time(t: NaiveDateTime, interval: i64, is_add: bool) -> Result<NaiveDateTime> {
1458-
let i = match is_add {
1459-
true => interval,
1460-
false => -interval,
1460+
fn date_addsub_day_time(
1461+
t: NaiveDateTime,
1462+
interval: IntervalDayTime,
1463+
is_add: bool,
1464+
) -> Result<NaiveDateTime> {
1465+
let (days, millis) = IntervalDayTimeType::to_parts(interval);
1466+
1467+
let result = if days > 0 && is_add || days < 0 && !is_add {
1468+
t.checked_add_days(Days::new(days as u64))
1469+
} else {
1470+
t.checked_sub_days(Days::new(days.abs() as u64))
14611471
};
14621472

1463-
let days: i64 = i.signum() * (i.abs() >> 32);
1464-
let millis: i64 = i.signum() * ((i.abs() << 32) >> 32);
1465-
return Ok(t + chrono::Duration::days(days) + chrono::Duration::milliseconds(millis));
1473+
let result = result.and_then(|t| {
1474+
t.checked_add_signed(Duration::milliseconds(
1475+
millis as i64 * (if !is_add { -1 } else { 1 }),
1476+
))
1477+
});
1478+
result.ok_or_else(|| {
1479+
DataFusionError::Execution(format!(
1480+
"Failed to add interval: {} day {} ms",
1481+
days, millis
1482+
))
1483+
})
14661484
}
14671485

14681486
fn change_ym(t: NaiveDateTime, y: i32, m: u32) -> Option<NaiveDateTime> {

rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ use cubeclient::models::V1CubeMeta;
4040
use datafusion::{
4141
arrow::{
4242
array::{Date32Array, Date64Array, TimestampNanosecondArray},
43-
datatypes::DataType,
43+
datatypes::{DataType, IntervalDayTimeType},
4444
},
4545
logical_plan::{Column, Expr, Operator},
4646
scalar::ScalarValue,
@@ -4441,7 +4441,9 @@ impl FilterRules {
44414441
subst.insert(
44424442
one_day_var,
44434443
egraph.add(LogicalPlanLanguage::LiteralExprValue(LiteralExprValue(
4444-
ScalarValue::IntervalDayTime(Some(1 << 32)),
4444+
ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(
4445+
1, 0,
4446+
))),
44454447
))),
44464448
);
44474449
return true;

rust/cubesql/cubesql/src/compile/rewrite/rules/utils.rs

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::{
55

66
use chrono::{Datelike, NaiveDateTime, Timelike};
77
use datafusion::{
8+
arrow::datatypes::{ArrowPrimitiveType, IntervalDayTimeType, IntervalMonthDayNanoType},
89
error::DataFusionError,
910
logical_plan::{Expr, Operator},
1011
physical_plan::aggregates::AggregateFunction,
@@ -20,6 +21,9 @@ use crate::{
2021
CubeError,
2122
};
2223

24+
type IntervalDayTime = <IntervalDayTimeType as ArrowPrimitiveType>::Native;
25+
type IntervalMonthDayNano = <IntervalMonthDayNanoType as ArrowPrimitiveType>::Native;
26+
2327
pub fn parse_granularity_string(granularity: &str, to_normalize: bool) -> Option<String> {
2428
if to_normalize {
2529
match granularity.to_lowercase().as_str() {
@@ -208,30 +212,26 @@ pub struct DecomposedDayTime {
208212
}
209213

210214
impl DecomposedDayTime {
211-
const _DAY_BITS: i32 = 32;
212-
const MILLIS_BITS: i32 = 32;
213-
214215
const DAY_LABEL: &'static str = "DAY";
215216
const MILLIS_LABEL: &'static str = "MILLISECOND";
216217

217-
pub fn from_raw_interval_value(interval: i64) -> Self {
218-
Self {
219-
days: (interval >> Self::MILLIS_BITS) as i32,
220-
millis: interval as i32,
221-
}
218+
pub fn from_raw_interval_value(interval: IntervalDayTime) -> Self {
219+
let (days, millis) = IntervalDayTimeType::to_parts(interval);
220+
221+
Self { days, millis }
222222
}
223223

224224
pub fn is_single_part(&self) -> bool {
225225
self.days == 0 || self.millis == 0
226226
}
227227

228228
pub fn millis_scalar(&self) -> ScalarValue {
229-
let value = Some(self.millis as i64);
229+
let value = Some(IntervalDayTimeType::make_value(0, self.millis));
230230
ScalarValue::IntervalDayTime(value)
231231
}
232232

233233
pub fn days_scalar(&self) -> ScalarValue {
234-
let value = Some((self.days as i64) << Self::MILLIS_BITS);
234+
let value = Some(IntervalDayTimeType::make_value(self.days, 0));
235235
ScalarValue::IntervalDayTime(value)
236236
}
237237

@@ -317,26 +317,14 @@ pub struct DecomposedMonthDayNano {
317317
}
318318

319319
impl DecomposedMonthDayNano {
320-
const _MONTHS_MASK: u128 = 0xFFFF_FFFF_0000_0000_0000_0000_0000_0000;
321-
const DAYS_MASK: u128 = 0x0000_0000_FFFF_FFFF_0000_0000_0000_0000;
322-
const NANOS_MASK: u128 = 0x0000_0000_0000_0000_FFFF_FFFF_FFFF_FFFF;
323-
const _MONTHS_BITS: i32 = 32;
324-
const DAYS_BITS: i32 = 32;
325-
const NANOS_BITS: i32 = 64;
326-
const DAYS_OFFSET: i32 = Self::NANOS_BITS;
327-
const MONTHS_OFFSET: i32 = Self::DAYS_OFFSET + Self::DAYS_BITS;
328-
329320
const MONTH: &'static str = "MONTH";
330321
const DAY: &'static str = "DAY";
331322
const MILLIS: &'static str = "MILLISECOND";
332323

333324
const NANOS_IN_MILLI: i64 = 1_000_000;
334325

335-
pub fn from_raw_interval_value(interval: i128) -> Self {
336-
let interval = interval as u128;
337-
let months = (interval >> Self::MONTHS_OFFSET) as i32;
338-
let days = (interval >> Self::DAYS_OFFSET) as i32;
339-
let nanos = interval as i64;
326+
pub fn from_raw_interval_value(interval: IntervalMonthDayNano) -> Self {
327+
let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(interval);
340328
// TODO: precision loss
341329
let millis = nanos / Self::NANOS_IN_MILLI;
342330
DecomposedMonthDayNano {
@@ -355,17 +343,21 @@ impl DecomposedMonthDayNano {
355343
}
356344

357345
pub fn millis_scalar(&self) -> ScalarValue {
358-
let value = Some(((self.millis * Self::NANOS_IN_MILLI) as u128 & Self::NANOS_MASK) as i128);
346+
let value = Some(IntervalMonthDayNanoType::make_value(
347+
0,
348+
0,
349+
self.millis * Self::NANOS_IN_MILLI,
350+
));
359351
ScalarValue::IntervalMonthDayNano(value)
360352
}
361353

362354
pub fn days_scalar(&self) -> ScalarValue {
363-
let value = Some((((self.days as u128) << Self::DAYS_OFFSET) & Self::DAYS_MASK) as i128);
355+
let value = Some(IntervalMonthDayNanoType::make_value(0, self.days, 0));
364356
ScalarValue::IntervalMonthDayNano(value)
365357
}
366358

367359
pub fn months_scalar(&self) -> ScalarValue {
368-
let value = Some(((self.months as u128) << Self::MONTHS_OFFSET) as i128);
360+
let value = Some(IntervalMonthDayNanoType::make_value(self.months, 0, 0));
369361
ScalarValue::IntervalMonthDayNano(value)
370362
}
371363

0 commit comments

Comments
 (0)