Skip to content

Commit 9a68eea

Browse files
committed
fix(iceberg-datafusion): handle timestamp predicates from DF
DataFusion sometimes passes dates as string literals, but can also pass timestamp ScalarValues, which need to be converted to predicates correctly in order to enable partition pruning.
1 parent 4f5d8e2 commit 9a68eea

File tree

5 files changed

+381
-6
lines changed

5 files changed

+381
-6
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/iceberg/src/expr/predicate.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -669,7 +669,7 @@ impl Not for Predicate {
669669
/// just adds a `NOT` operator.
670670
///
671671
/// # Example
672-
///
672+
///
673673
///```rust
674674
/// use std::ops::Bound::Unbounded;
675675
///

crates/iceberg/src/spec/values.rs

Lines changed: 228 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,11 +1199,57 @@ impl Datum {
11991199
(PrimitiveLiteral::Long(val), _, PrimitiveType::Int) => {
12001200
Ok(Datum::i64_to_i32(*val))
12011201
}
1202-
(PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamp) => {
1203-
Ok(Datum::timestamp_micros(*val))
1204-
}
1205-
(PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamptz) => {
1206-
Ok(Datum::timestamptz_micros(*val))
1202+
(PrimitiveLiteral::Long(val), source_type, target_type) => {
1203+
match (source_type, target_type) {
1204+
(_, PrimitiveType::Long) => Ok(Datum::long(*val)),
1205+
(
1206+
PrimitiveType::Long
1207+
| PrimitiveType::Timestamp
1208+
| PrimitiveType::Timestamptz,
1209+
PrimitiveType::Timestamp,
1210+
) => Ok(Datum::timestamp_micros(*val)),
1211+
(
1212+
PrimitiveType::Long
1213+
| PrimitiveType::Timestamp
1214+
| PrimitiveType::Timestamptz,
1215+
PrimitiveType::Timestamptz,
1216+
) => Ok(Datum::timestamptz_micros(*val)),
1217+
(
1218+
PrimitiveType::Long
1219+
| PrimitiveType::TimestampNs
1220+
| PrimitiveType::TimestamptzNs,
1221+
PrimitiveType::TimestampNs,
1222+
) => Ok(Datum::timestamp_nanos(*val)),
1223+
(
1224+
PrimitiveType::Long
1225+
| PrimitiveType::TimestampNs
1226+
| PrimitiveType::TimestamptzNs,
1227+
PrimitiveType::TimestamptzNs,
1228+
) => Ok(Datum::timestamptz_nanos(*val)),
1229+
(
1230+
PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs,
1231+
PrimitiveType::Timestamp,
1232+
) => Ok(Datum::timestamp_micros(val / 1000)),
1233+
(
1234+
PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs,
1235+
PrimitiveType::Timestamptz,
1236+
) => Ok(Datum::timestamptz_micros(val / 1000)),
1237+
(
1238+
PrimitiveType::Timestamp | PrimitiveType::Timestamptz,
1239+
PrimitiveType::TimestampNs,
1240+
) => Ok(Datum::timestamp_nanos(val * 1000)),
1241+
(
1242+
PrimitiveType::Timestamp | PrimitiveType::Timestamptz,
1243+
PrimitiveType::TimestamptzNs,
1244+
) => Ok(Datum::timestamptz_nanos(val * 1000)),
1245+
_ => Err(Error::new(
1246+
ErrorKind::DataInvalid,
1247+
format!(
1248+
"Can't convert datum from {} type to {} type.",
1249+
self.r#type, target_primitive_type
1250+
),
1251+
)),
1252+
}
12071253
}
12081254
// Let's wait with nano's until this clears up: https://github.com/apache/iceberg/pull/11775
12091255
(PrimitiveLiteral::Int128(val), _, PrimitiveType::Long) => {
@@ -3943,4 +3989,181 @@ mod tests {
39433989

39443990
assert_eq!(double_sorted, double_expected);
39453991
}
3992+
3993+
// Tests for timestamp nanosecond conversions
3994+
#[test]
3995+
fn test_datum_timestamp_nanos_convert_to_timestamp_micros() {
3996+
let datum = Datum::timestamp_nanos(12345000);
3997+
3998+
let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap();
3999+
4000+
let expected = Datum::timestamp_micros(12345);
4001+
4002+
assert_eq!(result, expected);
4003+
}
4004+
4005+
#[test]
4006+
fn test_datum_timestamp_nanos_convert_to_timestamptz_micros() {
4007+
let datum = Datum::timestamp_nanos(12345000);
4008+
4009+
let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap();
4010+
4011+
let expected = Datum::timestamptz_micros(12345);
4012+
4013+
assert_eq!(result, expected);
4014+
}
4015+
4016+
#[test]
4017+
fn test_datum_timestamptz_nanos_convert_to_timestamp_micros() {
4018+
let datum = Datum::timestamptz_nanos(12345000);
4019+
4020+
let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap();
4021+
4022+
let expected = Datum::timestamp_micros(12345);
4023+
4024+
assert_eq!(result, expected);
4025+
}
4026+
4027+
#[test]
4028+
fn test_datum_timestamptz_nanos_convert_to_timestamptz_micros() {
4029+
let datum = Datum::timestamptz_nanos(12345000);
4030+
4031+
let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap();
4032+
4033+
let expected = Datum::timestamptz_micros(12345);
4034+
4035+
assert_eq!(result, expected);
4036+
}
4037+
4038+
#[test]
4039+
fn test_datum_timestamp_micros_convert_to_timestamp_nanos() {
4040+
let datum = Datum::timestamp_micros(12345);
4041+
4042+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4043+
4044+
let expected = Datum::timestamp_nanos(12345000);
4045+
4046+
assert_eq!(result, expected);
4047+
}
4048+
4049+
#[test]
4050+
fn test_datum_timestamp_micros_convert_to_timestamptz_nanos() {
4051+
let datum = Datum::timestamp_micros(12345);
4052+
4053+
let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap();
4054+
4055+
let expected = Datum::timestamptz_nanos(12345000);
4056+
4057+
assert_eq!(result, expected);
4058+
}
4059+
4060+
#[test]
4061+
fn test_datum_timestamptz_micros_convert_to_timestamp_nanos() {
4062+
let datum = Datum::timestamptz_micros(12345);
4063+
4064+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4065+
4066+
let expected = Datum::timestamp_nanos(12345000);
4067+
4068+
assert_eq!(result, expected);
4069+
}
4070+
4071+
#[test]
4072+
fn test_datum_timestamptz_micros_convert_to_timestamptz_nanos() {
4073+
let datum = Datum::timestamptz_micros(12345);
4074+
4075+
let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap();
4076+
4077+
let expected = Datum::timestamptz_nanos(12345000);
4078+
4079+
assert_eq!(result, expected);
4080+
}
4081+
4082+
#[test]
4083+
fn test_datum_timestamp_nanos_convert_to_timestamp_nanos() {
4084+
let datum = Datum::timestamp_nanos(12345);
4085+
4086+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4087+
4088+
let expected = Datum::timestamp_nanos(12345);
4089+
4090+
assert_eq!(result, expected);
4091+
}
4092+
4093+
#[test]
4094+
fn test_datum_timestamp_nanos_convert_to_timestamptz_nanos() {
4095+
let datum = Datum::timestamp_nanos(12345);
4096+
4097+
let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap();
4098+
4099+
let expected = Datum::timestamptz_nanos(12345);
4100+
4101+
assert_eq!(result, expected);
4102+
}
4103+
4104+
#[test]
4105+
fn test_datum_timestamptz_nanos_convert_to_timestamp_nanos() {
4106+
let datum = Datum::timestamptz_nanos(12345);
4107+
4108+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4109+
4110+
let expected = Datum::timestamp_nanos(12345);
4111+
4112+
assert_eq!(result, expected);
4113+
}
4114+
4115+
#[test]
4116+
fn test_datum_timestamptz_nanos_convert_to_timestamptz_nanos() {
4117+
let datum = Datum::timestamptz_nanos(12345);
4118+
4119+
let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap();
4120+
4121+
let expected = Datum::timestamptz_nanos(12345);
4122+
4123+
assert_eq!(result, expected);
4124+
}
4125+
4126+
#[test]
4127+
fn test_datum_long_convert_to_timestamp_nanos() {
4128+
let datum = Datum::long(12345);
4129+
4130+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4131+
4132+
let expected = Datum::timestamp_nanos(12345);
4133+
4134+
assert_eq!(result, expected);
4135+
}
4136+
4137+
#[test]
4138+
fn test_datum_long_convert_to_timestamptz_nanos() {
4139+
let datum = Datum::long(12345);
4140+
4141+
let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap();
4142+
4143+
let expected = Datum::timestamptz_nanos(12345);
4144+
4145+
assert_eq!(result, expected);
4146+
}
4147+
4148+
#[test]
4149+
fn test_datum_timestamp_nanos_to_micros() {
4150+
let datum = Datum::timestamp_nanos(12345678);
4151+
4152+
let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap();
4153+
4154+
let expected = Datum::timestamp_micros(12345);
4155+
4156+
assert_eq!(result, expected);
4157+
}
4158+
4159+
#[test]
4160+
fn test_datum_timestamp_micros_to_nanos() {
4161+
let datum = Datum::timestamp_micros(12345);
4162+
4163+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4164+
4165+
let expected = Datum::timestamp_nanos(12345000);
4166+
4167+
assert_eq!(result, expected);
4168+
}
39464169
}

crates/integrations/datafusion/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ repository = { workspace = true }
3131
[dependencies]
3232
anyhow = { workspace = true }
3333
async-trait = { workspace = true }
34+
chrono = { workspace = true }
3435
datafusion = { workspace = true }
3536
futures = { workspace = true }
3637
iceberg = { workspace = true }

0 commit comments

Comments
 (0)