Skip to content

Commit 8c9de85

Browse files
Coerce timestamps inside structs and lists
Perform nanosecond-to-microsecond conversion inside structs and lists; otherwise Delta table creation fails.
1 parent 1dad77b commit 8c9de85

File tree

1 file changed

+61
-0
lines changed

1 file changed

+61
-0
lines changed

crates/oxbow/src/lib.rs

Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -467,6 +467,21 @@ fn coerce_field(
467467
_ => {}
468468
}
469469
}
470+
DataType::List(field) => {
471+
let coerced = coerce_field(field.clone());
472+
let list_field = Field::new(field.name(), DataType::List(coerced), field.is_nullable());
473+
return Arc::new(list_field);
474+
}
475+
DataType::Struct(fields) => {
476+
let coerced: Vec<deltalake::arrow::datatypes::FieldRef> =
477+
fields.iter().map(|f| coerce_field(f.clone())).collect();
478+
let struct_field = Field::new(
479+
field.name(),
480+
DataType::Struct(coerced.iter().map(|f| f.as_ref().clone()).collect()),
481+
field.is_nullable(),
482+
);
483+
return Arc::new(struct_field);
484+
}
470485
_ => {}
471486
};
472487
field.clone()
@@ -1262,4 +1277,50 @@ mod tests {
12621277
);
12631278
assert_eq!(mods.adds().len(), 1, "Why are there two? {mods:#?}");
12641279
}
1280+
1281+
/// Verifies that `coerce_field` rewrites nanosecond timestamps to microseconds
/// when they are nested inside a struct, including inside a list that is itself
/// a member of that struct.
///
/// The assertions inspect the resulting `DataType` tree rather than matching on
/// its `Display` text: a change in how the type is rendered would otherwise let
/// the "no nanosecond" check pass vacuously.
#[test]
fn test_coerce_field_struct() {
    use deltalake::arrow::datatypes::*;

    /// Appends the unit of every timestamp reachable from `data_type`,
    /// descending through list items and struct members.
    fn timestamp_units(data_type: &DataType, units: &mut Vec<TimeUnit>) {
        match data_type {
            DataType::Timestamp(unit, _) => units.push(*unit),
            DataType::List(item) => timestamp_units(item.data_type(), units),
            DataType::Struct(children) => {
                for child in children.iter() {
                    timestamp_units(child.data_type(), units);
                }
            }
            _ => {}
        }
    }

    let field = Field::new(
        "meta",
        DataType::Struct(
            vec![
                // Needs coercion: nanosecond precision directly inside the struct.
                Field::new(
                    "timestamp_ns",
                    DataType::Timestamp(TimeUnit::Nanosecond, None),
                    true,
                ),
                // Already microsecond precision; must still be a microsecond
                // timestamp after coercion.
                Field::new(
                    "timestamp_ms",
                    DataType::Timestamp(TimeUnit::Microsecond, None),
                    true,
                ),
                // Needs coercion: nanosecond precision inside a list inside the struct.
                Field::new(
                    "timestamps",
                    DataType::List(Arc::new(Field::new(
                        "item",
                        DataType::Timestamp(TimeUnit::Nanosecond, None),
                        true,
                    ))),
                    true,
                ),
                // Non-timestamp member; contributes nothing to the collected units.
                Field::new("id", DataType::Int32, true),
            ]
            .into(),
        ),
        true,
    );

    let coerced = coerce_field(Arc::new(field));

    let mut units = Vec::new();
    timestamp_units(coerced.data_type(), &mut units);

    // Two direct timestamp members plus the list item: none may be dropped.
    assert_eq!(
        units.len(),
        3,
        "Expected all three timestamp fields to survive coercion, got: {coerced:?}"
    );
    assert!(
        units.iter().all(|unit| *unit == TimeUnit::Microsecond),
        "Expected every nested timestamp to be Timestamp(Microsecond), got: {coerced:?}"
    );
}
12651326
}

0 commit comments

Comments
 (0)