Skip to content

Commit 6bfc9fb

Browse files
alamb and sum12
authored and committed
allow casting from Timestamp based arrays to utf8 (apache#664) (apache#698)
The change uses the existing `PrimitiveArray::value_as_datetime` to support casting from `Timestamp(_, _)` to `[Large]Utf8`. Co-authored-by: Sumit <[email protected]>
1 parent 15715de commit 6bfc9fb

File tree

1 file changed

+67
-0
lines changed

1 file changed

+67
-0
lines changed

arrow/src/compute/kernels/cast.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
101101
(LargeUtf8, Date64) => true,
102102
(LargeUtf8, Timestamp(TimeUnit::Nanosecond, None)) => true,
103103
(LargeUtf8, _) => DataType::is_numeric(to_type),
104+
(Timestamp(_, _), Utf8) | (Timestamp(_, _), LargeUtf8) => true,
104105
(_, Utf8) | (_, LargeUtf8) => {
105106
DataType::is_numeric(from_type) || from_type == &Binary
106107
}
@@ -782,6 +783,20 @@ pub fn cast_with_options(
782783
}
783784
Float32 => cast_numeric_to_string::<Float32Type, i32>(array),
784785
Float64 => cast_numeric_to_string::<Float64Type, i32>(array),
786+
Timestamp(unit, _) => match unit {
787+
TimeUnit::Nanosecond => {
788+
cast_timestamp_to_string::<TimestampNanosecondType, i32>(array)
789+
}
790+
TimeUnit::Microsecond => {
791+
cast_timestamp_to_string::<TimestampMicrosecondType, i32>(array)
792+
}
793+
TimeUnit::Millisecond => {
794+
cast_timestamp_to_string::<TimestampMillisecondType, i32>(array)
795+
}
796+
TimeUnit::Second => {
797+
cast_timestamp_to_string::<TimestampSecondType, i32>(array)
798+
}
799+
},
785800
Binary => {
786801
let array = array.as_any().downcast_ref::<BinaryArray>().unwrap();
787802
Ok(Arc::new(
@@ -827,6 +842,20 @@ pub fn cast_with_options(
827842
}
828843
Float32 => cast_numeric_to_string::<Float32Type, i64>(array),
829844
Float64 => cast_numeric_to_string::<Float64Type, i64>(array),
845+
Timestamp(unit, _) => match unit {
846+
TimeUnit::Nanosecond => {
847+
cast_timestamp_to_string::<TimestampNanosecondType, i64>(array)
848+
}
849+
TimeUnit::Microsecond => {
850+
cast_timestamp_to_string::<TimestampMicrosecondType, i64>(array)
851+
}
852+
TimeUnit::Millisecond => {
853+
cast_timestamp_to_string::<TimestampMillisecondType, i64>(array)
854+
}
855+
TimeUnit::Second => {
856+
cast_timestamp_to_string::<TimestampSecondType, i64>(array)
857+
}
858+
},
830859
Binary => {
831860
let array = array.as_any().downcast_ref::<BinaryArray>().unwrap();
832861
Ok(Arc::new(
@@ -1384,6 +1413,28 @@ where
13841413
unsafe { PrimitiveArray::<R>::from_trusted_len_iter(iter) }
13851414
}
13861415

1416+
/// Cast timestamp types to Utf8/LargeUtf8
1417+
fn cast_timestamp_to_string<T, OffsetSize>(array: &ArrayRef) -> Result<ArrayRef>
1418+
where
1419+
T: ArrowTemporalType + ArrowNumericType,
1420+
i64: From<<T as ArrowPrimitiveType>::Native>,
1421+
OffsetSize: StringOffsetSizeTrait,
1422+
{
1423+
let array = array.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
1424+
1425+
Ok(Arc::new(
1426+
(0..array.len())
1427+
.map(|ix| {
1428+
if array.is_null(ix) {
1429+
None
1430+
} else {
1431+
array.value_as_datetime(ix).map(|v| v.to_string())
1432+
}
1433+
})
1434+
.collect::<GenericStringArray<OffsetSize>>(),
1435+
))
1436+
}
1437+
13871438
/// Cast numeric types to Utf8
13881439
fn cast_numeric_to_string<FROM, OffsetSize>(array: &ArrayRef) -> Result<ArrayRef>
13891440
where
@@ -2553,6 +2604,22 @@ mod tests {
25532604
assert!(c.is_null(2));
25542605
}
25552606

2607+
// Casting a millisecond timestamp array to `Utf8` must render each non-null
// value as a date-time string and keep null slots null.
#[test]
fn test_cast_timestamp_to_string() {
    let a = TimestampMillisecondArray::from_opt_vec(
        vec![Some(864000000005), Some(1545696000001), None],
        Some("UTC".to_string()),
    );
    let array = Arc::new(a) as ArrayRef;
    let b = cast(&array, &DataType::Utf8).unwrap();
    let c = b.as_any().downcast_ref::<StringArray>().unwrap();
    assert_eq!(&DataType::Utf8, c.data_type());
    // The expected strings carry no timezone suffix even though the input
    // array was built with "UTC".
    assert_eq!("1997-05-19 00:00:00.005", c.value(0));
    assert_eq!("2018-12-25 00:00:00.001", c.value(1));
    assert!(c.is_null(2));
}
2622+
25562623
#[test]
25572624
fn test_cast_between_timestamps() {
25582625
let a = TimestampMillisecondArray::from_opt_vec(

0 commit comments

Comments
 (0)