@@ -18,7 +18,7 @@ use crate::error::Error as DatafusionIcebergError;
1818use datafusion:: {
1919 arrow:: {
2020 array:: ArrayRef ,
21- datatypes:: { DataType , Schema as ArrowSchema } ,
21+ datatypes:: { DataType , Schema as ArrowSchema , TimeUnit } ,
2222 } ,
2323 common:: {
2424 tree_node:: { Transformed , TreeNode } ,
@@ -362,7 +362,15 @@ impl DateTransform {
362362 TypeSignature :: Exact ( vec![ DataType :: Utf8 , DataType :: Date32 ] ) ,
363363 TypeSignature :: Exact ( vec![
364364 DataType :: Utf8 ,
365- DataType :: Timestamp ( datafusion:: arrow:: datatypes:: TimeUnit :: Microsecond , None ) ,
365+ DataType :: Timestamp ( TimeUnit :: Microsecond , None ) ,
366+ ] ) ,
367+ // Iceberg `timestamptz` is always UTC microseconds, mapped to
368+ // Timestamp(Microsecond, Some("UTC")) in iceberg-rust-spec/src/arrow/schema.rs.
369+ // Arrow allows arbitrary tz strings [1] but we only accept "UTC".
370+ // [1] https://github.com/apache/arrow/blob/apache-arrow-23.0.1/format/Schema.fbs#L385
371+ TypeSignature :: Exact ( vec![
372+ DataType :: Utf8 ,
373+ DataType :: Timestamp ( TimeUnit :: Microsecond , Some ( "UTC" . into( ) ) ) ,
366374 ] ) ,
367375 ] ) ,
368376 volatility : Volatility :: Immutable ,
@@ -467,3 +475,242 @@ fn value_to_scalarvalue(value: Value) -> Result<ScalarValue, Error> {
467475 ) ) ) ,
468476 }
469477}
478+
479+ #[ cfg( test) ]
480+ mod date_transform_tests {
481+ use super :: * ;
482+ use datafusion:: arrow:: datatypes:: Field ;
483+ use datafusion:: common:: config:: ConfigOptions ;
484+ use std:: sync:: Arc ;
485+
486+ /// Helper: invoke `DateTransform` directly with a transform name and scalar value.
487+ fn invoke_date_transform (
488+ transform_name : & str ,
489+ scalar : ScalarValue ,
490+ ) -> datafusion:: error:: Result < ColumnarValue > {
491+ let dt = DateTransform :: new ( ) ;
492+ let value_type = scalar. data_type ( ) ;
493+ dt. invoke_with_args ( ScalarFunctionArgs {
494+ args : vec ! [
495+ ColumnarValue :: Scalar ( ScalarValue :: new_utf8( transform_name) ) ,
496+ ColumnarValue :: Scalar ( scalar) ,
497+ ] ,
498+ arg_fields : vec ! [
499+ Arc :: new( Field :: new( "transform" , DataType :: Utf8 , false ) ) ,
500+ Arc :: new( Field :: new( "value" , value_type, true ) ) ,
501+ ] ,
502+ number_rows : 1 ,
503+ return_field : Arc :: new ( Field :: new ( "result" , DataType :: Int32 , true ) ) ,
504+ config_options : Arc :: new ( ConfigOptions :: default ( ) ) ,
505+ } )
506+ }
507+
508+ /// Extract Int32 from a ColumnarValue, panicking with a clear message on mismatch.
509+ fn unwrap_int32 ( result : ColumnarValue ) -> i32 {
510+ match result {
511+ ColumnarValue :: Scalar ( ScalarValue :: Int32 ( Some ( v) ) ) => v,
512+ other => panic ! ( "expected ScalarValue::Int32, got {other:?}" ) ,
513+ }
514+ }
515+
516+ // 2024-03-15T10:30:00Z in microseconds since epoch
517+ const TS_MICROS : i64 = 1_710_498_600_000_000 ;
518+
519+ // -- invoke DateTransform with Date32 (19797 days since epoch = 2024-03-15) --
520+
521+ #[ test]
522+ fn year_on_date32 ( ) {
523+ let result =
524+ invoke_date_transform ( "year" , ScalarValue :: Date32 ( Some ( 19797 ) ) ) . unwrap ( ) ;
525+ // 2024 - 1970 = 54
526+ assert_eq ! ( unwrap_int32( result) , 54 ) ;
527+ }
528+
529+ #[ test]
530+ fn month_on_date32 ( ) {
531+ let result =
532+ invoke_date_transform ( "month" , ScalarValue :: Date32 ( Some ( 19797 ) ) ) . unwrap ( ) ;
533+ // (2024 - 1970) * 12 + 3 = 651 (month is 1-based)
534+ assert_eq ! ( unwrap_int32( result) , 651 ) ;
535+ }
536+
537+ #[ test]
538+ fn day_on_date32 ( ) {
539+ let result =
540+ invoke_date_transform ( "day" , ScalarValue :: Date32 ( Some ( 19797 ) ) ) . unwrap ( ) ;
541+ assert_eq ! ( unwrap_int32( result) , 19797 ) ;
542+ }
543+
544+ #[ test]
545+ fn hour_on_date32_is_rejected ( ) {
546+ // Date32 has no time component — hour transform is not supported
547+ let result = invoke_date_transform ( "hour" , ScalarValue :: Date32 ( Some ( 19797 ) ) ) ;
548+ assert ! ( result. is_err( ) , "hour transform should not be supported for Date32" ) ;
549+ }
550+
551+ // -- invoke DateTransform directly with Timestamp (no TZ) --
552+
553+ #[ test]
554+ fn year_on_timestamp ( ) {
555+ let result = invoke_date_transform (
556+ "year" ,
557+ ScalarValue :: TimestampMicrosecond ( Some ( TS_MICROS ) , None ) ,
558+ )
559+ . unwrap ( ) ;
560+ // 2024 - 1970 = 54
561+ assert_eq ! ( unwrap_int32( result) , 54 ) ;
562+ }
563+
564+ #[ test]
565+ fn month_on_timestamp ( ) {
566+ let result = invoke_date_transform (
567+ "month" ,
568+ ScalarValue :: TimestampMicrosecond ( Some ( TS_MICROS ) , None ) ,
569+ )
570+ . unwrap ( ) ;
571+ // (2024 - 1970) * 12 + 3 = 651 (month is 1-based)
572+ assert_eq ! ( unwrap_int32( result) , 651 ) ;
573+ }
574+
575+ #[ test]
576+ fn day_on_timestamp ( ) {
577+ let result = invoke_date_transform (
578+ "day" ,
579+ ScalarValue :: TimestampMicrosecond ( Some ( TS_MICROS ) , None ) ,
580+ )
581+ . unwrap ( ) ;
582+ // 2024-03-15 is day 19797 since epoch
583+ assert_eq ! ( unwrap_int32( result) , 19797 ) ;
584+ }
585+
586+ #[ test]
587+ fn hour_on_timestamp ( ) {
588+ let result = invoke_date_transform (
589+ "hour" ,
590+ ScalarValue :: TimestampMicrosecond ( Some ( TS_MICROS ) , None ) ,
591+ )
592+ . unwrap ( ) ;
593+ // 19797 * 24 + 10 = 475138
594+ assert_eq ! ( unwrap_int32( result) , 475138 ) ;
595+ }
596+
597+ // -- invoke DateTransform with Timestamp(UTC) --
598+
599+ #[ test]
600+ fn year_on_timestamp_with_utc ( ) {
601+ let result = invoke_date_transform (
602+ "year" ,
603+ ScalarValue :: TimestampMicrosecond ( Some ( TS_MICROS ) , Some ( "UTC" . into ( ) ) ) ,
604+ )
605+ . unwrap ( ) ;
606+ assert_eq ! ( unwrap_int32( result) , 54 ) ;
607+ }
608+
609+ #[ test]
610+ fn month_on_timestamp_with_utc ( ) {
611+ let result = invoke_date_transform (
612+ "month" ,
613+ ScalarValue :: TimestampMicrosecond ( Some ( TS_MICROS ) , Some ( "UTC" . into ( ) ) ) ,
614+ )
615+ . unwrap ( ) ;
616+ assert_eq ! ( unwrap_int32( result) , 651 ) ;
617+ }
618+
619+ #[ test]
620+ fn day_on_timestamp_with_utc ( ) {
621+ let result = invoke_date_transform (
622+ "day" ,
623+ ScalarValue :: TimestampMicrosecond ( Some ( TS_MICROS ) , Some ( "UTC" . into ( ) ) ) ,
624+ )
625+ . unwrap ( ) ;
626+ assert_eq ! ( unwrap_int32( result) , 19797 ) ;
627+ }
628+
629+ #[ test]
630+ fn hour_on_timestamp_with_utc ( ) {
631+ let result = invoke_date_transform (
632+ "hour" ,
633+ ScalarValue :: TimestampMicrosecond ( Some ( TS_MICROS ) , Some ( "UTC" . into ( ) ) ) ,
634+ )
635+ . unwrap ( ) ;
636+ assert_eq ! ( unwrap_int32( result) , 475138 ) ;
637+ }
638+
639+ // -- edge cases --
640+
641+ #[ test]
642+ fn epoch_zero_transforms ( ) {
643+ // 1970-01-01T00:00:00Z
644+ let cases = vec ! [
645+ ( "year" , 0 ) , // 1970 - 1970 = 0
646+ ( "month" , 1 ) , // 0 * 12 + 1 = 1 (month is 1-based)
647+ ( "day" , 0 ) , // day 0 since epoch
648+ ( "hour" , 0 ) , // hour 0 since epoch
649+ ] ;
650+ for ( name, expected) in cases {
651+ let result = invoke_date_transform (
652+ name,
653+ ScalarValue :: TimestampMicrosecond ( Some ( 0 ) , None ) ,
654+ )
655+ . unwrap ( ) ;
656+ assert_eq ! (
657+ unwrap_int32( result) ,
658+ expected,
659+ "epoch zero: {name} transform"
660+ ) ;
661+ }
662+ }
663+
664+ #[ test]
665+ fn invalid_transform_name_is_rejected ( ) {
666+ let result = invoke_date_transform (
667+ "century" ,
668+ ScalarValue :: TimestampMicrosecond ( Some ( TS_MICROS ) , None ) ,
669+ ) ;
670+ assert ! ( result. is_err( ) , "unknown transform name should be rejected" ) ;
671+ }
672+
673+ #[ test]
674+ fn signature_rejects_non_utc_timezone ( ) {
675+ // Iceberg only maps timestamptz to Timestamp(Microsecond, Some("UTC"))
676+ // per iceberg-rust-spec/src/arrow/schema.rs. The DateTransform signature
677+ // enforces this — non-UTC tz strings are not accepted.
678+ let udf = ScalarUDF :: new_from_impl ( DateTransform :: new ( ) ) ;
679+ let sig = & udf. signature ( ) . type_signature ;
680+ let non_utc = vec ! [
681+ Some ( "+00:00" . into( ) ) ,
682+ Some ( "Etc/UTC" . into( ) ) ,
683+ Some ( "America/New_York" . into( ) ) ,
684+ ] ;
685+ for tz in non_utc {
686+ let args = vec ! [
687+ DataType :: Utf8 ,
688+ DataType :: Timestamp ( TimeUnit :: Microsecond , tz. clone( ) ) ,
689+ ] ;
690+ let accepts = match sig {
691+ TypeSignature :: OneOf ( variants) => variants. iter ( ) . any ( |v| match v {
692+ TypeSignature :: Exact ( expected) => expected == & args,
693+ _ => false ,
694+ } ) ,
695+ _ => false ,
696+ } ;
697+ assert ! (
698+ !accepts,
699+ "signature should reject Timestamp(Microsecond, {tz:?})"
700+ ) ;
701+ }
702+ }
703+
704+ // -- transform_literal wiring --
705+
706+ #[ test]
707+ fn transform_literal_identity_passes_through ( ) {
708+ let input = Expr :: Literal (
709+ ScalarValue :: TimestampMicrosecond ( Some ( 42 ) , None ) ,
710+ None ,
711+ ) ;
712+ let result = transform_literal ( input. clone ( ) , & Transform :: Identity )
713+ . expect ( "identity should pass through" ) ;
714+ assert_eq ! ( result, input) ;
715+ }
716+ }
0 commit comments