@@ -600,6 +600,45 @@ void CArrowTableIterator::convertTimeColumn_nanoarrow(
600600 ArrowArrayMove (newArray, columnArray->array );
601601}
602602
603+ /* *
604+ * Helper function to detect nanosecond timestamp overflow and determine if
605+ * downscaling to microseconds is needed.
606+ * @param columnArray The Arrow array containing the timestamp data
607+ * @param epochArray The Arrow array containing epoch values
608+ * @param fractionArray The Arrow array containing fraction values
609+ * @return true if overflow was detected and downscaling to microseconds is
610+ * safe, false otherwise
611+ * @throws std::overflow_error if overflow is detected but downscaling would
612+ * lose precision
613+ */
614+ static bool _checkNanosecondTimestampOverflowAndDownscale (
615+ ArrowArrayView* columnArray, ArrowArrayView* epochArray,
616+ ArrowArrayView* fractionArray) {
617+ int powTenSB4 = sf::internal::powTenSB4[9 ];
618+ for (int64_t rowIdx = 0 ; rowIdx < columnArray->array ->length ; rowIdx++) {
619+ if (!ArrowArrayViewIsNull (columnArray, rowIdx)) {
620+ int64_t epoch = ArrowArrayViewGetIntUnsafe (epochArray, rowIdx);
621+ int64_t fraction = ArrowArrayViewGetIntUnsafe (fractionArray, rowIdx);
622+ if (epoch > (INT64_MAX / powTenSB4) || epoch < (INT64_MIN / powTenSB4)) {
623+ if (fraction % 1000 != 0 ) {
624+ std::string errorInfo = Logger::formatString (
625+ " The total number of nanoseconds %d%d overflows int64 range. "
626+ " If you use a timestamp with "
627+ " the nanosecond part over 6-digits in the Snowflake database, "
628+ " the timestamp must be "
629+ " between '1677-09-21 00:12:43.145224192' and '2262-04-11 "
630+ " 23:47:16.854775807' to not overflow." ,
631+ epoch, fraction);
632+ throw std::overflow_error (errorInfo.c_str ());
633+ } else {
634+ return true ; // Safe to downscale
635+ }
636+ }
637+ }
638+ }
639+ return false ;
640+ }
641+
603642void CArrowTableIterator::convertTimestampColumn_nanoarrow (
604643 ArrowSchemaView* field, ArrowArrayView* columnArray, const int scale,
605644 const std::string timezone) {
@@ -614,11 +653,11 @@ void CArrowTableIterator::convertTimestampColumn_nanoarrow(
614653 newSchema->flags &=
615654 (field->schema ->flags & ARROW_FLAG_NULLABLE); // map to nullable()
616655
617- // calculate has_overflow_to_downscale
656+ // Find epoch and fraction arrays for overflow detection
657+ ArrowArrayView* epochArray = nullptr ;
658+ ArrowArrayView* fractionArray = nullptr ;
618659 bool has_overflow_to_downscale = false ;
619660 if (scale > 6 && field->type == NANOARROW_TYPE_STRUCT) {
620- ArrowArrayView* epochArray;
621- ArrowArrayView* fractionArray;
622661 for (int64_t i = 0 ; i < field->schema ->n_children ; i++) {
623662 ArrowSchema* c_schema = field->schema ->children [i];
624663 if (std::strcmp (c_schema->name , internal::FIELD_NAME_EPOCH.c_str ()) ==
@@ -631,30 +670,8 @@ void CArrowTableIterator::convertTimestampColumn_nanoarrow(
631670 // do nothing
632671 }
633672 }
634-
635- int powTenSB4 = sf::internal::powTenSB4[9 ];
636- for (int64_t rowIdx = 0 ; rowIdx < columnArray->array ->length ; rowIdx++) {
637- if (!ArrowArrayViewIsNull (columnArray, rowIdx)) {
638- int64_t epoch = ArrowArrayViewGetIntUnsafe (epochArray, rowIdx);
639- int64_t fraction = ArrowArrayViewGetIntUnsafe (fractionArray, rowIdx);
640- if (epoch > (INT64_MAX / powTenSB4) ||
641- epoch < (INT64_MIN / powTenSB4)) {
642- if (fraction % 1000 != 0 ) {
643- std::string errorInfo = Logger::formatString (
644- " The total number of nanoseconds %d%d overflows int64 range. "
645- " If you use a timestamp with "
646- " the nanosecond part over 6-digits in the Snowflake database, "
647- " the timestamp must be "
648- " between '1677-09-21 00:12:43.145224192' and '2262-04-11 "
649- " 23:47:16.854775807' to not overflow." ,
650- epoch, fraction);
651- throw std::overflow_error (errorInfo.c_str ());
652- } else {
653- has_overflow_to_downscale = true ;
654- }
655- }
656- }
657- }
673+ has_overflow_to_downscale = _checkNanosecondTimestampOverflowAndDownscale (
674+ columnArray, epochArray, fractionArray);
658675 }
659676
660677 if (scale <= 6 ) {
@@ -855,6 +872,29 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
855872 ArrowSchemaInit (newSchema);
856873 newSchema->flags &=
857874 (field->schema ->flags & ARROW_FLAG_NULLABLE); // map to nullable()
875+
876+ // Find epoch and fraction arrays
877+ ArrowArrayView* epochArray = nullptr ;
878+ ArrowArrayView* fractionArray = nullptr ;
879+ for (int64_t i = 0 ; i < field->schema ->n_children ; i++) {
880+ ArrowSchema* c_schema = field->schema ->children [i];
881+ if (std::strcmp (c_schema->name , internal::FIELD_NAME_EPOCH.c_str ()) == 0 ) {
882+ epochArray = columnArray->children [i];
883+ } else if (std::strcmp (c_schema->name ,
884+ internal::FIELD_NAME_FRACTION.c_str ()) == 0 ) {
885+ fractionArray = columnArray->children [i];
886+ } else {
887+ // do nothing
888+ }
889+ }
890+
891+ // Check for timestamp overflow and determine if downscaling is needed
892+ bool has_overflow_to_downscale = false ;
893+ if (scale > 6 && byteLength == 16 ) {
894+ has_overflow_to_downscale = _checkNanosecondTimestampOverflowAndDownscale (
895+ columnArray, epochArray, fractionArray);
896+ }
897+
858898 auto timeunit = NANOARROW_TIME_UNIT_SECOND;
859899 if (scale == 0 ) {
860900 timeunit = NANOARROW_TIME_UNIT_SECOND;
@@ -863,7 +903,9 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
863903 } else if (scale <= 6 ) {
864904 timeunit = NANOARROW_TIME_UNIT_MICRO;
865905 } else {
866- timeunit = NANOARROW_TIME_UNIT_NANO;
906+ // Use microsecond precision if we detected overflow, otherwise nanosecond
907+ timeunit = has_overflow_to_downscale ? NANOARROW_TIME_UNIT_MICRO
908+ : NANOARROW_TIME_UNIT_NANO;
867909 }
868910
869911 if (!timezone.empty ()) {
@@ -893,20 +935,6 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
893935 " from schema : %s, error code: %d" ,
894936 ArrowErrorMessage (&error), returnCode);
895937
896- ArrowArrayView* epochArray;
897- ArrowArrayView* fractionArray;
898- for (int64_t i = 0 ; i < field->schema ->n_children ; i++) {
899- ArrowSchema* c_schema = field->schema ->children [i];
900- if (std::strcmp (c_schema->name , internal::FIELD_NAME_EPOCH.c_str ()) == 0 ) {
901- epochArray = columnArray->children [i];
902- } else if (std::strcmp (c_schema->name ,
903- internal::FIELD_NAME_FRACTION.c_str ()) == 0 ) {
904- fractionArray = columnArray->children [i];
905- } else {
906- // do nothing
907- }
908- }
909-
910938 for (int64_t rowIdx = 0 ; rowIdx < columnArray->array ->length ; rowIdx++) {
911939 if (!ArrowArrayViewIsNull (columnArray, rowIdx)) {
912940 if (byteLength == 8 ) {
@@ -920,8 +948,14 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
920948 returnCode = ArrowArrayAppendInt (
921949 newArray, epoch * sf::internal::powTenSB4[6 - scale]);
922950 } else {
923- returnCode = ArrowArrayAppendInt (
924- newArray, epoch * sf::internal::powTenSB4[9 - scale]);
951+ // Handle overflow by falling back to microsecond precision
952+ if (has_overflow_to_downscale) {
953+ returnCode = ArrowArrayAppendInt (
954+ newArray, epoch * sf::internal::powTenSB4[6 ]);
955+ } else {
956+ returnCode = ArrowArrayAppendInt (
957+ newArray, epoch * sf::internal::powTenSB4[9 - scale]);
958+ }
925959 }
926960 SF_CHECK_ARROW_RC (returnCode,
927961 " [Snowflake Exception] error appending int to "
@@ -941,8 +975,14 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
941975 newArray, epoch * sf::internal::powTenSB4[6 ] +
942976 fraction / sf::internal::powTenSB4[3 ]);
943977 } else {
944- returnCode = ArrowArrayAppendInt (
945- newArray, epoch * sf::internal::powTenSB4[9 ] + fraction);
978+ // Handle overflow by falling back to microsecond precision
979+ if (has_overflow_to_downscale) {
980+ returnCode = ArrowArrayAppendInt (
981+ newArray, epoch * sf::internal::powTenSB4[6 ] + fraction / 1000 );
982+ } else {
983+ returnCode = ArrowArrayAppendInt (
984+ newArray, epoch * sf::internal::powTenSB4[9 ] + fraction);
985+ }
946986 }
947987 SF_CHECK_ARROW_RC (returnCode,
948988 " [Snowflake Exception] error appending int to "
0 commit comments