@@ -600,6 +600,45 @@ void CArrowTableIterator::convertTimeColumn_nanoarrow(
600
600
ArrowArrayMove (newArray, columnArray->array );
601
601
}
602
602
603
+ /* *
604
+ * Helper function to detect nanosecond timestamp overflow and determine if
605
+ * downscaling to microseconds is needed.
606
+ * @param columnArray The Arrow array containing the timestamp data
607
+ * @param epochArray The Arrow array containing epoch values
608
+ * @param fractionArray The Arrow array containing fraction values
609
+ * @return true if overflow was detected and downscaling to microseconds is
610
+ * safe, false otherwise
611
+ * @throws std::overflow_error if overflow is detected but downscaling would
612
+ * lose precision
613
+ */
614
+ static bool _checkNanosecondTimestampOverflowAndDownscale (
615
+ ArrowArrayView* columnArray, ArrowArrayView* epochArray,
616
+ ArrowArrayView* fractionArray) {
617
+ int powTenSB4 = sf::internal::powTenSB4[9 ];
618
+ for (int64_t rowIdx = 0 ; rowIdx < columnArray->array ->length ; rowIdx++) {
619
+ if (!ArrowArrayViewIsNull (columnArray, rowIdx)) {
620
+ int64_t epoch = ArrowArrayViewGetIntUnsafe (epochArray, rowIdx);
621
+ int64_t fraction = ArrowArrayViewGetIntUnsafe (fractionArray, rowIdx);
622
+ if (epoch > (INT64_MAX / powTenSB4) || epoch < (INT64_MIN / powTenSB4)) {
623
+ if (fraction % 1000 != 0 ) {
624
+ std::string errorInfo = Logger::formatString (
625
+ " The total number of nanoseconds %d%d overflows int64 range. "
626
+ " If you use a timestamp with "
627
+ " the nanosecond part over 6-digits in the Snowflake database, "
628
+ " the timestamp must be "
629
+ " between '1677-09-21 00:12:43.145224192' and '2262-04-11 "
630
+ " 23:47:16.854775807' to not overflow." ,
631
+ epoch, fraction);
632
+ throw std::overflow_error (errorInfo.c_str ());
633
+ } else {
634
+ return true ; // Safe to downscale
635
+ }
636
+ }
637
+ }
638
+ }
639
+ return false ;
640
+ }
641
+
603
642
void CArrowTableIterator::convertTimestampColumn_nanoarrow (
604
643
ArrowSchemaView* field, ArrowArrayView* columnArray, const int scale,
605
644
const std::string timezone) {
@@ -614,11 +653,11 @@ void CArrowTableIterator::convertTimestampColumn_nanoarrow(
614
653
newSchema->flags &=
615
654
(field->schema ->flags & ARROW_FLAG_NULLABLE); // map to nullable()
616
655
617
- // calculate has_overflow_to_downscale
656
+ // Find epoch and fraction arrays for overflow detection
657
+ ArrowArrayView* epochArray = nullptr ;
658
+ ArrowArrayView* fractionArray = nullptr ;
618
659
bool has_overflow_to_downscale = false ;
619
- if (scale > 6 && field->type == NANOARROW_TYPE_STRUCT) {
620
- ArrowArrayView* epochArray;
621
- ArrowArrayView* fractionArray;
660
+ if (field->type == NANOARROW_TYPE_STRUCT) {
622
661
for (int64_t i = 0 ; i < field->schema ->n_children ; i++) {
623
662
ArrowSchema* c_schema = field->schema ->children [i];
624
663
if (std::strcmp (c_schema->name , internal::FIELD_NAME_EPOCH.c_str ()) ==
@@ -631,29 +670,9 @@ void CArrowTableIterator::convertTimestampColumn_nanoarrow(
631
670
// do nothing
632
671
}
633
672
}
634
-
635
- int powTenSB4 = sf::internal::powTenSB4[9 ];
636
- for (int64_t rowIdx = 0 ; rowIdx < columnArray->array ->length ; rowIdx++) {
637
- if (!ArrowArrayViewIsNull (columnArray, rowIdx)) {
638
- int64_t epoch = ArrowArrayViewGetIntUnsafe (epochArray, rowIdx);
639
- int64_t fraction = ArrowArrayViewGetIntUnsafe (fractionArray, rowIdx);
640
- if (epoch > (INT64_MAX / powTenSB4) ||
641
- epoch < (INT64_MIN / powTenSB4)) {
642
- if (fraction % 1000 != 0 ) {
643
- std::string errorInfo = Logger::formatString (
644
- " The total number of nanoseconds %d%d overflows int64 range. "
645
- " If you use a timestamp with "
646
- " the nanosecond part over 6-digits in the Snowflake database, "
647
- " the timestamp must be "
648
- " between '1677-09-21 00:12:43.145224192' and '2262-04-11 "
649
- " 23:47:16.854775807' to not overflow." ,
650
- epoch, fraction);
651
- throw std::overflow_error (errorInfo.c_str ());
652
- } else {
653
- has_overflow_to_downscale = true ;
654
- }
655
- }
656
- }
673
+ if (scale > 6 ) {
674
+ has_overflow_to_downscale = _checkNanosecondTimestampOverflowAndDownscale (
675
+ columnArray, epochArray, fractionArray);
657
676
}
658
677
}
659
678
@@ -857,8 +876,8 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
857
876
(field->schema ->flags & ARROW_FLAG_NULLABLE); // map to nullable()
858
877
859
878
// Find epoch and fraction arrays
860
- ArrowArrayView* epochArray;
861
- ArrowArrayView* fractionArray;
879
+ ArrowArrayView* epochArray = nullptr ;
880
+ ArrowArrayView* fractionArray = nullptr ;
862
881
for (int64_t i = 0 ; i < field->schema ->n_children ; i++) {
863
882
ArrowSchema* c_schema = field->schema ->children [i];
864
883
if (std::strcmp (c_schema->name , internal::FIELD_NAME_EPOCH.c_str ()) == 0 ) {
@@ -871,32 +890,11 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
871
890
}
872
891
}
873
892
874
- // Calculate has_overflow_to_downscale for timestamps that would overflow
893
+ // Check for timestamp overflow and determine if downscaling is needed
875
894
bool has_overflow_to_downscale = false ;
876
895
if (scale > 6 && byteLength == 16 ) {
877
- int powTenSB4 = sf::internal::powTenSB4[9 ];
878
- for (int64_t rowIdx = 0 ; rowIdx < columnArray->array ->length ; rowIdx++) {
879
- if (!ArrowArrayViewIsNull (columnArray, rowIdx)) {
880
- int64_t epoch = ArrowArrayViewGetIntUnsafe (epochArray, rowIdx);
881
- int64_t fraction = ArrowArrayViewGetIntUnsafe (fractionArray, rowIdx);
882
- if (epoch > (INT64_MAX / powTenSB4) ||
883
- epoch < (INT64_MIN / powTenSB4)) {
884
- if (fraction % 1000 != 0 ) {
885
- std::string errorInfo = Logger::formatString (
886
- " The total number of nanoseconds %d%d overflows int64 range. "
887
- " If you use a timestamp with "
888
- " the nanosecond part over 6-digits in the Snowflake database, "
889
- " the timestamp must be "
890
- " between '1677-09-21 00:12:43.145224192' and '2262-04-11 "
891
- " 23:47:16.854775807' to not overflow." ,
892
- epoch, fraction);
893
- throw std::overflow_error (errorInfo.c_str ());
894
- } else {
895
- has_overflow_to_downscale = true ;
896
- }
897
- }
898
- }
899
- }
896
+ has_overflow_to_downscale = _checkNanosecondTimestampOverflowAndDownscale (
897
+ columnArray, epochArray, fractionArray);
900
898
}
901
899
902
900
auto timeunit = NANOARROW_TIME_UNIT_SECOND;
0 commit comments