Skip to content

Commit 75bbc8a

Browse files
refactor
1 parent be87629 commit 75bbc8a

File tree

1 file changed

+51
-53
lines changed

1 file changed

+51
-53
lines changed

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.cpp

Lines changed: 51 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -600,6 +600,45 @@ void CArrowTableIterator::convertTimeColumn_nanoarrow(
600600
ArrowArrayMove(newArray, columnArray->array);
601601
}
602602

603+
/**
604+
* Helper function to detect nanosecond timestamp overflow and determine if
605+
* downscaling to microseconds is needed.
606+
* @param columnArray The Arrow array containing the timestamp data
607+
* @param epochArray The Arrow array containing epoch values
608+
* @param fractionArray The Arrow array containing fraction values
609+
* @return true if overflow was detected and downscaling to microseconds is
610+
* safe, false otherwise
611+
* @throws std::overflow_error if overflow is detected but downscaling would
612+
* lose precision
613+
*/
614+
static bool _checkNanosecondTimestampOverflowAndDownscale(
615+
ArrowArrayView* columnArray, ArrowArrayView* epochArray,
616+
ArrowArrayView* fractionArray) {
617+
int powTenSB4 = sf::internal::powTenSB4[9];
618+
for (int64_t rowIdx = 0; rowIdx < columnArray->array->length; rowIdx++) {
619+
if (!ArrowArrayViewIsNull(columnArray, rowIdx)) {
620+
int64_t epoch = ArrowArrayViewGetIntUnsafe(epochArray, rowIdx);
621+
int64_t fraction = ArrowArrayViewGetIntUnsafe(fractionArray, rowIdx);
622+
if (epoch > (INT64_MAX / powTenSB4) || epoch < (INT64_MIN / powTenSB4)) {
623+
if (fraction % 1000 != 0) {
624+
std::string errorInfo = Logger::formatString(
625+
"The total number of nanoseconds %d%d overflows int64 range. "
626+
"If you use a timestamp with "
627+
"the nanosecond part over 6-digits in the Snowflake database, "
628+
"the timestamp must be "
629+
"between '1677-09-21 00:12:43.145224192' and '2262-04-11 "
630+
"23:47:16.854775807' to not overflow.",
631+
epoch, fraction);
632+
throw std::overflow_error(errorInfo.c_str());
633+
} else {
634+
return true; // Safe to downscale
635+
}
636+
}
637+
}
638+
}
639+
return false;
640+
}
641+
603642
void CArrowTableIterator::convertTimestampColumn_nanoarrow(
604643
ArrowSchemaView* field, ArrowArrayView* columnArray, const int scale,
605644
const std::string timezone) {
@@ -614,11 +653,11 @@ void CArrowTableIterator::convertTimestampColumn_nanoarrow(
614653
newSchema->flags &=
615654
(field->schema->flags & ARROW_FLAG_NULLABLE); // map to nullable()
616655

617-
// calculate has_overflow_to_downscale
656+
// Find epoch and fraction arrays for overflow detection
657+
ArrowArrayView* epochArray = nullptr;
658+
ArrowArrayView* fractionArray = nullptr;
618659
bool has_overflow_to_downscale = false;
619-
if (scale > 6 && field->type == NANOARROW_TYPE_STRUCT) {
620-
ArrowArrayView* epochArray;
621-
ArrowArrayView* fractionArray;
660+
if (field->type == NANOARROW_TYPE_STRUCT) {
622661
for (int64_t i = 0; i < field->schema->n_children; i++) {
623662
ArrowSchema* c_schema = field->schema->children[i];
624663
if (std::strcmp(c_schema->name, internal::FIELD_NAME_EPOCH.c_str()) ==
@@ -631,29 +670,9 @@ void CArrowTableIterator::convertTimestampColumn_nanoarrow(
631670
// do nothing
632671
}
633672
}
634-
635-
int powTenSB4 = sf::internal::powTenSB4[9];
636-
for (int64_t rowIdx = 0; rowIdx < columnArray->array->length; rowIdx++) {
637-
if (!ArrowArrayViewIsNull(columnArray, rowIdx)) {
638-
int64_t epoch = ArrowArrayViewGetIntUnsafe(epochArray, rowIdx);
639-
int64_t fraction = ArrowArrayViewGetIntUnsafe(fractionArray, rowIdx);
640-
if (epoch > (INT64_MAX / powTenSB4) ||
641-
epoch < (INT64_MIN / powTenSB4)) {
642-
if (fraction % 1000 != 0) {
643-
std::string errorInfo = Logger::formatString(
644-
"The total number of nanoseconds %d%d overflows int64 range. "
645-
"If you use a timestamp with "
646-
"the nanosecond part over 6-digits in the Snowflake database, "
647-
"the timestamp must be "
648-
"between '1677-09-21 00:12:43.145224192' and '2262-04-11 "
649-
"23:47:16.854775807' to not overflow.",
650-
epoch, fraction);
651-
throw std::overflow_error(errorInfo.c_str());
652-
} else {
653-
has_overflow_to_downscale = true;
654-
}
655-
}
656-
}
673+
if (scale > 6) {
674+
has_overflow_to_downscale = _checkNanosecondTimestampOverflowAndDownscale(
675+
columnArray, epochArray, fractionArray);
657676
}
658677
}
659678

@@ -857,8 +876,8 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
857876
(field->schema->flags & ARROW_FLAG_NULLABLE); // map to nullable()
858877

859878
// Find epoch and fraction arrays
860-
ArrowArrayView* epochArray;
861-
ArrowArrayView* fractionArray;
879+
ArrowArrayView* epochArray = nullptr;
880+
ArrowArrayView* fractionArray = nullptr;
862881
for (int64_t i = 0; i < field->schema->n_children; i++) {
863882
ArrowSchema* c_schema = field->schema->children[i];
864883
if (std::strcmp(c_schema->name, internal::FIELD_NAME_EPOCH.c_str()) == 0) {
@@ -871,32 +890,11 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
871890
}
872891
}
873892

874-
// Calculate has_overflow_to_downscale for timestamps that would overflow
893+
// Check for timestamp overflow and determine if downscaling is needed
875894
bool has_overflow_to_downscale = false;
876895
if (scale > 6 && byteLength == 16) {
877-
int powTenSB4 = sf::internal::powTenSB4[9];
878-
for (int64_t rowIdx = 0; rowIdx < columnArray->array->length; rowIdx++) {
879-
if (!ArrowArrayViewIsNull(columnArray, rowIdx)) {
880-
int64_t epoch = ArrowArrayViewGetIntUnsafe(epochArray, rowIdx);
881-
int64_t fraction = ArrowArrayViewGetIntUnsafe(fractionArray, rowIdx);
882-
if (epoch > (INT64_MAX / powTenSB4) ||
883-
epoch < (INT64_MIN / powTenSB4)) {
884-
if (fraction % 1000 != 0) {
885-
std::string errorInfo = Logger::formatString(
886-
"The total number of nanoseconds %d%d overflows int64 range. "
887-
"If you use a timestamp with "
888-
"the nanosecond part over 6-digits in the Snowflake database, "
889-
"the timestamp must be "
890-
"between '1677-09-21 00:12:43.145224192' and '2262-04-11 "
891-
"23:47:16.854775807' to not overflow.",
892-
epoch, fraction);
893-
throw std::overflow_error(errorInfo.c_str());
894-
} else {
895-
has_overflow_to_downscale = true;
896-
}
897-
}
898-
}
899-
}
896+
has_overflow_to_downscale = _checkNanosecondTimestampOverflowAndDownscale(
897+
columnArray, epochArray, fractionArray);
900898
}
901899

902900
auto timeunit = NANOARROW_TIME_UNIT_SECOND;

0 commit comments

Comments
 (0)