Skip to content

Commit be87629

Browse files
Fall back to microsecond precision in timestamp TZ conversion when the nanosecond value would overflow int64
1 parent bcb8c80 commit be87629

File tree

2 files changed

+103
-19
lines changed

2 files changed

+103
-19
lines changed

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.cpp

Lines changed: 63 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,50 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
855855
ArrowSchemaInit(newSchema);
856856
newSchema->flags &=
857857
(field->schema->flags & ARROW_FLAG_NULLABLE); // map to nullable()
858+
859+
// Find epoch and fraction arrays
860+
ArrowArrayView* epochArray;
861+
ArrowArrayView* fractionArray;
862+
for (int64_t i = 0; i < field->schema->n_children; i++) {
863+
ArrowSchema* c_schema = field->schema->children[i];
864+
if (std::strcmp(c_schema->name, internal::FIELD_NAME_EPOCH.c_str()) == 0) {
865+
epochArray = columnArray->children[i];
866+
} else if (std::strcmp(c_schema->name,
867+
internal::FIELD_NAME_FRACTION.c_str()) == 0) {
868+
fractionArray = columnArray->children[i];
869+
} else {
870+
// do nothing
871+
}
872+
}
873+
874+
// Calculate has_overflow_to_downscale for timestamps that would overflow
875+
bool has_overflow_to_downscale = false;
876+
if (scale > 6 && byteLength == 16) {
877+
int powTenSB4 = sf::internal::powTenSB4[9];
878+
for (int64_t rowIdx = 0; rowIdx < columnArray->array->length; rowIdx++) {
879+
if (!ArrowArrayViewIsNull(columnArray, rowIdx)) {
880+
int64_t epoch = ArrowArrayViewGetIntUnsafe(epochArray, rowIdx);
881+
int64_t fraction = ArrowArrayViewGetIntUnsafe(fractionArray, rowIdx);
882+
if (epoch > (INT64_MAX / powTenSB4) ||
883+
epoch < (INT64_MIN / powTenSB4)) {
884+
if (fraction % 1000 != 0) {
885+
std::string errorInfo = Logger::formatString(
886+
"The total number of nanoseconds %d%d overflows int64 range. "
887+
"If you use a timestamp with "
888+
"the nanosecond part over 6-digits in the Snowflake database, "
889+
"the timestamp must be "
890+
"between '1677-09-21 00:12:43.145224192' and '2262-04-11 "
891+
"23:47:16.854775807' to not overflow.",
892+
epoch, fraction);
893+
throw std::overflow_error(errorInfo.c_str());
894+
} else {
895+
has_overflow_to_downscale = true;
896+
}
897+
}
898+
}
899+
}
900+
}
901+
858902
auto timeunit = NANOARROW_TIME_UNIT_SECOND;
859903
if (scale == 0) {
860904
timeunit = NANOARROW_TIME_UNIT_SECOND;
@@ -863,7 +907,9 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
863907
} else if (scale <= 6) {
864908
timeunit = NANOARROW_TIME_UNIT_MICRO;
865909
} else {
866-
timeunit = NANOARROW_TIME_UNIT_NANO;
910+
// Use microsecond precision if we detected overflow, otherwise nanosecond
911+
timeunit = has_overflow_to_downscale ? NANOARROW_TIME_UNIT_MICRO
912+
: NANOARROW_TIME_UNIT_NANO;
867913
}
868914

869915
if (!timezone.empty()) {
@@ -893,20 +939,6 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
893939
"from schema : %s, error code: %d",
894940
ArrowErrorMessage(&error), returnCode);
895941

896-
ArrowArrayView* epochArray;
897-
ArrowArrayView* fractionArray;
898-
for (int64_t i = 0; i < field->schema->n_children; i++) {
899-
ArrowSchema* c_schema = field->schema->children[i];
900-
if (std::strcmp(c_schema->name, internal::FIELD_NAME_EPOCH.c_str()) == 0) {
901-
epochArray = columnArray->children[i];
902-
} else if (std::strcmp(c_schema->name,
903-
internal::FIELD_NAME_FRACTION.c_str()) == 0) {
904-
fractionArray = columnArray->children[i];
905-
} else {
906-
// do nothing
907-
}
908-
}
909-
910942
for (int64_t rowIdx = 0; rowIdx < columnArray->array->length; rowIdx++) {
911943
if (!ArrowArrayViewIsNull(columnArray, rowIdx)) {
912944
if (byteLength == 8) {
@@ -920,8 +952,14 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
920952
returnCode = ArrowArrayAppendInt(
921953
newArray, epoch * sf::internal::powTenSB4[6 - scale]);
922954
} else {
923-
returnCode = ArrowArrayAppendInt(
924-
newArray, epoch * sf::internal::powTenSB4[9 - scale]);
955+
// Handle overflow by falling back to microsecond precision
956+
if (has_overflow_to_downscale) {
957+
returnCode = ArrowArrayAppendInt(
958+
newArray, epoch * sf::internal::powTenSB4[6]);
959+
} else {
960+
returnCode = ArrowArrayAppendInt(
961+
newArray, epoch * sf::internal::powTenSB4[9 - scale]);
962+
}
925963
}
926964
SF_CHECK_ARROW_RC(returnCode,
927965
"[Snowflake Exception] error appending int to "
@@ -941,8 +979,14 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
941979
newArray, epoch * sf::internal::powTenSB4[6] +
942980
fraction / sf::internal::powTenSB4[3]);
943981
} else {
944-
returnCode = ArrowArrayAppendInt(
945-
newArray, epoch * sf::internal::powTenSB4[9] + fraction);
982+
// Handle overflow by falling back to microsecond precision
983+
if (has_overflow_to_downscale) {
984+
returnCode = ArrowArrayAppendInt(
985+
newArray, epoch * sf::internal::powTenSB4[6] + fraction / 1000);
986+
} else {
987+
returnCode = ArrowArrayAppendInt(
988+
newArray, epoch * sf::internal::powTenSB4[9] + fraction);
989+
}
946990
}
947991
SF_CHECK_ARROW_RC(returnCode,
948992
"[Snowflake Exception] error appending int to "

test/integ/pandas_it/test_arrow_pandas.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,3 +1491,43 @@ def test_fetch_with_pandas_nullable_types(conn_cnx):
14911491
df = cursor_table.fetch_pandas_all(types_mapper=dtype_mapping.get)
14921492
pandas._testing.assert_series_equal(df.dtypes, expected_dtypes)
14931493
assert df.to_string() == expected_df_to_string
1494+
1495+
1496+
def test_convert_timezone_overflow(conn_cnx):
    """CONVERT_TIMEZONE results beyond the int64 nanosecond range.

    Timestamps past ~2262-04-11 cannot be represented as int64
    nanoseconds, so the Arrow-to-pandas conversion is expected to fall
    back to microsecond precision instead of raising an error or
    returning a corrupted value.
    """
    with conn_cnx() as cnx:
        cursor = cnx.cursor()
        cursor.execute(SQL_ENABLE_ARROW)

        # Plain fetchone is unaffected by the Arrow nanosecond limit.
        row = cursor.execute(
            "SELECT CONVERT_TIMEZONE ('UTC', '2999-12-31 00:00:00.000 +0000') AS result1"
        ).fetchone()
        assert str(row[0]) == "2999-12-31 00:00:00+00:00"

        # fetch_pandas_all should succeed via the microsecond fallback
        # rather than erroring out or producing wrong data.
        df_2999 = cursor.execute(
            "SELECT CONVERT_TIMEZONE ('UTC', '2999-12-31 00:00:00.000 +0000') AS result1"
        ).fetch_pandas_all()

        # Exactly one row and one column, named after the SQL alias.
        assert df_2999.shape == (1, 1)
        assert df_2999.columns[0] == "RESULT1"

        # The year-2999 value must round-trip unchanged.
        ts_2999 = df_2999.iloc[0, 0]
        assert str(ts_2999) == "2999-12-31 00:00:00+00:00"

        # A timestamp inside the nanosecond range keeps working as before
        # (nanosecond precision path, no fallback needed).
        df_2200 = cursor.execute(
            "SELECT CONVERT_TIMEZONE ('UTC', '2200-12-31 00:00:00.000 +0000') AS result1"
        ).fetch_pandas_all()
        ts_2200 = df_2200.iloc[0, 0]
        assert str(ts_2200) == "2200-12-31 00:00:00+00:00"

0 commit comments

Comments
 (0)