Skip to content

Commit be87629

Browse files
Fall back to microsecond precision in timestamp TZ conversion when the nanosecond value would overflow int64
1 parent bcb8c80 commit be87629

File tree

2 files changed

+103
-19
lines changed

2 files changed

+103
-19
lines changed

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.cpp

Lines changed: 63 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,50 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
855855
ArrowSchemaInit(newSchema);
856856
newSchema->flags &=
857857
(field->schema->flags & ARROW_FLAG_NULLABLE); // map to nullable()
858+
859+
// Find epoch and fraction arrays
860+
ArrowArrayView* epochArray;
861+
ArrowArrayView* fractionArray;
862+
for (int64_t i = 0; i < field->schema->n_children; i++) {
863+
ArrowSchema* c_schema = field->schema->children[i];
864+
if (std::strcmp(c_schema->name, internal::FIELD_NAME_EPOCH.c_str()) == 0) {
865+
epochArray = columnArray->children[i];
866+
} else if (std::strcmp(c_schema->name,
867+
internal::FIELD_NAME_FRACTION.c_str()) == 0) {
868+
fractionArray = columnArray->children[i];
869+
} else {
870+
// do nothing
871+
}
872+
}
873+
874+
// Calculate has_overflow_to_downscale for timestamps that would overflow
875+
bool has_overflow_to_downscale = false;
876+
if (scale > 6 && byteLength == 16) {
877+
int powTenSB4 = sf::internal::powTenSB4[9];
878+
for (int64_t rowIdx = 0; rowIdx < columnArray->array->length; rowIdx++) {
879+
if (!ArrowArrayViewIsNull(columnArray, rowIdx)) {
880+
int64_t epoch = ArrowArrayViewGetIntUnsafe(epochArray, rowIdx);
881+
int64_t fraction = ArrowArrayViewGetIntUnsafe(fractionArray, rowIdx);
882+
if (epoch > (INT64_MAX / powTenSB4) ||
883+
epoch < (INT64_MIN / powTenSB4)) {
884+
if (fraction % 1000 != 0) {
885+
std::string errorInfo = Logger::formatString(
886+
"The total number of nanoseconds %d%d overflows int64 range. "
887+
"If you use a timestamp with "
888+
"the nanosecond part over 6-digits in the Snowflake database, "
889+
"the timestamp must be "
890+
"between '1677-09-21 00:12:43.145224192' and '2262-04-11 "
891+
"23:47:16.854775807' to not overflow.",
892+
epoch, fraction);
893+
throw std::overflow_error(errorInfo.c_str());
894+
} else {
895+
has_overflow_to_downscale = true;
896+
}
897+
}
898+
}
899+
}
900+
}
901+
858902
auto timeunit = NANOARROW_TIME_UNIT_SECOND;
859903
if (scale == 0) {
860904
timeunit = NANOARROW_TIME_UNIT_SECOND;
@@ -863,7 +907,9 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
863907
} else if (scale <= 6) {
864908
timeunit = NANOARROW_TIME_UNIT_MICRO;
865909
} else {
866-
timeunit = NANOARROW_TIME_UNIT_NANO;
910+
// Use microsecond precision if we detected overflow, otherwise nanosecond
911+
timeunit = has_overflow_to_downscale ? NANOARROW_TIME_UNIT_MICRO
912+
: NANOARROW_TIME_UNIT_NANO;
867913
}
868914

869915
if (!timezone.empty()) {
@@ -893,20 +939,6 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
893939
"from schema : %s, error code: %d",
894940
ArrowErrorMessage(&error), returnCode);
895941

896-
ArrowArrayView* epochArray;
897-
ArrowArrayView* fractionArray;
898-
for (int64_t i = 0; i < field->schema->n_children; i++) {
899-
ArrowSchema* c_schema = field->schema->children[i];
900-
if (std::strcmp(c_schema->name, internal::FIELD_NAME_EPOCH.c_str()) == 0) {
901-
epochArray = columnArray->children[i];
902-
} else if (std::strcmp(c_schema->name,
903-
internal::FIELD_NAME_FRACTION.c_str()) == 0) {
904-
fractionArray = columnArray->children[i];
905-
} else {
906-
// do nothing
907-
}
908-
}
909-
910942
for (int64_t rowIdx = 0; rowIdx < columnArray->array->length; rowIdx++) {
911943
if (!ArrowArrayViewIsNull(columnArray, rowIdx)) {
912944
if (byteLength == 8) {
@@ -920,8 +952,14 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
920952
returnCode = ArrowArrayAppendInt(
921953
newArray, epoch * sf::internal::powTenSB4[6 - scale]);
922954
} else {
923-
returnCode = ArrowArrayAppendInt(
924-
newArray, epoch * sf::internal::powTenSB4[9 - scale]);
955+
// Handle overflow by falling back to microsecond precision
956+
if (has_overflow_to_downscale) {
957+
returnCode = ArrowArrayAppendInt(
958+
newArray, epoch * sf::internal::powTenSB4[6]);
959+
} else {
960+
returnCode = ArrowArrayAppendInt(
961+
newArray, epoch * sf::internal::powTenSB4[9 - scale]);
962+
}
925963
}
926964
SF_CHECK_ARROW_RC(returnCode,
927965
"[Snowflake Exception] error appending int to "
@@ -941,8 +979,14 @@ void CArrowTableIterator::convertTimestampTZColumn_nanoarrow(
941979
newArray, epoch * sf::internal::powTenSB4[6] +
942980
fraction / sf::internal::powTenSB4[3]);
943981
} else {
944-
returnCode = ArrowArrayAppendInt(
945-
newArray, epoch * sf::internal::powTenSB4[9] + fraction);
982+
// Handle overflow by falling back to microsecond precision
983+
if (has_overflow_to_downscale) {
984+
returnCode = ArrowArrayAppendInt(
985+
newArray, epoch * sf::internal::powTenSB4[6] + fraction / 1000);
986+
} else {
987+
returnCode = ArrowArrayAppendInt(
988+
newArray, epoch * sf::internal::powTenSB4[9] + fraction);
989+
}
946990
}
947991
SF_CHECK_ARROW_RC(returnCode,
948992
"[Snowflake Exception] error appending int to "

test/integ/pandas_it/test_arrow_pandas.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,3 +1491,43 @@ def test_fetch_with_pandas_nullable_types(conn_cnx):
14911491
df = cursor_table.fetch_pandas_all(types_mapper=dtype_mapping.get)
14921492
pandas._testing.assert_series_equal(df.dtypes, expected_dtypes)
14931493
assert df.to_string() == expected_df_to_string
1494+
1495+
1496+
def test_convert_timezone_overflow(conn_cnx):
    """CONVERT_TIMEZONE results beyond the int64 nanosecond range.

    Timestamps past ~2262-04-11 cannot be represented as int64
    nanoseconds, so the Arrow-to-pandas conversion is expected to fall
    back to microsecond precision instead of raising an error or
    returning a corrupted value.
    """
    with conn_cnx() as cnx:
        cursor = cnx.cursor()
        cursor.execute(SQL_ENABLE_ARROW)

        # Plain fetchone is unaffected by the Arrow nanosecond limit.
        row = cursor.execute(
            "SELECT CONVERT_TIMEZONE ('UTC', '2999-12-31 00:00:00.000 +0000') AS result1"
        ).fetchone()
        assert str(row[0]) == "2999-12-31 00:00:00+00:00"

        # fetch_pandas_all should succeed via the microsecond fallback
        # rather than erroring out or producing wrong data.
        df_2999 = cursor.execute(
            "SELECT CONVERT_TIMEZONE ('UTC', '2999-12-31 00:00:00.000 +0000') AS result1"
        ).fetch_pandas_all()

        # Exactly one row and one column, named after the SQL alias.
        assert df_2999.shape == (1, 1)
        assert df_2999.columns[0] == "RESULT1"

        # The year-2999 value must round-trip unchanged.
        ts_2999 = df_2999.iloc[0, 0]
        assert str(ts_2999) == "2999-12-31 00:00:00+00:00"

        # A timestamp inside the nanosecond range keeps working as before
        # (nanosecond precision path, no fallback needed).
        df_2200 = cursor.execute(
            "SELECT CONVERT_TIMEZONE ('UTC', '2200-12-31 00:00:00.000 +0000') AS result1"
        ).fetch_pandas_all()
        ts_2200 = df_2200.iloc[0, 0]
        assert str(ts_2200) == "2200-12-31 00:00:00+00:00"

0 commit comments

Comments
 (0)