Skip to content

Commit 1d2459f

Browse files
sfc-gh-nkrishnasfc-gh-turbaszek
authored andcommitted
SNOW-2338989: Ensure Arrow to_pandas maps Interval types (#2536)
1 parent 1878c5a commit 1d2459f

File tree

5 files changed

+188
-0
lines changed

5 files changed

+188
-0
lines changed

DESCRIPTION.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
1313
- Added the `oauth_credentials_in_body` parameter supporting an option to send the oauth client credentials in the request body
1414
- Fix retry behavior for `ECONNRESET` error
1515
- Added an option to exclude `botocore` and `boto3` dependencies by setting `SNOWFLAKE_NO_BOTO` environment variable during installation
16+
- Added support for pandas conversion for Day-time and Year-Month Interval types
1617

1718
- v3.17.4(September 22,2025)
1819
- Added support for intermediate certificates as roots when they are stored in the trust store

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ void CArrowTableIterator::convertIfNeeded(ArrowSchema* columnSchema,
6868
case SnowflakeType::Type::DATE:
6969
case SnowflakeType::Type::REAL:
7070
case SnowflakeType::Type::TEXT:
71+
case SnowflakeType::Type::INTERVAL_YEAR_MONTH:
7172
case SnowflakeType::Type::VARIANT:
7273
case SnowflakeType::Type::VECTOR: {
7374
// Do not need to convert
@@ -174,6 +175,24 @@ void CArrowTableIterator::convertIfNeeded(ArrowSchema* columnSchema,
174175
break;
175176
}
176177

178+
case SnowflakeType::Type::INTERVAL_DAY_TIME: {
179+
int scale = 9;
180+
if (metadata != nullptr) {
181+
struct ArrowStringView scaleString = ArrowCharView(nullptr);
182+
returnCode = ArrowMetadataGetValue(metadata, ArrowCharView("scale"),
183+
&scaleString);
184+
SF_CHECK_ARROW_RC(returnCode,
185+
"[Snowflake Exception] error getting 'scale' "
186+
"from Arrow metadata, error code: %d",
187+
returnCode);
188+
scale =
189+
std::stoi(std::string(scaleString.data, scaleString.size_bytes));
190+
}
191+
convertIntervalDayTimeColumn_nanoarrow(&columnSchemaView, columnArray,
192+
scale);
193+
break;
194+
}
195+
177196
case SnowflakeType::Type::TIME: {
178197
int scale = 9;
179198
if (metadata != nullptr) {
@@ -503,6 +522,76 @@ void CArrowTableIterator::
503522
ArrowArrayMove(newArray, columnArray->array);
504523
}
505524

525+
void CArrowTableIterator::convertIntervalDayTimeColumn_nanoarrow(
526+
ArrowSchemaView* field, ArrowArrayView* columnArray, const int scale) {
527+
int returnCode = 0;
528+
nanoarrow::UniqueSchema newUniqueField;
529+
nanoarrow::UniqueArray newUniqueArray;
530+
ArrowSchema* newSchema = newUniqueField.get();
531+
ArrowArray* newArray = newUniqueArray.get();
532+
ArrowError error;
533+
534+
// create new schema
535+
ArrowSchemaInit(newSchema);
536+
newSchema->flags &=
537+
(field->schema->flags & ARROW_FLAG_NULLABLE); // map to nullable()
538+
539+
returnCode = ArrowSchemaSetTypeDateTime(newSchema, NANOARROW_TYPE_DURATION,
540+
NANOARROW_TIME_UNIT_NANO, NULL);
541+
SF_CHECK_ARROW_RC(returnCode,
542+
"[Snowflake Exception] error setting arrow schema type "
543+
"DateTime, error code: %d",
544+
returnCode);
545+
546+
returnCode = ArrowSchemaSetName(newSchema, field->schema->name);
547+
SF_CHECK_ARROW_RC(
548+
returnCode,
549+
"[Snowflake Exception] error setting schema name, error code: %d",
550+
returnCode);
551+
552+
returnCode = ArrowArrayInitFromSchema(newArray, newSchema, &error);
553+
SF_CHECK_ARROW_RC(returnCode,
554+
"[Snowflake Exception] error initializing ArrowArrayView "
555+
"from schema : %s, error code: %d",
556+
ArrowErrorMessage(&error), returnCode);
557+
558+
returnCode = ArrowArrayStartAppending(newArray);
559+
SF_CHECK_ARROW_RC(
560+
returnCode,
561+
"[Snowflake Exception] error appending arrow array, error code: %d",
562+
returnCode);
563+
564+
for (int64_t rowIdx = 0; rowIdx < columnArray->array->length; rowIdx++) {
565+
if (ArrowArrayViewIsNull(columnArray, rowIdx)) {
566+
returnCode = ArrowArrayAppendNull(newArray, 1);
567+
SF_CHECK_ARROW_RC(returnCode,
568+
"[Snowflake Exception] error appending null to arrow "
569+
"array, error code: %d",
570+
returnCode);
571+
} else {
572+
ArrowDecimal arrowDecimal;
573+
ArrowDecimalInit(&arrowDecimal, 128, 38, 0);
574+
ArrowArrayViewGetDecimalUnsafe(columnArray, rowIdx, &arrowDecimal);
575+
auto originalVal = ArrowDecimalGetIntUnsafe(&arrowDecimal);
576+
returnCode = ArrowArrayAppendInt(newArray, originalVal);
577+
SF_CHECK_ARROW_RC(returnCode,
578+
"[Snowflake Exception] error appending int to arrow "
579+
"array, error code: %d",
580+
returnCode);
581+
}
582+
}
583+
584+
returnCode = ArrowArrayFinishBuildingDefault(newArray, &error);
585+
SF_CHECK_ARROW_RC(returnCode,
586+
"[Snowflake Exception] error finishing building arrow "
587+
"array: %s, error code: %d",
588+
ArrowErrorMessage(&error), returnCode);
589+
field->schema->release(field->schema);
590+
ArrowSchemaMove(newSchema, field->schema);
591+
columnArray->array->release(columnArray->array);
592+
ArrowArrayMove(newArray, columnArray->array);
593+
}
594+
506595
void CArrowTableIterator::convertTimeColumn_nanoarrow(
507596
ArrowSchemaView* field, ArrowArrayView* columnArray, const int scale) {
508597
int returnCode = 0;

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,14 @@ class CArrowTableIterator : public CArrowIterator {
9494
ArrowArrayView* columnArray,
9595
const int scale);
9696

97+
/**
98+
* convert Snowflake Interval Day-Time column (Arrow int64/decimal128) to
99+
* Arrow Duration column
100+
*/
101+
void convertIntervalDayTimeColumn_nanoarrow(ArrowSchemaView* field,
102+
ArrowArrayView* columnArray,
103+
const int scale);
104+
97105
/**
98106
* convert Snowflake TimestampNTZ/TimestampLTZ column to Arrow Timestamp
99107
* column

test/integ/aio_it/pandas_it/test_arrow_pandas_async.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,26 @@ async def test_vector(conn_cnx, is_public_test):
662662
await finish(conn, table)
663663

664664

665+
@pytest.mark.skipif(
666+
not installed_pandas or no_arrow_iterator_ext,
667+
reason="arrow_iterator extension is not built, or pandas is missing.",
668+
)
669+
async def test_interval_year_month(conn_cnx):
670+
cases = ["1-2", "-1-3", "999999999-11", "-999999999-11"]
671+
table = "test_arrow_year_month_interval"
672+
values = "(" + "),(".join([f"'{c}'" for c in cases]) + ")"
673+
async with conn_cnx() as conn:
674+
cursor = conn.cursor()
675+
await cursor.execute("alter session set feature_interval_types=enabled")
676+
await cursor.execute(
677+
f"create or replace table {table} (a interval year to month)"
678+
)
679+
await cursor.execute(f"insert into {table} values {values}")
680+
sql_text = f"select a from {table}"
681+
await validate_pandas(conn, sql_text, cases, 1, "one", "interval_year_month")
682+
await finish(conn, table)
683+
684+
665685
async def validate_pandas(
666686
cnx_table,
667687
sql,
@@ -740,6 +760,23 @@ async def validate_pandas(
740760
c_case = Decimal(cases[i])
741761
elif data_type == "date":
742762
c_case = datetime.strptime(cases[i], "%Y-%m-%d").date()
763+
elif data_type == "interval_year_month":
764+
year_month_list = cases[i].split("-")
765+
if len(year_month_list) == 2:
766+
c_case = int(year_month_list[0]) * 12 + int(
767+
year_month_list[1]
768+
)
769+
else:
770+
# negative value
771+
c_case = -(
772+
int(year_month_list[1]) * 12 + int(year_month_list[2])
773+
)
774+
elif data_type == "interval_day_time":
775+
timedelta_split_days = cases[i].split(" ")
776+
pandas_timedelta_str = (
777+
timedelta_split_days[0] + " days " + timedelta_split_days[1]
778+
)
779+
c_case = pandas.to_timedelta(pandas_timedelta_str)
743780
elif data_type == "time":
744781
time_str_len = 8 if scale == 0 else 9 + scale
745782
c_case = cases[i].strip()[:time_str_len]

test/integ/pandas_it/test_arrow_pandas.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,42 @@ def test_num_one(conn_cnx):
6262
fetch_pandas(conn_cnx, sql_exec, row_count, col_count, "one")
6363

6464

65+
@pytest.mark.skipif(
66+
not installed_pandas or no_arrow_iterator_ext,
67+
reason="arrow_iterator extension is not built, or pandas is missing.",
68+
)
69+
def test_interval_year_month(conn_cnx):
70+
cases = ["1-2", "-1-3", "999999999-11", "-999999999-11"]
71+
table = "test_arrow_year_month_interval"
72+
values = "(" + "),(".join([f"'{c}'" for c in cases]) + ")"
73+
with conn_cnx() as conn:
74+
cursor = conn.cursor()
75+
cursor.execute("alter session set feature_interval_types=enabled")
76+
cursor.execute(f"create or replace table {table} (a interval year to month)")
77+
cursor.execute(f"insert into {table} values {values}")
78+
sql_text = f"select a from {table}"
79+
validate_pandas(conn, sql_text, cases, 1, "one", "interval_year_month")
80+
finish(conn, table)
81+
82+
83+
@pytest.mark.skipif(
84+
not installed_pandas or no_arrow_iterator_ext,
85+
reason="arrow_iterator extension is not built, or pandas is missing.",
86+
)
87+
def test_interval_day_time(conn_cnx):
88+
cases = ["106751 23:47:16.854775807", "0 0:0:0.0", "-5 0:0:0.0"]
89+
table = "test_arrow_day_time_interval"
90+
values = "(" + "),(".join([f"'{c}'" for c in cases]) + ")"
91+
with conn_cnx() as conn:
92+
cursor = conn.cursor()
93+
cursor.execute("alter session set feature_interval_types=enabled")
94+
cursor.execute(f"create or replace table {table} (a interval day to second)")
95+
cursor.execute(f"insert into {table} values {values}")
96+
sql_text = f"select a from {table}"
97+
validate_pandas(conn, sql_text, cases, 1, "one", "interval_day_time")
98+
finish(conn, table)
99+
100+
65101
@pytest.mark.skipif(
66102
not installed_pandas or no_arrow_iterator_ext,
67103
reason="arrow_iterator extension is not built, or pandas is missing.",
@@ -734,6 +770,23 @@ def validate_pandas(
734770
c_case = Decimal(cases[i])
735771
elif data_type == "date":
736772
c_case = datetime.strptime(cases[i], "%Y-%m-%d").date()
773+
elif data_type == "interval_year_month":
774+
year_month_list = cases[i].split("-")
775+
if len(year_month_list) == 2:
776+
c_case = int(year_month_list[0]) * 12 + int(
777+
year_month_list[1]
778+
)
779+
else:
780+
# negative value
781+
c_case = -(
782+
int(year_month_list[1]) * 12 + int(year_month_list[2])
783+
)
784+
elif data_type == "interval_day_time":
785+
timedelta_split_days = cases[i].split(" ")
786+
pandas_timedelta_str = (
787+
timedelta_split_days[0] + " days " + timedelta_split_days[1]
788+
)
789+
c_case = pandas.to_timedelta(pandas_timedelta_str)
737790
elif data_type == "time":
738791
time_str_len = 8 if scale == 0 else 9 + scale
739792
c_case = cases[i].strip()[:time_str_len]

0 commit comments

Comments
 (0)