Skip to content

Commit 5cb9b24

Browse files
update to map day-time arrow ipc to arrow duration
1 parent 4083146 commit 5cb9b24

File tree

3 files changed

+149
-1
lines changed

3 files changed

+149
-1
lines changed

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.cpp

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ void CArrowTableIterator::convertIfNeeded(ArrowSchema* columnSchema,
6969
case SnowflakeType::Type::REAL:
7070
case SnowflakeType::Type::TEXT:
7171
case SnowflakeType::Type::INTERVAL_YEAR_MONTH:
72-
case SnowflakeType::Type::INTERVAL_DAY_TIME:
7372
case SnowflakeType::Type::VARIANT:
7473
case SnowflakeType::Type::VECTOR: {
7574
// Do not need to convert
@@ -176,6 +175,24 @@ void CArrowTableIterator::convertIfNeeded(ArrowSchema* columnSchema,
176175
break;
177176
}
178177

178+
case SnowflakeType::Type::INTERVAL_DAY_TIME: {
179+
int scale = 9;
180+
if (metadata != nullptr) {
181+
struct ArrowStringView scaleString = ArrowCharView(nullptr);
182+
returnCode = ArrowMetadataGetValue(metadata, ArrowCharView("scale"),
183+
&scaleString);
184+
SF_CHECK_ARROW_RC(returnCode,
185+
"[Snowflake Exception] error getting 'scale' "
186+
"from Arrow metadata, error code: %d",
187+
returnCode);
188+
scale =
189+
std::stoi(std::string(scaleString.data, scaleString.size_bytes));
190+
}
191+
convertIntervalDayTimeColumn_nanoarrow(&columnSchemaView, columnArray,
192+
scale);
193+
break;
194+
}
195+
179196
case SnowflakeType::Type::TIME: {
180197
int scale = 9;
181198
if (metadata != nullptr) {
@@ -505,6 +522,73 @@ void CArrowTableIterator::
505522
ArrowArrayMove(newArray, columnArray->array);
506523
}
507524

525+
void CArrowTableIterator::convertIntervalDayTimeColumn_nanoarrow(
526+
ArrowSchemaView* field, ArrowArrayView* columnArray, const int scale) {
527+
int returnCode = 0;
528+
nanoarrow::UniqueSchema newUniqueField;
529+
nanoarrow::UniqueArray newUniqueArray;
530+
ArrowSchema* newSchema = newUniqueField.get();
531+
ArrowArray* newArray = newUniqueArray.get();
532+
ArrowError error;
533+
534+
// create new schema
535+
ArrowSchemaInit(newSchema);
536+
newSchema->flags &=
537+
(field->schema->flags & ARROW_FLAG_NULLABLE); // map to nullable()
538+
539+
returnCode = ArrowSchemaSetTypeDateTime(newSchema, NANOARROW_TYPE_DURATION,
540+
NANOARROW_TIME_UNIT_NANO, NULL);
541+
SF_CHECK_ARROW_RC(returnCode,
542+
"[Snowflake Exception] error setting arrow schema type "
543+
"DateTime, error code: %d",
544+
returnCode);
545+
546+
returnCode = ArrowSchemaSetName(newSchema, field->schema->name);
547+
SF_CHECK_ARROW_RC(
548+
returnCode,
549+
"[Snowflake Exception] error setting schema name, error code: %d",
550+
returnCode);
551+
552+
returnCode = ArrowArrayInitFromSchema(newArray, newSchema, &error);
553+
SF_CHECK_ARROW_RC(returnCode,
554+
"[Snowflake Exception] error initializing ArrowArrayView "
555+
"from schema : %s, error code: %d",
556+
ArrowErrorMessage(&error), returnCode);
557+
558+
returnCode = ArrowArrayStartAppending(newArray);
559+
SF_CHECK_ARROW_RC(
560+
returnCode,
561+
"[Snowflake Exception] error appending arrow array, error code: %d",
562+
returnCode);
563+
564+
for (int64_t rowIdx = 0; rowIdx < columnArray->array->length; rowIdx++) {
565+
if (ArrowArrayViewIsNull(columnArray, rowIdx)) {
566+
returnCode = ArrowArrayAppendNull(newArray, 1);
567+
SF_CHECK_ARROW_RC(returnCode,
568+
"[Snowflake Exception] error appending null to arrow "
569+
"array, error code: %d",
570+
returnCode);
571+
} else {
572+
auto originalVal = ArrowArrayViewGetIntUnsafe(columnArray, rowIdx);
573+
returnCode = ArrowArrayAppendInt(newArray, originalVal);
574+
SF_CHECK_ARROW_RC(returnCode,
575+
"[Snowflake Exception] error appending int to arrow "
576+
"array, error code: %d",
577+
returnCode);
578+
}
579+
}
580+
581+
returnCode = ArrowArrayFinishBuildingDefault(newArray, &error);
582+
SF_CHECK_ARROW_RC(returnCode,
583+
"[Snowflake Exception] error finishing building arrow "
584+
"array: %s, error code: %d",
585+
ArrowErrorMessage(&error), returnCode);
586+
field->schema->release(field->schema);
587+
ArrowSchemaMove(newSchema, field->schema);
588+
columnArray->array->release(columnArray->array);
589+
ArrowArrayMove(newArray, columnArray->array);
590+
}
591+
508592
void CArrowTableIterator::convertTimeColumn_nanoarrow(
509593
ArrowSchemaView* field, ArrowArrayView* columnArray, const int scale) {
510594
int returnCode = 0;

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,14 @@ class CArrowTableIterator : public CArrowIterator {
9494
ArrowArrayView* columnArray,
9595
const int scale);
9696

97+
/**
98+
* convert Snowflake Interval Day-Time column (Arrow in64/decimal128) to
99+
* Arrow Duration column
100+
*/
101+
void convertIntervalDayTimeColumn_nanoarrow(ArrowSchemaView* field,
102+
ArrowArrayView* columnArray,
103+
const int scale);
104+
97105
/**
98106
* convert Snowflake TimestampNTZ/TimestampLTZ column to Arrow Timestamp
99107
* column

test/integ/pandas_it/test_arrow_pandas.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,49 @@ def test_num_one(conn_cnx):
6262
fetch_pandas(conn_cnx, sql_exec, row_count, col_count, "one")
6363

6464

65+
@pytest.mark.skipif(
66+
not installed_pandas or no_arrow_iterator_ext,
67+
reason="arrow_iterator extension is not built, or pandas is missing.",
68+
)
69+
def test_interval_year_month(conn_cnx):
70+
cases = ["1-2", "-1-3", "999999999-11", "-999999999-11"]
71+
table = "test_arrow_year_month_interval"
72+
values = "(" + "),(".join([f"'{c}'" for c in cases]) + ")"
73+
with conn_cnx() as conn:
74+
cursor = conn.cursor()
75+
cursor.execute("alter session set feature_interval_types=enabled")
76+
cursor.execute(f"create or replace table {table} (a interval year to month)")
77+
cursor.execute(f"insert into {table} values {values}")
78+
sql_text = f"select a from {table}"
79+
validate_pandas(conn, sql_text, cases, 1, "one", "interval_year_month")
80+
finish(conn, table)
81+
82+
83+
@pytest.mark.skipif(
84+
not installed_pandas or no_arrow_iterator_ext,
85+
reason="arrow_iterator extension is not built, or pandas is missing.",
86+
)
87+
def test_interval_day_time(conn_cnx):
88+
cases = [
89+
"0 00:00:51",
90+
# "0 0:0:0.0",
91+
# "12 3:4:5.678",
92+
# "-1 2:3:4.567",
93+
# "999999999 23:59:59.999999",
94+
# "-99999 23:59:59.999999"
95+
]
96+
table = "test_arrow_day_time_interval"
97+
values = "(" + "),(".join([f"'{c}'" for c in cases]) + ")"
98+
with conn_cnx() as conn:
99+
cursor = conn.cursor()
100+
cursor.execute("alter session set feature_interval_types=enabled")
101+
cursor.execute(f"create or replace table {table} (a interval day to second)")
102+
cursor.execute(f"insert into {table} values {values}")
103+
sql_text = f"select a from {table}"
104+
validate_pandas(conn, sql_text, cases, 1, "one", "interval_day_time")
105+
finish(conn, table)
106+
107+
65108
@pytest.mark.skipif(
66109
not installed_pandas or no_arrow_iterator_ext,
67110
reason="arrow_iterator extension is not built, or pandas is missing.",
@@ -734,6 +777,19 @@ def validate_pandas(
734777
c_case = Decimal(cases[i])
735778
elif data_type == "date":
736779
c_case = datetime.strptime(cases[i], "%Y-%m-%d").date()
780+
elif data_type == "interval_year_month":
781+
year_month_list = cases[i].split("-")
782+
if len(year_month_list) == 2:
783+
c_case = int(year_month_list[0]) * 12 + int(
784+
year_month_list[1]
785+
)
786+
else:
787+
# negative value
788+
c_case = -(
789+
int(year_month_list[1]) * 12 + int(year_month_list[2])
790+
)
791+
elif data_type == "interval_day_time":
792+
c_case = pandas.Timedelta(cases[i])
737793
elif data_type == "time":
738794
time_str_len = 8 if scale == 0 else 9 + scale
739795
c_case = cases[i].strip()[:time_str_len]

0 commit comments

Comments
 (0)