Skip to content

Commit fa1addc

Browse files
add pandas test
1 parent 240def5 commit fa1addc

File tree

1 file changed

+125
-57
lines changed

1 file changed

+125
-57
lines changed

test/integ/pandas_it/test_arrow_pandas.py

Lines changed: 125 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -438,40 +438,67 @@ def test_timestampntz(conn_cnx, scale):
438438
[
439439
"'1400-01-01 01:02:03.123456789'::timestamp as low_ts",
440440
"'9999-01-01 01:02:03.123456789789'::timestamp as high_ts",
441+
"convert_timezone('UTC', '1400-01-01 01:02:03.123456789') as low_ts",
442+
"convert_timezone('UTC', '9999-01-01 01:02:03.123456789789') as high_ts",
441443
],
442444
)
443-
def test_timestampntz_raises_overflow(conn_cnx, timestamp_str):
445+
def test_timestamp_raises_overflow(conn_cnx, timestamp_str):
444446
with conn_cnx() as conn:
445447
r = conn.cursor().execute(f"select {timestamp_str}")
446448
with pytest.raises(OverflowError, match="overflows int64 range."):
447449
r.fetch_arrow_all()
448450

449451

450-
def test_timestampntz_down_scale(conn_cnx):
452+
def test_timestamp_down_scale(conn_cnx):
451453
with conn_cnx() as conn:
452454
r = conn.cursor().execute(
453-
"select '1400-01-01 01:02:03.123456'::timestamp as low_ts, '9999-01-01 01:02:03.123456'::timestamp as high_ts"
455+
"""select '1400-01-01 01:02:03.123456'::timestamp as low_ntz,
456+
'9999-01-01 01:02:03.123456'::timestamp as high_ntz,
457+
convert_timezone('UTC', '1400-01-01 01:02:03.123456') as low_tz,
458+
convert_timezone('UTC', '9999-01-01 01:02:03.123456') as high_tz
459+
"""
454460
)
455461
table = r.fetch_arrow_all()
456-
lower_dt = table[0][0].as_py() # type: datetime
462+
lower_ntz = table[0][0].as_py() # type: datetime
457463
assert (
458-
lower_dt.year,
459-
lower_dt.month,
460-
lower_dt.day,
461-
lower_dt.hour,
462-
lower_dt.minute,
463-
lower_dt.second,
464-
lower_dt.microsecond,
464+
lower_ntz.year,
465+
lower_ntz.month,
466+
lower_ntz.day,
467+
lower_ntz.hour,
468+
lower_ntz.minute,
469+
lower_ntz.second,
470+
lower_ntz.microsecond,
465471
) == (1400, 1, 1, 1, 2, 3, 123456)
466-
higher_dt = table[1][0].as_py()
472+
higher_ntz = table[1][0].as_py() # type: datetime
467473
assert (
468-
higher_dt.year,
469-
higher_dt.month,
470-
higher_dt.day,
471-
higher_dt.hour,
472-
higher_dt.minute,
473-
higher_dt.second,
474-
higher_dt.microsecond,
474+
higher_ntz.year,
475+
higher_ntz.month,
476+
higher_ntz.day,
477+
higher_ntz.hour,
478+
higher_ntz.minute,
479+
higher_ntz.second,
480+
higher_ntz.microsecond,
481+
) == (9999, 1, 1, 1, 2, 3, 123456)
482+
483+
lower_tz = table[2][0].as_py() # type: datetime
484+
assert (
485+
lower_tz.year,
486+
lower_tz.month,
487+
lower_tz.day,
488+
lower_tz.hour,
489+
lower_tz.minute,
490+
lower_tz.second,
491+
lower_tz.microsecond,
492+
) == (1400, 1, 1, 1, 2, 3, 123456)
493+
higher_tz = table[3][0].as_py() # type: datetime
494+
assert (
495+
higher_tz.year,
496+
higher_tz.month,
497+
higher_tz.day,
498+
higher_tz.hour,
499+
higher_tz.minute,
500+
higher_tz.second,
501+
higher_tz.microsecond,
475502
) == (9999, 1, 1, 1, 2, 3, 123456)
476503

477504

@@ -1493,41 +1520,82 @@ def test_fetch_with_pandas_nullable_types(conn_cnx):
14931520
assert df.to_string() == expected_df_to_string
14941521

14951522

1496-
def test_convert_timezone_overflow(conn_cnx):
1497-
"""Test CONVERT_TIMEZONE function with microsecond fallback for year 2999.
1498-
1499-
This test verifies that dates beyond the nanosecond range automatically
1500-
fall back to microsecond precision instead of failing.
1501-
"""
1502-
with conn_cnx() as cnx:
1503-
cur = cnx.cursor()
1504-
cur.execute(SQL_ENABLE_ARROW)
1505-
1506-
# Test with regular fetchone first - this should work fine
1507-
result = cur.execute(
1508-
"SELECT CONVERT_TIMEZONE ('UTC', '2999-12-31 00:00:00.000 +0000') AS result1"
1509-
).fetchone()
1510-
assert str(result[0]) == "2999-12-31 00:00:00+00:00"
1511-
1512-
# Test with fetch_pandas_all - this should now work with microsecond fallback
1513-
# instead of throwing an error or returning wrong data
1514-
pandas_result = cur.execute(
1515-
"SELECT CONVERT_TIMEZONE ('UTC', '2999-12-31 00:00:00.000 +0000') AS result1"
1516-
).fetch_pandas_all()
1517-
1518-
# Check that we got a DataFrame with one row and one column
1519-
assert pandas_result.shape == (1, 1)
1520-
assert pandas_result.columns[0] == "RESULT1"
1521-
1522-
# Check the actual timestamp value - should be correct year 2999
1523-
timestamp_value = pandas_result.iloc[0, 0]
1524-
assert str(timestamp_value) == "2999-12-31 00:00:00+00:00"
1525-
1526-
# Test with a date within the nanosecond range (should use nanoseconds)
1527-
pandas_result_2200 = cur.execute(
1528-
"SELECT CONVERT_TIMEZONE ('UTC', '2200-12-31 00:00:00.000 +0000') AS result1"
1529-
).fetch_pandas_all()
1530-
1531-
# Check that the date is correct
1532-
timestamp_value_2200 = pandas_result_2200.iloc[0, 0]
1533-
assert str(timestamp_value_2200) == "2200-12-31 00:00:00+00:00"
1523+
# @pytest.mark.parametrize(
1524+
# "timestamp_type", ["timestamp_ntz", "timestamp_ltz", "timestamp_tz"]
1525+
# )
1526+
# def test_convert_timestamp_overflow(conn_cnx, timestamp_type):
1527+
# """Test whether large timestamps are correctly falling back to microsecond precision."""
1528+
1529+
# def query(timestamp):
1530+
# if timestamp_type == "timestamp_tz":
1531+
# return f"SELECT CONVERT_TIMEZONE ('UTC', '{timestamp}') AS result"
1532+
# return f"SELECT '{timestamp}'::{timestamp_type} AS result"
1533+
1534+
# with conn_cnx() as cnx:
1535+
# cur = cnx.cursor()
1536+
1537+
# # Check that "large" dates are correctly falling back to microsecond precision
1538+
# cur.execute(query("2999-12-31 00:00:00.001234"))
1539+
# result = cur.fetchall()
1540+
# assert str(result[0][0]).startswith("2999-12-31 00:00:00.001234")
1541+
# result_pandas = cur.fetch_pandas_all()
1542+
# assert str(result_pandas.iloc[0, 0]).startswith("2999-12-31 00:00:00.001234")
1543+
1544+
# # Check that nanosecond precision is used for dates within the nanosecond range
1545+
# cur.execute(query("2000-12-31 00:00:00.001234567"))
1546+
# result_pandas = cur.fetch_arrow_all()
1547+
# result_pandas = cur.fetch_pandas_all()
1548+
# assert str(result_pandas.iloc[0, 0]).startswith("2999-12-31 00:00:00.001234")
1549+
1550+
# # Check that nanosecond precision used outside of nanosecond range throws an error
1551+
# cur.execute(query("2999-12-31 00:00:00.0012345678"))
1552+
# with pytest.raises(
1553+
# OverflowError,
1554+
# match=(
1555+
# "If you use a timestamp with the nanosecond part over 6-digits in the Snowflake database, "
1556+
# "the timestamp must be between '1677-09-21 00:12:43.145224192' and "
1557+
# "'2262-04-11 23:47:16.854775807' to not overflow."
1558+
# ),
1559+
# ):
1560+
# cur.fetch_pandas_all()
1561+
1562+
1563+
# def test_timestamp_ltz_overflow(conn_cnx):
1564+
# """Test TIMESTAMP_LTZ with microsecond fallback for year 2999.
1565+
1566+
# This test verifies that TIMESTAMP_LTZ dates beyond the nanosecond range automatically
1567+
# fall back to microsecond precision instead of failing.
1568+
# """
1569+
# with conn_cnx() as cnx:
1570+
# cur = cnx.cursor()
1571+
# cur.execute(SQL_ENABLE_ARROW)
1572+
1573+
# # Test with regular fetchone first - this should work fine
1574+
# result = cur.execute(
1575+
# "SELECT '2999-12-31 00:00:00.000'::timestamp_ltz AS result1"
1576+
# ).fetchone()
1577+
# # TIMESTAMP_LTZ will be converted to session timezone (UTC by default in tests)
1578+
# assert str(result[0]) == "2999-12-31 00:00:00+00:00"
1579+
1580+
# # Test with fetch_pandas_all - this should now work with microsecond fallback
1581+
# # instead of throwing an error or returning wrong data
1582+
# pandas_result = cur.execute(
1583+
# "SELECT '2999-12-31 00:00:00.000'::timestamp_ltz AS result1"
1584+
# ).fetch_pandas_all()
1585+
1586+
# # Check that we got a DataFrame with one row and one column
1587+
# assert pandas_result.shape == (1, 1)
1588+
# assert pandas_result.columns[0] == "RESULT1"
1589+
1590+
# # Check the actual timestamp value - should be correct year 2999
1591+
# timestamp_value = pandas_result.iloc[0, 0]
1592+
# assert str(timestamp_value) == "2999-12-31 00:00:00+00:00"
1593+
1594+
# # Test with a date within the nanosecond range (should use nanoseconds)
1595+
# pandas_result_2200 = cur.execute(
1596+
# "SELECT '2200-12-31 00:00:00.000'::timestamp_ltz AS result1"
1597+
# ).fetch_pandas_all()
1598+
1599+
# # Check that the date is correct
1600+
# timestamp_value_2200 = pandas_result_2200.iloc[0, 0]
1601+
# assert str(timestamp_value_2200) == "2200-12-31 00:00:00+00:00"

0 commit comments

Comments (0)