Skip to content

Commit 61982e0

Browse files
authored
SNOW-1959569: Fix test_datatype_suite.py::test_structured_dtypes_select (#3160)
1 parent d75e7ea commit 61982e0

File tree

1 file changed

+47
-27
lines changed

1 file changed

+47
-27
lines changed

tests/integ/scala/test_datatype_suite.py

Lines changed: 47 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ def _create_test_dataframe(s, structured_type_support):
8080
nested_field_name = "b" if structured_type_support else "B"
8181
df = s.create_dataframe([1], schema=["a"]).select(
8282
object_construct(lit("k1"), lit(1))
83-
.cast(MapType(StringType(), IntegerType(), structured=True))
83+
.cast(MapType(StringType(), IntegerType(), structured=structured_type_support))
8484
.alias("map"),
8585
object_construct(lit("A"), lit("foo"), lit(nested_field_name), lit(0.05))
8686
.cast(
@@ -89,12 +89,12 @@ def _create_test_dataframe(s, structured_type_support):
8989
StructField("A", StringType()),
9090
StructField(nested_field_name, DoubleType()),
9191
],
92-
structured=True,
92+
structured=structured_type_support,
9393
)
9494
)
9595
.alias("obj"),
9696
array_construct(lit(1.0), lit(3.1), lit(4.5))
97-
.cast(ArrayType(FloatType(), structured=True))
97+
.cast(ArrayType(FloatType(), structured=structured_type_support))
9898
.alias("arr"),
9999
)
100100
return df
@@ -422,7 +422,6 @@ def test_structured_dtypes(structured_type_session, examples, structured_type_su
422422
assert df.dtypes == expected_dtypes
423423

424424

425-
@pytest.mark.skip(reason="SNOW-1959569: Undo once structured types issue is fixed")
426425
@pytest.mark.skipif(
427426
"config.getoption('disable_sql_simplifier', default=False)",
428427
reason="without sql_simplifier returned types are all variants",
@@ -432,7 +431,7 @@ def test_structured_dtypes(structured_type_session, examples, structured_type_su
432431
reason="FEAT: SNOW-1372813 Cast to StructType not supported",
433432
)
434433
def test_structured_dtypes_select(
435-
structured_type_session, examples, structured_type_support
434+
structured_type_session, examples, structured_type_support, max_string
436435
):
437436
query, expected_dtypes, expected_schema = examples
438437
df = _create_test_dataframe(structured_type_session, structured_type_support)
@@ -445,28 +444,42 @@ def test_structured_dtypes_select(
445444
df.arr[1].alias("value3"),
446445
col("arr")[2].alias("value4"),
447446
)
447+
448+
# Semi structured schemas can't extract inner types
449+
override_type = None if structured_type_support else VariantType()
450+
override_dtype = None if structured_type_support else "variant"
451+
448452
assert flattened_df.schema == StructType(
449453
[
450-
StructField("VALUE1", LongType(), nullable=True),
451-
StructField("A", StringType(16777216), nullable=True),
452-
StructField(nested_field_name, DoubleType(), nullable=True),
453-
StructField("VALUE2", DoubleType(), nullable=True),
454-
StructField("VALUE3", DoubleType(), nullable=True),
455-
StructField("VALUE4", DoubleType(), nullable=True),
454+
StructField("VALUE1", override_type or LongType(), nullable=True),
455+
StructField("A", override_type or StringType(max_string), nullable=True),
456+
StructField(
457+
nested_field_name, override_type or DoubleType(), nullable=True
458+
),
459+
StructField("VALUE2", override_type or DoubleType(), nullable=True),
460+
StructField("VALUE3", override_type or DoubleType(), nullable=True),
461+
StructField("VALUE4", override_type or DoubleType(), nullable=True),
456462
]
457463
)
458464
assert flattened_df.dtypes == [
459-
("VALUE1", "bigint"),
460-
("A", "string(16777216)"),
461-
("B", "double"),
462-
("VALUE2", "double"),
463-
("VALUE3", "double"),
464-
("VALUE4", "double"),
465-
]
466-
assert flattened_df.collect() == [
467-
Row(VALUE1=1, A="foo", B=0.05, VALUE2=1.0, VALUE3=3.1, VALUE4=4.5)
465+
("VALUE1", override_dtype or "bigint"),
466+
("A", override_dtype or f"string({max_string})"),
467+
("B", override_dtype or "double"),
468+
("VALUE2", override_dtype or "double"),
469+
("VALUE3", override_dtype or "double"),
470+
("VALUE4", override_dtype or "double"),
468471
]
469472

473+
if structured_type_support:
474+
expected_row = Row(
475+
VALUE1=1, A="foo", B=0.05, VALUE2=1.0, VALUE3=3.1, VALUE4=4.5
476+
)
477+
else:
478+
expected_row = Row(
479+
VALUE1="1", A='"foo"', B="0.05", VALUE2="1", VALUE3="3.1", VALUE4="4.5"
480+
)
481+
assert flattened_df.collect() == [expected_row]
482+
470483

471484
@pytest.mark.skipif(not installed_pandas, reason="Pandas required for this test.")
472485
@pytest.mark.skipif(
@@ -485,25 +498,29 @@ def test_structured_dtypes_pandas(structured_type_session, structured_type_suppo
485498
else:
486499
assert (
487500
pdf.to_json()
488-
== '{"MAP":{"0":"{\\n \\"k1\\": 1\\n}"},"OBJ":{"0":"{\\n \\"A\\": \\"foo\\",\\n \\"B\\": 5.000000000000000e-02\\n}"},"ARR":{"0":"[\\n 1.000000000000000e+00,\\n 3.100000000000000e+00,\\n 4.500000000000000e+00\\n]"}}'
501+
== '{"MAP":{"0":"{\\n \\"k1\\": 1\\n}"},"OBJ":{"0":"{\\n \\"A\\": \\"foo\\",\\n \\"B\\": 0.05\\n}"},"ARR":{"0":"[\\n 1,\\n 3.1,\\n 4.5\\n]"}}'
489502
)
490503

491504

492-
@pytest.mark.skip(reason="SNOW-1959569: Undo once structured types issue is fixed")
493505
@pytest.mark.skipif(
494506
"config.getoption('local_testing_mode', default=False)",
495507
reason="local testing does not fully support structured types yet.",
496508
)
497509
def test_structured_dtypes_iceberg(
498-
structured_type_session, local_testing_mode, structured_type_support, max_string
510+
structured_type_session,
511+
local_testing_mode,
512+
structured_type_support,
513+
server_side_max_string,
499514
):
500515

501516
if not (
502517
structured_type_support
503518
and iceberg_supported(structured_type_session, local_testing_mode)
504519
):
505520
pytest.skip("Test requires iceberg support and structured type support.")
506-
query, expected_dtypes, expected_schema = _create_example(True, max_string)
521+
query, expected_dtypes, expected_schema = _create_example(
522+
True, server_side_max_string
523+
)
507524

508525
table_name = f"snowpark_structured_dtypes_{uuid.uuid4().hex[:5]}".upper()
509526
dynamic_table_name = f"snowpark_dynamic_iceberg_{uuid.uuid4().hex[:5]}".upper()
@@ -549,10 +566,13 @@ def test_structured_dtypes_iceberg(
549566
if structured_type_session.sql_simplifier_enabled
550567
else f"({table_name})"
551568
)
569+
552570
assert dynamic_ddl[0][0] == (
553-
f"create or replace dynamic table {dynamic_table_name}(\n\tMAP,\n\tOBJ,\n\tARR\n) "
554-
f"target_lag = '16 hours, 40 minutes' refresh_mode = AUTO initialize = ON_CREATE "
555-
f"warehouse = {warehouse}\n as SELECT * FROM ( SELECT * FROM {formatted_table_name});"
571+
f"create or replace dynamic iceberg table {dynamic_table_name}(\n\tMAP,\n\tOBJ,\n\tARR\n)"
572+
" target_lag = '16 hours, 40 minutes' refresh_mode = AUTO initialize = ON_CREATE "
573+
f"warehouse = {warehouse} external_volume = 'PYTHON_CONNECTOR_ICEBERG_EXVOL' "
574+
"catalog = 'SNOWFLAKE' base_location = 'python_connector_merge_gate/' \n as "
575+
f"SELECT * FROM ( SELECT * FROM {formatted_table_name});"
556576
)
557577

558578
finally:

0 commit comments

Comments
 (0)