Skip to content

Commit 1ed17cc

Browse files
SNOW-1877449: Exception should be thrown when creating a DataFrame with a null value and nullable set to False (#2849)
1 parent 0ee1c34 commit 1ed17cc

File tree

5 files changed

+52
-5
lines changed

5 files changed

+52
-5
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
- Fixed a bug in local testing mode that caused a column to contain None when it should contain 0
5454
- Fixed a bug in `StructField.from_json` that prevented TimestampTypes with `tzinfo` from being parsed correctly.
5555
- Fixed a bug in function `date_format` that caused an error when the input column was date type or timestamp type.
56+
- Fixed a bug in DataFrame creation that allowed null values to be inserted into a non-nullable column.
5657
- Fixed a bug in `replace` when passing `Column` expression objects.
5758

5859
### Snowpark pandas API Updates

src/snowflake/snowpark/_internal/analyzer/analyzer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from collections import Counter, defaultdict
77
from typing import TYPE_CHECKING, DefaultDict, Dict, List, Optional, Union
88

9+
from snowflake.connector import IntegrityError
10+
911
import snowflake.snowpark
1012
from snowflake.snowpark._internal.analyzer.analyzer_utils import (
1113
alias_expression,
@@ -975,6 +977,8 @@ def do_resolve_with_resolved_children(
975977

976978
if logical_plan.data:
977979
if not logical_plan.is_large_local_data:
980+
if logical_plan.is_contain_illegal_null_value:
981+
raise IntegrityError("NULL result in a non-nullable column")
978982
return self.plan_builder.query(
979983
values_statement(logical_plan.output, logical_plan.data),
980984
logical_plan,

src/snowflake/snowpark/_internal/analyzer/snowflake_plan_node.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,20 @@ def is_large_local_data(self) -> bool:
158158

159159
return len(self.data) * len(self.output) >= ARRAY_BIND_THRESHOLD
160160

161+
@property
def is_contain_illegal_null_value(self) -> bool:
    """Report whether local data holds ``None`` in a non-nullable column.

    Only a bounded prefix of ``self.data`` is inspected: at most
    ``ARRAY_BIND_THRESHOLD // len(self.output) + 1`` rows, so the scan
    cost stays capped no matter how many rows are supplied.

    Returns:
        ``True`` if any inspected row has ``None`` at the position of a
        column whose ``nullable`` flag is false, otherwise ``False``.
        Also ``False`` when there are no output columns at all.
    """
    # Imported lazily, matching the sibling property above.
    # NOTE(review): presumably this avoids a circular import with the
    # analyzer module - confirm.
    from snowflake.snowpark._internal.analyzer.analyzer import ARRAY_BIND_THRESHOLD

    # With no columns nothing can violate a constraint; returning early also
    # keeps the division below from raising ZeroDivisionError.
    if not self.output:
        return False

    # Resolve the constrained column positions once instead of re-testing
    # the nullable flag inside the row scan.
    non_nullable_columns = [
        position
        for position, attribute in enumerate(self.output)
        if not attribute.nullable
    ]
    if not non_nullable_columns:
        return False

    rows_to_compare = min(
        ARRAY_BIND_THRESHOLD // len(self.output) + 1, len(self.data)
    )
    return any(
        row[position] is None
        for row in self.data[:rows_to_compare]
        for position in non_nullable_columns
    )
174+
161175
@property
162176
def individual_node_complexity(self) -> Dict[PlanNodeCategory, int]:
163177
if self.is_large_local_data:

tests/integ/compiler/test_query_generator.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from typing import List
77

88
import pytest
9+
from snowflake.connector import IntegrityError
910

1011
from snowflake.snowpark import Window
1112
from snowflake.snowpark._internal.analyzer import analyzer
@@ -51,6 +52,7 @@
5152
random_name_for_temp_object,
5253
)
5354
from snowflake.snowpark.functions import avg, col, lit, when_matched
55+
from snowflake.snowpark.types import StructType, StructField, LongType
5456
from tests.integ.scala.test_dataframe_reader_suite import get_reader
5557
from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker
5658
from tests.utils import TestFiles, Utils
@@ -533,3 +535,26 @@ def test_select_alias(session):
533535
# Add a new column d that doesn't use c after c was added previously. Flatten safely.
534536
df2 = df1.select("a", "b", "c", (col("a") + col("b") + 1).as_("d"))
535537
check_generated_plan_queries(df2._plan)
538+
539+
540+
def test_nullable_is_false_dataframe(session):
    """Check that ``None`` values are honoured or rejected per column nullability.

    A nullable column must round-trip ``None``; a non-nullable column must
    raise ``IntegrityError``. Each case runs once with a handful of rows and
    once with ``ARRAY_BIND_THRESHOLD + 1`` rows.
    """
    from snowflake.snowpark._internal.analyzer.analyzer import ARRAY_BIND_THRESHOLD

    oversized_row_count = ARRAY_BIND_THRESHOLD + 1

    # Nullable column: None is legal and must come back unchanged.
    permissive_schema = StructType([StructField("key", LongType(), nullable=True)])
    for row_count in (1, oversized_row_count):
        collected = session.create_dataframe(
            [None] * row_count, schema=permissive_schema
        ).collect()
        assert collected[0][0] is None

    # Non-nullable column: the same data must be rejected.
    strict_schema = StructType([StructField("key", LongType(), nullable=False)])
    for row_count in (10, oversized_row_count):
        with pytest.raises(
            IntegrityError, match="NULL result in a non-nullable column"
        ):
            session.create_dataframe(
                [None] * row_count, schema=strict_schema
            ).collect()

tests/integ/test_dataframe.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,9 +1827,12 @@ def test_create_dataframe_with_schema_col_names(session):
18271827
for field, expected_name in zip(df.schema.fields, col_names[:2] + ["_3", "_4"]):
18281828
assert Utils.equals_ignore_case(field.name, expected_name)
18291829

1830+
# specify the schema as a StructType to avoid inserting null values into a non-nullable column
1831+
struct_col_name = StructType([StructField(col, StringType()) for col in col_names])
1832+
18301833
# the column names provided via schema keyword will overwrite other column names
18311834
df = session.create_dataframe(
1832-
[{"aa": 1, "bb": 2, "cc": 3, "dd": 4}], schema=col_names
1835+
[{"aa": 1, "bb": 2, "cc": 3, "dd": 4}], schema=struct_col_name
18331836
)
18341837
for field, expected_name in zip(df.schema.fields, col_names):
18351838
assert Utils.equals_ignore_case(field.name, expected_name)
@@ -2734,15 +2737,15 @@ def test_save_as_table_nullable_test(
27342737
StructField("B", data_type, True),
27352738
]
27362739
)
2737-
df = session.create_dataframe(
2738-
[(None, None)] * (5000 if large_data else 1), schema=schema
2739-
)
27402740

27412741
try:
27422742
with pytest.raises(
27432743
(IntegrityError, SnowparkSQLException),
27442744
match="NULL result in a non-nullable column",
27452745
):
2746+
df = session.create_dataframe(
2747+
[(None, None)] * (5000 if large_data else 1), schema=schema
2748+
)
27462749
df.write.save_as_table(table_name, mode=save_mode)
27472750
finally:
27482751
Utils.drop_table(session, table_name)
@@ -2768,13 +2771,13 @@ def mock_run_query(*args, **kwargs):
27682771
StructField("B", IntegerType(), True),
27692772
]
27702773
)
2771-
df = session.create_dataframe([(None, None)], schema=schema)
27722774

27732775
try:
27742776
with pytest.raises(
27752777
(IntegrityError, SnowparkSQLException),
27762778
match="NULL result in a non-nullable column",
27772779
):
2780+
df = session.create_dataframe([(None, None)], schema=schema)
27782781
df.write.save_as_table(table_name, mode=save_mode)
27792782
finally:
27802783
Utils.drop_table(session, table_name)

0 commit comments

Comments
 (0)