Skip to content

Commit fb0c4b8

Browse files
xinrong-meng and zhengruifeng
authored and committed
[SPARK-53506][PS] Disallow % between Decimal and float under ANSI
### What changes were proposed in this pull request?

Disallow `%` between Decimal and float under ANSI.

pandas:
```py
>>> pdf['decimal'] % 0.1
Traceback (most recent call last):
...
TypeError: unsupported operand type(s) for %: 'decimal.Decimal' and 'float'
```

pandas on spark before:
```py
>>> psdf['decimal'] % 0.1
0    0.1
1    0.1
2    0.1
Name: decimal, dtype: float64
```

pandas on spark after:
```py
>>> psdf['decimal'] % 0.1
Traceback (most recent call last):
...
TypeError: Modulo can not be applied to given types.
```

### Why are the changes needed?

Part of https://issues.apache.org/jira/browse/SPARK-53389

### Does this PR introduce _any_ user-facing change?

No, the feature is not released yet.

### How was this patch tested?

Unit tests. The commands below passed:
```sh
SPARK_ANSI_SQL_MODE=true ./python/run-tests --python-executables=python3.11 --testnames "pyspark.pandas.tests.data_type_ops.test_num_mul_div NumMulDivTests.test_mod"
SPARK_ANSI_SQL_MODE=false ./python/run-tests --python-executables=python3.11 --testnames "pyspark.pandas.tests.data_type_ops.test_num_mul_div NumMulDivTests.test_mod"
```

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #52255 from xinrong-meng/mod.

Authored-by: Xinrong Meng <xinrong@apache.org>
Signed-off-by: Ruifeng Zheng <ruifengz@apache.org>
1 parent b08ac30 commit fb0c4b8

File tree

2 files changed

+43
-0
lines changed

2 files changed

+43
-0
lines changed

python/pyspark/pandas/data_type_ops/num_ops.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,13 @@ def wrapped_mul(lc: PySparkColumn, rc: Any) -> PySparkColumn:
469469

470470
return column_op(wrapped_mul)(left, new_right)
471471

472+
def mod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Apply ``%`` to ``left`` and ``right``, refusing Decimal/float mixes under ANSI.

    Parameters
    ----------
    left : IndexOpsLike
        Operand whose backing Spark session decides whether ANSI mode is on.
    right : Any
        The other operand of the modulo.

    Raises
    ------
    TypeError
        If ANSI mode is enabled and ``_is_decimal_float_mixed(left, right)``
        reports a mix.
    """
    session = left._internal.spark_frame.sparkSession
    # Short-circuit: the operand-type check is only evaluated when ANSI mode is on.
    rejected = is_ansi_mode_enabled(session) and _is_decimal_float_mixed(left, right)
    if not rejected:
        # Everything else is delegated to the parent implementation.
        return super().mod(left, right)

    raise TypeError("Modulo can not be applied to given types.")
478+
472479
def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
473480
_sanitize_list_like(right)
474481
if not is_valid_operand_for_numeric_arithmetic(right):
@@ -572,6 +579,13 @@ def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
572579
raise TypeError("Multiplication can not be applied to given types.")
573580
return super().rmul(left, right)
574581

582+
def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Apply the reflected ``%`` (``rmod``), refusing Decimal/float mixes under ANSI.

    Parameters
    ----------
    left : IndexOpsLike
        Operand whose backing Spark session decides whether ANSI mode is on.
    right : Any
        The other operand of the reflected modulo.

    Raises
    ------
    TypeError
        If ANSI mode is enabled and ``_is_decimal_float_mixed(left, right)``
        reports a mix.
    """
    session = left._internal.spark_frame.sparkSession
    # Short-circuit: the operand-type check is only evaluated when ANSI mode is on.
    rejected = is_ansi_mode_enabled(session) and _is_decimal_float_mixed(left, right)
    if not rejected:
        # Everything else is delegated to the parent implementation.
        return super().rmod(left, right)

    raise TypeError("Modulo can not be applied to given types.")
588+
575589
def isnull(self, index_ops: IndexOpsLike) -> IndexOpsLike:
576590
return index_ops._with_new_scol(
577591
index_ops.spark.column.isNull() | F.isnan(index_ops.spark.column),

python/pyspark/pandas/tests/data_type_ops/test_num_mul_div.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,35 @@ def test_floordiv(self):
111111
self.assertRaises(TypeError, lambda: psdf["decimal"] // 0.1)
112112
self.assertRaises(TypeError, lambda: 0.1 // psdf["decimal"])
113113

114+
def test_mod(self):
    """Exercise ``%`` on the pandas (``pdf``) and pandas-on-Spark (``psdf``) fixtures.

    Covers numeric % numeric/bool results via ``assert_eq``, TypeError for
    non-numeric right operands, and (when ``is_ansi_mode_test`` is set)
    TypeError for Decimal/float mixes in both operand orders.
    """
    # NOTE(review): indentation was reconstructed from a flattened diff view.
    # The non-numeric loop is assumed to sit inside the per-column loop because
    # it reads ``psser`` — confirm against the repository before relying on it.
    pdf, psdf = self.pdf, self.psdf

    # element-wise modulo for numeric columns
    for col in self.numeric_df_cols:
        pser, psser = pdf[col], psdf[col]

        # Only these dtypes are compared here; other numeric columns skip the
        # value checks.
        if psser.dtype in [float, int, np.int32]:
            self.assert_eq(pser % pser, psser % psser)
            self.assert_eq(pser % pser.astype(bool), psser % psser.astype(bool))
            self.assert_eq(pser % True, psser % True)
            # TODO: decide if to follow pser % False
            # Expected side uses ``% 0`` while the pandas-on-Spark side uses ``% False``.
            self.assert_eq(pser % 0, psser % False)

        # modulo with non-numeric columns
        for n_col in self.non_numeric_df_cols:
            if n_col == "bool":
                # bool is the one non-numeric column expected to work as a divisor.
                self.assert_eq(pdf["float"] % pdf["bool"], psdf["float"] % psdf["bool"])
            else:
                # The lambda is invoked immediately by assertRaises, so it sees the
                # current ``psser`` and ``n_col``.
                self.assertRaises(TypeError, lambda: psser % psdf[n_col])

    # ``is_ansi_mode_test`` is defined outside this view; presumably a module-level
    # flag reflecting SPARK_ANSI_SQL_MODE — verify at the import site.
    if is_ansi_mode_test:
        # Decimal/float mixes must be rejected in both operand orders, for
        # column-vs-column and column-vs-scalar.
        self.assertRaises(TypeError, lambda: psdf["decimal"] % psdf["float"])
        self.assertRaises(TypeError, lambda: psdf["float"] % psdf["decimal"])
        self.assertRaises(TypeError, lambda: psdf["decimal"] % psdf["float32"])
        self.assertRaises(TypeError, lambda: psdf["float32"] % psdf["decimal"])
        self.assertRaises(TypeError, lambda: psdf["decimal"] % 0.1)
        self.assertRaises(TypeError, lambda: 0.1 % psdf["decimal"])
142+
114143

115144
class NumMulDivTests(
116145
NumMulDivTestsMixin,

0 commit comments

Comments
 (0)