Labels: ci, high priority
Description
The test introduced in #2636 fails for PySpark, which isn't run in CI by default. We should update the PySpark backend to use `try_divide`.
https://github.com/narwhals-dev/narwhals/actions/runs/15945539506/job/44979105403
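For context, here is a minimal sketch of the kind of change this suggests, assuming the floordiv logic in narwhals/_spark_like/expr.py can be built on pyspark's `try_divide` (the `floordiv` helper below is illustrative, not the actual narwhals code):

```python
# Illustrative sketch only, not the actual narwhals implementation.
from pyspark.sql import Column
from pyspark.sql import functions as F

def floordiv(left: Column, right: Column) -> Column:
    # F.try_divide (Spark >= 3.5) returns NULL instead of raising
    # DIVIDE_BY_ZERO when ANSI mode is enabled; floor(NULL) stays NULL,
    # so 0 // 0 comes back as NULL instead of erroring at collect time.
    return F.floor(F.try_divide(left, right))
```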
```
_________________ test_floordiv_int_by_zero[pyspark-0-0-None] __________________
left = 0, right = 0, expected = None
constructor = <function pyspark_lazy_constructor.<locals>._constructor at 0x7fe8efc98180>
request = <FixtureRequest for <Function test_floordiv_int_by_zero[pyspark-0-0-None]>>
@pytest.mark.parametrize(
    ("left", "right", "expected"),
    [(-2, 0, float("-inf")), (0, 0, None), (2, 0, float("inf"))],
)
@pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="converts floordiv by zero to 0")
def test_floordiv_int_by_zero(
    left: int,
    right: int,
    expected: float | None,
    constructor: Constructor,
    request: pytest.FixtureRequest,
) -> None:
    data: dict[str, list[int]] = {"a": [left]}
    df = nw.from_native(constructor(data))
    # pyarrow backend floordiv raises divide by zero error
    # ibis backend floordiv cannot cast value to inf or -inf
    if any(x in str(constructor) for x in ["ibis", "pyarrow"]):
        request.applymarker(pytest.mark.xfail)
    # duckdb backend floordiv returns None
    if "duckdb" in str(constructor):
        floordiv_result = df.select(nw.col("a") // right)
        assert_equal_data(floordiv_result, {"a": [None]})
    # polars backend floordiv returns null
    elif "polars" in str(constructor) and "lazy" not in str(constructor):
        floordiv_result = df.select(nw.col("a") // right)
        assert all(floordiv_result["a"].is_null())
    # polars lazy floordiv cannot be sliced and returns None
    elif all(x in str(constructor) for x in ["polars", "lazy"]):
        floordiv_result = df.select(nw.col("a") // right)
        assert_equal_data(floordiv_result, {"a": [None]})
    # pandas[nullable] backend floordiv always returns 0
    elif all(x in str(constructor) for x in ["pandas", "nullable"]):
        floordiv_result = df.select(nw.col("a") // right)
        assert_equal_data(floordiv_result, {"a": [0]})
    else:
        floordiv_result = df.select(nw.col("a") // right)
>       assert_equal_data(floordiv_result, {"a": [expected]})
tests/expr_and_series/division_by_zero_test.py:115:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tests/utils.py:92: in assert_equal_data
    result = result.collect(**kwargs.get(result.implementation, {}))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
narwhals/dataframe.py:2336: in collect
    self._compliant_frame.collect(backend=eager_backend, **kwargs), level="full"
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
narwhals/_spark_like/dataframe.py:234: in collect
    self._collect_to_arrow(),
    ^^^^^^^^^^^^^^^^^^^^^^^^
narwhals/_spark_like/dataframe.py:196: in _collect_to_arrow
    return self.native.toArrow()
           ^^^^^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.11.13/x64/lib/python3.11/site-packages/pyspark/sql/classic/dataframe.py:1789: in toArrow
    return PandasConversionMixin.toArrow(self)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.11.13/x64/lib/python3.11/site-packages/pyspark/sql/pandas/conversion.py:249: in toArrow
    batches = self._collect_as_arrow(
/opt/hostedtoolcache/Python/3.11.13/x64/lib/python3.11/site-packages/pyspark/sql/pandas/conversion.py:315: in _collect_as_arrow
    with unwrap_spark_exception():
/opt/hostedtoolcache/Python/3.11.13/x64/lib/python3.11/contextlib.py:158: in __exit__
    self.gen.throw(typ, value, traceback)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
@contextmanager
def unwrap_spark_exception() -> Iterator[Any]:
    from pyspark import SparkContext
    from py4j.protocol import Py4JJavaError
    from py4j.java_gateway import is_instance_of

    assert SparkContext._gateway is not None
    gw = SparkContext._gateway
    try:
        yield
    except Py4JJavaError as e:
        je: "Py4JJavaError" = e.java_exception
        if je is not None and is_instance_of(gw, je, "org.apache.spark.SparkException"):
            converted = convert_exception(je.getCause())
            if not isinstance(converted, UnknownException):
>               raise converted from None
E   pyspark.errors.exceptions.captured.ArithmeticException: [DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22012
E   == DataFrame ==
E   "__truediv__" was called from
E   /home/runner/work/narwhals/narwhals/narwhals/_spark_like/expr.py:403
```
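The exception only surfaces at collect time because Spark evaluates lazily. A hypothetical standalone reproduction (assuming PySpark 4.0, where `toArrow` exists and ANSI mode is enabled by default, matching the traceback above):

```python
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

# Assumed local session for illustration; spark.sql.ansi.enabled is what
# turns division by zero into an ArithmeticException instead of NULL.
spark = (
    SparkSession.builder.master("local[1]")
    .config("spark.sql.ansi.enabled", "true")
    .getOrCreate()
)
df = spark.createDataFrame([(0,)], ["a"])
result = df.select((df.a / 0).alias("a"))  # lazy, so no error yet
result.toArrow()  # raises [DIVIDE_BY_ZERO] ... SQLSTATE: 22012

# Swapping in try_divide returns NULL instead of raising:
df.select(F.try_divide(df.a, F.lit(0)).alias("a")).toArrow()  # one NULL row
```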