
ci: division by zero test failing for pyspark #2753

@MarcoGorelli

Description


The test introduced in #2636 fails for PySpark, which isn't run in CI by default.

We should update the PySpark backend code to use `try_divide`.


https://github.com/narwhals-dev/narwhals/actions/runs/15945539506/job/44979105403
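For context: when Spark's ANSI mode is enabled, a plain `/` with a zero divisor raises, while `try_divide` returns NULL. Below is a minimal standalone sketch of the suggested direction using plain PySpark, not narwhals' actual `_spark_like` internals (the session setup and column names are illustrative; `try_divide` requires PySpark 3.5+):

```python
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
spark.conf.set("spark.sql.ansi.enabled", "true")  # the mode under which CI fails

df = spark.createDataFrame([(0,)], ["a"])

# Plain `/` raises ArithmeticException ([DIVIDE_BY_ZERO]) under ANSI mode:
#   df.select((F.col("a") / F.lit(0)).alias("a")).collect()
# `try_divide` tolerates a zero divisor and yields NULL instead, so a
# floordiv built on it produces None for 0 // 0, which is what the test
# expects for pyspark:
result = df.select(F.floor(F.try_divide(F.col("a"), F.lit(0))).alias("a"))
result.show()  # a single NULL row
```

Note that `F.floor` propagates NULL, so `try_divide` followed by `floor` returns None rather than raising.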

_________________ test_floordiv_int_by_zero[pyspark-0-0-None] __________________

left = 0, right = 0, expected = None
constructor = <function pyspark_lazy_constructor.<locals>._constructor at 0x7fe8efc98180>
request = <FixtureRequest for <Function test_floordiv_int_by_zero[pyspark-0-0-None]>>

    @pytest.mark.parametrize(
        ("left", "right", "expected"),
        [(-2, 0, float("-inf")), (0, 0, None), (2, 0, float("inf"))],
    )
    @pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="converts floordiv by zero to 0")
    def test_floordiv_int_by_zero(
        left: int,
        right: int,
        expected: float | None,
        constructor: Constructor,
        request: pytest.FixtureRequest,
    ) -> None:
        data: dict[str, list[int]] = {"a": [left]}
        df = nw.from_native(constructor(data))
        # pyarrow backend floordiv raises divide by zero error
        # ibis backend floordiv cannot cast value to inf or -inf
        if any(x in str(constructor) for x in ["ibis", "pyarrow"]):
            request.applymarker(pytest.mark.xfail)
        # duckdb backend floordiv return None
        if "duckdb" in str(constructor):
            floordiv_result = df.select(nw.col("a") // right)
            assert_equal_data(floordiv_result, {"a": [None]})
        # polars backend floordiv returns null
        elif "polars" in str(constructor) and "lazy" not in str(constructor):
            floordiv_result = df.select(nw.col("a") // right)
            assert all(floordiv_result["a"].is_null())
        # polars lazy floordiv cannot be sliced and returns None
        elif all(x in str(constructor) for x in ["polars", "lazy"]):
            floordiv_result = df.select(nw.col("a") // right)
            assert_equal_data(floordiv_result, {"a": [None]})
        # pandas[nullable] backend floordiv always returns 0
        elif all(x in str(constructor) for x in ["pandas", "nullable"]):
            floordiv_result = df.select(nw.col("a") // right)
            assert_equal_data(floordiv_result, {"a": [0]})
        else:
            floordiv_result = df.select(nw.col("a") // right)
>           assert_equal_data(floordiv_result, {"a": [expected]})

tests/expr_and_series/division_by_zero_test.py:115: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
tests/utils.py:92: in assert_equal_data
    result = result.collect(**kwargs.get(result.implementation, {}))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
narwhals/dataframe.py:2336: in collect
    self._compliant_frame.collect(backend=eager_backend, **kwargs), level="full"
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
narwhals/_spark_like/dataframe.py:234: in collect
    self._collect_to_arrow(),
    ^^^^^^^^^^^^^^^^^^^^^^^^
narwhals/_spark_like/dataframe.py:196: in _collect_to_arrow
    return self.native.toArrow()
           ^^^^^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.11.13/x64/lib/python3.11/site-packages/pyspark/sql/classic/dataframe.py:1789: in toArrow
    return PandasConversionMixin.toArrow(self)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/opt/hostedtoolcache/Python/3.11.13/x64/lib/python3.11/site-packages/pyspark/sql/pandas/conversion.py:249: in toArrow
    batches = self._collect_as_arrow(
/opt/hostedtoolcache/Python/3.11.13/x64/lib/python3.11/site-packages/pyspark/sql/pandas/conversion.py:315: in _collect_as_arrow
    with unwrap_spark_exception():
/opt/hostedtoolcache/Python/3.11.13/x64/lib/python3.11/contextlib.py:158: in __exit__
    self.gen.throw(typ, value, traceback)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

    @contextmanager
    def unwrap_spark_exception() -> Iterator[Any]:
        from pyspark import SparkContext
        from py4j.protocol import Py4JJavaError
        from py4j.java_gateway import is_instance_of
    
        assert SparkContext._gateway is not None
    
        gw = SparkContext._gateway
        try:
            yield
        except Py4JJavaError as e:
            je: "Py4JJavaError" = e.java_exception
            if je is not None and is_instance_of(gw, je, "org.apache.spark.SparkException"):
                converted = convert_exception(je.getCause())
                if not isinstance(converted, UnknownException):
>                   raise converted from None
E                   pyspark.errors.exceptions.captured.ArithmeticException: [DIVIDE_BY_ZERO] Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22012
E                   == DataFrame ==
E                   "__truediv__" was called from
E                   /home/runner/work/narwhals/narwhals/narwhals/_spark_like/expr.py:403
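As the error message itself notes, the session-level escape hatch is to set `spark.sql.ansi.enabled` to `false`, which restores NULL-on-division-by-zero. A quick illustration of that behaviour (a local debugging aid only, not the fix this issue asks for, which is `try_divide` in the backend):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
spark.conf.set("spark.sql.ansi.enabled", "false")

# With ANSI mode off, division by zero silently yields NULL:
spark.sql("SELECT 0 / 0 AS a").show()
```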
