Skip to content
Merged
Changes from 13 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions tests/expr_and_series/arithmetic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,101 @@ def test_truediv_same_dims(
assert_equal_data({"a": result}, {"a": [2, 1, 1 / 3]})


@pytest.mark.parametrize(
    ("left", "right", "expected"),
    [
        (-2, 0, float("-inf")),
        (0, 0, None),
        (2, 0, float("inf")),
    ],
)
def test_series_truediv_by_zero(
    left: int,
    right: int,
    expected: float | None,
    constructor_eager: ConstructorEager,
) -> None:
    """Check true division of an eager series by a zero-valued series.

    A single-row frame is built with ``left`` in column ``"a"`` and ``right``
    in column ``"b"``; ``a / b`` is then compared against ``expected``.
    """
    native_frame = constructor_eager({"a": [left], "b": [right]})
    frame = nw.from_native(native_frame, eager_only=True)
    numerator, denominator = frame["a"], frame["b"]
    assert_equal_data({"a": numerator / denominator}, {"a": [expected]})


# Series-level floor division by zero; the expectation is pinned per backend below.
@pytest.mark.parametrize(
("left", "right", "expected"),
[(-2, 0, float("-inf")), (0, 0, None), (2, 0, float("inf"))],
)
@pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="converts floordiv by zero to 0")
def test_series_floordiv_by_zero(
left: int,
right: int,
expected: float | None,
constructor_eager: ConstructorEager,
request: pytest.FixtureRequest,
) -> None:
data: dict[str, list[int]] = {"a": [left], "b": [right]}
df = nw.from_native(constructor_eager(data), eager_only=True)
# pyarrow backend: floordiv by zero raises a divide-by-zero error,
# so the test is marked as an expected failure
if "pyarrow" in str(constructor_eager):
request.applymarker(pytest.mark.xfail)
# polars backend: floordiv by zero always returns null, whatever the numerator
if "polars" in str(constructor_eager):
floordiv_result = df["a"] // df["b"]
assert all(floordiv_result.is_null())
# pandas[nullable] backend: floordiv by zero always returns 0
# (upstream is aware, see pandas-dev/pandas#30188)
elif all(x in str(constructor_eager) for x in ["pandas", "nullable"]):
floordiv_result = df["a"] // df["b"]
assert_equal_data({"a": floordiv_result}, {"a": [0]})
Copy link
Member

@FBruzzesi FBruzzesi Jun 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MarcoGorelli this might be worth reporting upstream as well. WDYT? Is this expected?
Repro:

pd.Series([-1, 0, 1]).convert_dtypes("numpy_nullable")//0
Out[4]: 
0    0
1    0
2    0
dtype: Int64

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@FBruzzesi upstream is aware: pandas-dev/pandas#30188 on this one. This corner of NaN producing operations on "numpy_nullable" backed values likely won't be resolved (pandas may be at yet another crossroads on this) so I think we should just xfail for this specific test.

Pinging @MarcoGorelli to verify.

Copy link
Contributor Author

@jrw34 jrw34 Jun 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@camriddell, @FBruzzesi, and @MarcoGorelli: thank you for all of the review. I am happy to change this case to xfail if needed, so just let me know.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second thought, let's keep the current code, as it captures the behavior of each of these backends; if they change in the future, we'll be aware of the shift.

Would it be possible to also add another variant of these tests that works with floating point values as the inputs for the numerator/denominator? We should see much more consistent results returned (e.g. see [inf, NaN, -inf]) across each of the backends.


Some thoughts on the oddities you observed. Feel free to ignore, I wanted to capture this in case we ever need to revisit this decision.

It seems that there are a few camps on the set of results that one would obtain from floor dividing two integer arrays (where the denom is 0):

  1. return i64 of nulls (Polars): if you divide floats by 0.0, you end up with values of inf, -inf, or NaN, which do not exist in integer dtypes; therefore the result is (shortcutted?) i64 with Null values.
  2. return i64 all 0s (pandas[nullable], numpy). Tough to reason about, numpy issues a RuntimeWarning but pandas[nullable] does not.
  3. return f64 output (pandas). Perhaps more mathematically sound, but some may be surprised at seeing floats returned when floor-dividing two integers.
  4. raise (PyArrow, pandas[pyarrow]): this is an opt-in behavior and we followed pandas.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting point about the floating point differences; I will add that in. Given that there are now going to be 8 different test functions, should I create a test_division_by_zero.py file to house all these tests instead of bulking up arithmetic_test.py even more?

else:
floordiv_result = df["a"] // df["b"]
assert_equal_data({"a": floordiv_result}, {"a": [expected]})


@pytest.mark.parametrize(
    ("left", "right", "expected"),
    [
        (-2, 0, float("-inf")),
        (0, 0, None),
        (2, 0, float("inf")),
    ],
)
def test_truediv_by_zero(
    left: int,
    right: int,
    expected: float | None,
    constructor: Constructor,
) -> None:
    """Check true division of an expression column by a zero scalar.

    A single-row frame is built with ``left`` in column ``"a"``; the
    expression ``col("a") / right`` is selected and compared against
    ``expected``.
    """
    frame = nw.from_native(constructor({"a": [left]}))
    quotient_expr = nw.col("a") / right
    assert_equal_data(frame.select(quotient_expr), {"a": [expected]})


@pytest.mark.parametrize(
    ("left", "right", "expected"),
    [
        (-2, 0, float("-inf")),
        (0, 0, None),
        (2, 0, float("inf")),
    ],
)
@pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="converts floordiv by zero to 0")
def test_floordiv_by_zero(
    left: int,
    right: int,
    expected: float | None,
    constructor: Constructor,
    request: pytest.FixtureRequest,
) -> None:
    """Check floor division of an expression column by a zero scalar.

    The expectation is pinned per backend, identified by substrings of
    ``str(constructor)``, so that a change in any backend's behavior is
    noticed by this test.
    """
    backend = str(constructor)
    frame = nw.from_native(constructor({"a": [left]}))

    # pyarrow floordiv raises a divide-by-zero error; ibis floordiv cannot
    # cast the value to inf or -inf. Both are expected failures.
    if "ibis" in backend or "pyarrow" in backend:
        request.applymarker(pytest.mark.xfail)

    floordiv_result = frame.select(nw.col("a") // right)

    is_duckdb = "duckdb" in backend
    is_polars = "polars" in backend
    is_lazy = "lazy" in backend

    # Eager polars returns null; the column can be sliced out and inspected.
    if is_polars and not is_lazy and not is_duckdb:
        assert all(floordiv_result["a"].is_null())
        return

    if is_duckdb or is_polars:
        # duckdb returns None; a polars lazy result cannot be sliced and
        # also returns None.
        wanted = [None]
    elif "pandas" in backend and "nullable" in backend:
        # pandas[nullable] floordiv always returns 0.
        wanted = [0]
    else:
        wanted = [expected]
    assert_equal_data(floordiv_result, {"a": wanted})


@pytest.mark.slow
@given(left=st.integers(-100, 100), right=st.integers(-100, 100))
@pytest.mark.skipif(PANDAS_VERSION < (2, 0), reason="convert_dtypes not available")
Expand Down
Loading