Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def as_string_column(self, dtype: DtypeObj) -> StringColumn:
)
if len(self) == 0:
return cast(
cudf.core.column.StringColumn,
"cudf.core.column.StringColumn",
column_empty(0, dtype=CUDF_STRING_DTYPE),
)

Expand Down Expand Up @@ -576,6 +576,20 @@ def as_numerical_column(self, dtype: DtypeObj) -> NumericalColumn:
res = self.nans_to_nulls().cast(dtype=dtype)
res._dtype = dtype
return res # type: ignore[return-value]

# --- FIX: Match Pandas behavior when casting Float(with Nulls) -> Bool ---
# Pandas treats NaN as truthy (True) when casting float -> bool.
# In cuDF, Nulls propagate. We must fill Nulls with np.nan so the
# cast treats them as True.
if (
self.dtype.kind == "f"
and dtype.kind == "b"
and not is_pandas_nullable_extension_dtype(dtype)
and self.has_nulls()
):
return self.fillna(np.nan).cast(dtype=dtype) # type: ignore[return-value]
# ------------------------------------------------------------------------

if dtype_to_pylibcudf_type(dtype) == dtype_to_pylibcudf_type(
self.dtype
):
Expand Down Expand Up @@ -746,7 +760,7 @@ def find_and_replace(
replacement_col = replacement_col.repeat(len(to_replace_col))
elif len(replacement_col) == 1 and len(to_replace_col) == 0:
return self.copy()
replaced = cast(Self, self.astype(common_type))
replaced = cast("Self", self.astype(common_type))
df = cudf.DataFrame._from_data(
{
"old": to_replace_col.astype(common_type),
Expand Down Expand Up @@ -910,7 +924,7 @@ def _with_type_metadata(
) -> ColumnBase:
if isinstance(dtype, CategoricalDtype):
codes_dtype = min_unsigned_type(len(dtype.categories))
codes = cast(NumericalColumn, self.astype(codes_dtype))
codes = cast("NumericalColumn", self.astype(codes_dtype))
return CategoricalColumn(
plc_column=codes.to_pylibcudf(mode="read"),
size=codes.size,
Expand Down
44 changes: 44 additions & 0 deletions python/cudf/cudf/tests/test_issue_20746.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

import numpy as np
import pandas as pd

import cudf
from cudf.testing import assert_series_equal


def test_cast_float_nan_to_bool_pandas_compat():
    """
    Regression test for Issue #20746.

    With ``mode.pandas_compatible`` enabled, casting a float64 Series that
    contains NaN and null entries to ``bool`` must map those entries to
    ``True`` and leave no nulls in the result.
    """
    data = [1.0, 0.0, np.nan, None]

    # option_context restores the option's previous value on exit (including
    # when an assertion fails), rather than forcing it to False, so this test
    # does not disturb a run that already has pandas-compatible mode enabled.
    with cudf.option_context("mode.pandas_compatible", True):
        gs = cudf.Series(data, dtype="float64")
        got = gs.astype("bool")

        # Expected result as asserted by this test: NaN/None become True.
        expected = pd.Series([True, False, True, True], dtype="bool")

        # The boolean result must not carry any nulls.
        assert got.null_count == 0

        # Compare on the cuDF side using the cuDF testing helper.
        expected_cudf = cudf.Series(expected)
        assert_series_equal(got, expected_cudf)