Skip to content
Merged
32 changes: 17 additions & 15 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,27 +371,29 @@ def _call_cython_op(

is_datetimelike = dtype.kind in "mM"

if is_datetimelike:
values = values.view("int64")
is_numeric = True
elif dtype.kind == "b":
values = values.view("uint8")
if values.dtype == "float16":
values = values.astype(np.float32)

if self.how in ["any", "all"]:
if mask is None:
mask = isna(values)
if dtype == object:
if kwargs["skipna"]:
# GH#37501: don't raise on pd.NA when skipna=True
if mask.any():
# mask on original values computed separately
values = values.copy()
values[mask] = True
values = values.astype(bool, copy=False).view(np.int8)
is_numeric = True

if is_datetimelike:
# Handle NaT values correctly
if self.how == "any" and mask is not None:
# For "any", we want True only if there's at least one non-NaT value
values = (~mask).astype(np.int8) # Convert mask to int8
elif self.how == "all" and mask is not None:
# For "all", we want True only if all values are non-NaT
values = (~mask).all(axis=1, keepdims=True).astype(np.int8)
is_numeric = True
else:
values = values.view("int64") # Handle other cases appropriately

elif dtype.kind == "b":
values = values.view("uint8")
if values.dtype == "float16":
values = values.astype(np.float32)

values = values.T
if mask is not None:
mask = mask.T
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -1180,3 +1180,28 @@ def test_grouping_by_key_is_in_axis():
result = gb.sum()
expected = DataFrame({"a": [1, 2], "b": [1, 2], "c": [7, 5]})
tm.assert_frame_equal(result, expected)


def test_groupby_any_with_timedelta():
    """Check ``groupby(...).any()`` on a timedelta column that contains NaT.

    Each of the two groups holds one real Timedelta and one NaT, so ``any``
    is expected to be True for both groups.
    """
    # NOTE(review): no group below consists solely of NaT, so the
    # "only NaT -> False" outcome is not exercised here; consider adding
    # such a group if that case should be covered by this test.
    one_day = pd.Timedelta(1, unit="D")
    two_days = pd.Timedelta(2, unit="D")
    frame = DataFrame(
        {
            "A": ["foo", "foo", "bar", "bar"],
            "B": [one_day, pd.NaT, two_days, pd.NaT],
        }
    )

    grouped = frame.groupby("A", sort=True)
    result = grouped["B"].any()

    # With sort=True the group keys come back ordered, so "bar" precedes
    # "foo"; the result index is named after the grouping column.
    expected = Series(
        [True, True],
        index=pd.Index(["bar", "foo"], name="A"),
        name="B",
    )
    tm.assert_series_equal(result, expected)
Loading