Skip to content

Commit 61f95ff

Browse files
committed
BUG: GroupBy sum for object dtype should return None instead of 0 when all values in a group are NaN
1 parent a4fc97e commit 61f95ff

File tree

3 files changed

+8
-3
lines changed

3 files changed

+8
-3
lines changed

pandas/_libs/groupby.pyx

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -717,8 +717,12 @@ def group_sum(
717717
raise ValueError("len(index) != len(labels)")
718718

719719
nobs = np.zeros((<object>out).shape, dtype=np.int64)
720-
# the below is equivalent to `np.zeros_like(out)` but faster
721-
sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
720+
if sum_t is object:
721+
# For object dtype, fill value should not be 0 (#60229)
722+
sumx = np.empty((<object>out).shape, dtype=object)
723+
else:
724+
# the below is equivalent to `np.zeros_like(out)` but faster
725+
sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
722726
compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
723727

724728
N, K = (<object>values).shape

pandas/tests/groupby/test_categorical.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ def test_observed(request, using_infer_string, observed):
361361
expected = cartesian_product_for_groupers(
362362
expected, [cat1, cat2], list("AB"), fill_value=0
363363
)
364+
expected.loc[expected.C == 0, "C"] = None
364365

365366
tm.assert_frame_equal(result, expected)
366367

pandas/tests/groupby/test_timegrouper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def test_groupby_with_timegrouper(self):
113113
unit=df.index.unit,
114114
)
115115
expected = DataFrame(
116-
{"Buyer": 0, "Quantity": 0},
116+
{"Buyer": None, "Quantity": 0},
117117
index=exp_dti,
118118
)
119119
# Cast to object to avoid implicit cast when setting entry to "CarlCarlCarl"

0 commit comments

Comments
 (0)