Skip to content

Commit fba9f78

Browse files
committed
GroupBy sum for an all-NaN object array should be NaN instead of None
1 parent 5d6663e commit fba9f78

File tree

4 files changed

+23
-4
lines changed

4 files changed

+23
-4
lines changed

pandas/_libs/groupby.pyx

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -727,7 +727,7 @@ def group_sum(
727727
nobs = np.zeros((<object>out).shape, dtype=np.int64)
728728
if sum_t is object:
729729
# For object dtype, fill value should not be 0 (#60229)
730-
sumx = np.empty((<object>out).shape, dtype=object)
730+
sumx = np.full((<object>out).shape, NAN, dtype=object)
731731
else:
732732
# the below is equivalent to `np.zeros_like(out)` but faster
733733
sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
@@ -764,7 +764,10 @@ def group_sum(
764764
if uses_mask:
765765
isna_result = result_mask[lab, j]
766766
else:
767-
isna_result = _treat_as_na(sumx[lab, j], is_datetimelike)
767+
isna_result = (
768+
_treat_as_na(sumx[lab, j], is_datetimelike) and
769+
nobs[lab, j] > 0
770+
)
768771

769772
if isna_result:
770773
# If sum is already NA, don't add to it. This is important for
@@ -799,6 +802,7 @@ def group_sum(
799802
compensation[lab, j] = 0
800803
sumx[lab, j] = t
801804
elif not skipna:
805+
nobs[lab, j] += 1
802806
if uses_mask:
803807
result_mask[lab, j] = True
804808
else:

pandas/tests/groupby/test_categorical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ def test_observed(request, using_infer_string, observed):
358358
expected = cartesian_product_for_groupers(
359359
expected, [cat1, cat2], list("AB"), fill_value=0
360360
)
361-
expected.loc[expected.C == 0, "C"] = None
361+
expected.loc[expected.C == 0, "C"] = np.nan
362362

363363
tm.assert_frame_equal(result, expected)
364364

pandas/tests/groupby/test_reductions.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,21 @@ def test_sum_skipna_object(skipna):
514514
tm.assert_series_equal(result, expected)
515515

516516

517+
def test_sum_allnan_object(skipna):
518+
# GH#60229
519+
df = DataFrame(
520+
{
521+
"val": [np.nan] * 10,
522+
"cat": ["A", "B"] * 5,
523+
}
524+
).astype({"val": object})
525+
expected = Series(
526+
[np.nan, np.nan], index=pd.Index(["A", "B"], name="cat"), name="val"
527+
).astype(object)
528+
result = df.groupby("cat")["val"].sum(skipna=skipna)
529+
tm.assert_series_equal(result, expected)
530+
531+
517532
@pytest.mark.parametrize(
518533
"func, values, dtype, result_dtype",
519534
[

pandas/tests/groupby/test_timegrouper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def test_groupby_with_timegrouper(self):
113113
unit=df.index.unit,
114114
)
115115
expected = DataFrame(
116-
{"Buyer": None, "Quantity": 0},
116+
{"Buyer": np.nan, "Quantity": 0},
117117
index=exp_dti,
118118
)
119119
# Cast to object to avoid implicit cast when setting entry to "CarlCarlCarl"

0 commit comments

Comments
 (0)