Skip to content

Commit b193cd5

Browse files
fix reindex to work for string dtype
1 parent 66caaae commit b193cd5

File tree

2 files changed: +25 −1 lines changed

2 files changed: +25 −1 lines changed

pandas/core/groupby/groupby.py

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ class providing the base-class of operations.
3434

3535
import numpy as np
3636

37+
from pandas._config import using_string_dtype
3738
from pandas._config.config import option_context
3839

3940
from pandas._libs import (
@@ -3156,7 +3157,7 @@ def sum(
31563157
npfunc=np.sum,
31573158
)
31583159

3159-
return self._reindex_output(result, fill_value=0)
3160+
return self._reindex_output(result, fill_value=0, method="sum")
31603161

31613162
@final
31623163
@doc(
@@ -5574,6 +5575,7 @@ def _reindex_output(
55745575
output: OutputFrameOrSeries,
55755576
fill_value: Scalar = np.nan,
55765577
qs: npt.NDArray[np.float64] | None = None,
5578+
method: str | None = None,
55775579
) -> OutputFrameOrSeries:
55785580
"""
55795581
If we have categorical groupers, then we might want to make sure that
@@ -5634,6 +5636,24 @@ def _reindex_output(
56345636
"copy": False,
56355637
"fill_value": fill_value,
56365638
}
5639+
if using_string_dtype() and method == "sum":
5640+
if isinstance(output, Series) and isinstance(output.dtype, StringDtype):
5641+
d["fill_value"] = ""
5642+
return output.reindex(**d) # type: ignore[arg-type]
5643+
elif isinstance(output, DataFrame) and any(
5644+
isinstance(dtype, StringDtype) for dtype in output.dtypes
5645+
):
5646+
orig_dtypes = output.dtypes
5647+
indices = np.nonzero(output.dtypes == "string")[0]
5648+
for idx in indices:
5649+
output.isetitem(idx, output.iloc[:, idx].astype(object))
5650+
output = output.reindex(**d)
5651+
for idx in indices:
5652+
col = output.iloc[:, idx]
5653+
output.isetitem(
5654+
idx, col.mask(col == 0, "").astype(orig_dtypes.iloc[idx])
5655+
)
5656+
return output
56375657
return output.reindex(**d) # type: ignore[arg-type]
56385658

56395659
# GH 13204

pandas/tests/groupby/test_categorical.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -391,6 +391,10 @@ def test_observed(observed, using_infer_string):
391391

392392
tm.assert_frame_equal(result, expected)
393393

394+
result = gb["C"].sum()
395+
expected = expected["C"]
396+
tm.assert_series_equal(result, expected)
397+
394398
# https://github.com/pandas-dev/pandas/issues/8138
395399
d = {
396400
"cat": Categorical(

0 commit comments

Comments
 (0)