Fix groupby `_reindex_output` to work for string dtype in `sum`

jorisvandenbossche · jorisvandenbossche · commit b193cd57f199 · 2025-08-20T10:14:44.000+02:00
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -34,6 +34,7 @@ class providing the base-class of operations.
 
 import numpy as np
 
+from pandas._config import using_string_dtype
 from pandas._config.config import option_context
 
 from pandas._libs import (
@@ -3156,7 +3157,7 @@ def sum(
                     npfunc=np.sum,
                 )
 
-            return self._reindex_output(result, fill_value=0)
+            return self._reindex_output(result, fill_value=0, method="sum")
 
     @final
     @doc(
@@ -5574,6 +5575,7 @@ def _reindex_output(
         output: OutputFrameOrSeries,
         fill_value: Scalar = np.nan,
         qs: npt.NDArray[np.float64] | None = None,
+        method: str | None = None,
     ) -> OutputFrameOrSeries:
         """
         If we have categorical groupers, then we might want to make sure that
@@ -5634,6 +5636,24 @@ def _reindex_output(
                 "copy": False,
                 "fill_value": fill_value,
             }
+            if using_string_dtype() and method == "sum":
+                if isinstance(output, Series) and isinstance(output.dtype, StringDtype):
+                    d["fill_value"] = ""
+                    return output.reindex(**d)  # type: ignore[arg-type]
+                elif isinstance(output, DataFrame) and any(
+                    isinstance(dtype, StringDtype) for dtype in output.dtypes
+                ):
+                    orig_dtypes = output.dtypes
+                    indices = np.nonzero(output.dtypes == "string")[0]
+                    for idx in indices:
+                        output.isetitem(idx, output.iloc[:, idx].astype(object))
+                    output = output.reindex(**d)
+                    for idx in indices:
+                        col = output.iloc[:, idx]
+                        output.isetitem(
+                            idx, col.mask(col == 0, "").astype(orig_dtypes.iloc[idx])
+                        )
+                    return output
             return output.reindex(**d)  # type: ignore[arg-type]
 
         # GH 13204
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -391,6 +391,10 @@ def test_observed(observed, using_infer_string):
 
     tm.assert_frame_equal(result, expected)
 
+    result = gb["C"].sum()
+    expected = expected["C"]
+    tm.assert_series_equal(result, expected)
+
     # https://github.com/pandas-dev/pandas/issues/8138
     d = {
         "cat": Categorical(