Merge branch 'pandas-dev:main' into main

Abu Jabar Mubarak · web-flow · commit 8400cd301c64 · 2025-07-14T17:10:20.000+05:30
diff --git a/README.md b/README.md
@@ -175,7 +175,7 @@ All contributions, bug reports, bug fixes, documentation improvements, enhanceme
 
 A detailed overview on how to contribute can be found in the **[contributing guide](https://pandas.pydata.org/docs/dev/development/contributing.html)**.
 
-If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
+If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?q=is%3Aissue%20state%3Aopen%20label%3ADocs%20sort%3Aupdated-desc) and [good first issue](https://github.com/pandas-dev/pandas/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22good%20first%20issue%22%20sort%3Aupdated-desc) where you could start out.
 
 You can also triage issues which may include reproducing bug reports, or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas).
 
diff --git a/doc/source/whatsnew/v0.4.x.rst b/doc/source/whatsnew/v0.4.x.rst
@@ -11,7 +11,7 @@ New features
 - Added Python 3 support using 2to3 (:issue:`200`)
 - :ref:`Added <dsintro.name_attribute>` ``name`` attribute to ``Series``, now
   prints as part of ``Series.__repr__``
-- :meth:`Series.isnull`` and :meth:`Series.notnull` (:issue:`209`, :issue:`203`)
+- :meth:`Series.isnull` and :meth:`Series.notnull` (:issue:`209`, :issue:`203`)
 - :ref:`Added <basics.align>` ``Series.align`` method for aligning two series
   with choice of join method (ENH56_)
 - :ref:`Added <advanced.get_level_values>` method ``get_level_values`` to
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -984,7 +984,7 @@ Removal of prior version deprecations/changes
 - Removed :meth:`Series.str.__iter__` (:issue:`28277`)
 - Removed ``pandas.SparseArray`` in favor of :class:`arrays.SparseArray` (:issue:`30642`)
 - Removed ``pandas.SparseSeries`` and ``pandas.SparseDataFrame``, including pickle support. (:issue:`30642`)
-- Enforced disallowing passing an integer ``fill_value`` to :meth:`DataFrame.shift` and :meth:`Series.shift`` with datetime64, timedelta64, or period dtypes (:issue:`32591`)
+- Enforced disallowing passing an integer ``fill_value`` to :meth:`DataFrame.shift` and :meth:`Series.shift` with datetime64, timedelta64, or period dtypes (:issue:`32591`)
 - Enforced disallowing a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
 - Enforced disallowing passing ``True`` and ``False`` into ``inclusive`` in :meth:`Series.between` in favor of ``"both"`` and ``"neither"`` respectively (:issue:`40628`)
 - Enforced disallowing using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
@@ -1045,7 +1045,7 @@ Removal of prior version deprecations/changes
 - Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`)
 - Changed behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`)
 - Changed behaviour of :meth:`DataFrame.to_latex` to now use the Styler implementation via :meth:`.Styler.to_latex` (:issue:`47970`)
-- Changed behavior of :meth:`Series.__setitem__` with an integer key and a :class:`Float64Index` when the key is not present in the index; previously we treated the key as positional (behaving like ``series.iloc[key] = val``), now we treat it is a label (behaving like ``series.loc[key] = val``), consistent with :meth:`Series.__getitem__`` behavior (:issue:`33469`)
+- Changed behavior of :meth:`Series.__setitem__` with an integer key and a :class:`Float64Index` when the key is not present in the index; previously we treated the key as positional (behaving like ``series.iloc[key] = val``), now we treat it as a label (behaving like ``series.loc[key] = val``), consistent with :meth:`Series.__getitem__` behavior (:issue:`33469`)
 - Removed ``na_sentinel`` argument from :func:`factorize`, :meth:`.Index.factorize`, and :meth:`.ExtensionArray.factorize` (:issue:`47157`)
 - Changed behavior of :meth:`Series.diff` and :meth:`DataFrame.diff` with :class:`ExtensionDtype` dtypes whose arrays do not implement ``diff``, these now raise ``TypeError`` rather than casting to numpy (:issue:`31025`)
 - Enforced deprecation of calling numpy "ufunc"s on :class:`DataFrame` with ``method="outer"``; this now raises ``NotImplementedError`` (:issue:`36955`)
diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst
@@ -13,7 +13,7 @@ including other versions of pandas.
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
-- Bug in :meth:`Timestamp.weekday`` was returning incorrect results before ``'0000-02-29'`` (:issue:`53738`)
+- Bug in :meth:`Timestamp.weekday` was returning incorrect results before ``'0000-02-29'`` (:issue:`53738`)
 - Fixed performance regression in merging on datetime-like columns (:issue:`53231`)
 - Fixed regression when :meth:`DataFrame.to_string` creates extra space for string dtypes (:issue:`52690`)
 
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -721,7 +721,7 @@ Conversion
 Strings
 ^^^^^^^
 - Bug in :meth:`Series.str` that did not raise a  ``TypeError`` when iterated (:issue:`54173`)
-- Bug in ``repr`` for :class:`DataFrame`` with string-dtype columns (:issue:`54797`)
+- Bug in ``repr`` for :class:`DataFrame` with string-dtype columns (:issue:`54797`)
 
 Interval
 ^^^^^^^^
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -784,7 +784,7 @@ MultiIndex
 
 I/O
 ^^^
-- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping`` elements. (:issue:`57915`)
+- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
 - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
 - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`)
 - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`)
@@ -869,6 +869,7 @@ Reshaping
 - Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` on ``intc`` or ``uintc`` types on Windows (:issue:`60091`, :issue:`58713`)
 - Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
 - Bug in :meth:`DataFrame.pivot_table` incorrectly ignoring the ``values`` argument when also supplied to the ``index`` or ``columns`` parameters (:issue:`57876`, :issue:`61292`)
+- Bug in :meth:`DataFrame.pivot_table` where ``margins=True`` did not correctly include groups with ``NaN`` values in the index or columns when ``dropna=False`` was explicitly passed (:issue:`61509`)
 - Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`)
 - Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -396,6 +396,7 @@ def __internal_pivot_table(
             observed=dropna,
             margins_name=margins_name,
             fill_value=fill_value,
+            dropna=dropna,
         )
 
     # discard the top level
@@ -422,6 +423,7 @@ def _add_margins(
     observed: bool,
     margins_name: Hashable = "All",
     fill_value=None,
+    dropna: bool = True,
 ):
     if not isinstance(margins_name, str):
         raise ValueError("margins_name argument must be a string")
@@ -461,6 +463,7 @@ def _add_margins(
             kwargs,
             observed,
             margins_name,
+            dropna,
         )
         if not isinstance(marginal_result_set, tuple):
             return marginal_result_set
@@ -469,7 +472,7 @@ def _add_margins(
         # no values, and table is a DataFrame
         assert isinstance(table, ABCDataFrame)
         marginal_result_set = _generate_marginal_results_without_values(
-            table, data, rows, cols, aggfunc, kwargs, observed, margins_name
+            table, data, rows, cols, aggfunc, kwargs, observed, margins_name, dropna
         )
         if not isinstance(marginal_result_set, tuple):
             return marginal_result_set
@@ -538,6 +541,7 @@ def _generate_marginal_results(
     kwargs,
     observed: bool,
     margins_name: Hashable = "All",
+    dropna: bool = True,
 ):
     margin_keys: list | Index
     if len(cols) > 0:
@@ -551,7 +555,7 @@ def _all_key(key):
         if len(rows) > 0:
             margin = (
                 data[rows + values]
-                .groupby(rows, observed=observed)
+                .groupby(rows, observed=observed, dropna=dropna)
                 .agg(aggfunc, **kwargs)
             )
             cat_axis = 1
@@ -567,7 +571,7 @@ def _all_key(key):
         else:
             margin = (
                 data[cols[:1] + values]
-                .groupby(cols[:1], observed=observed)
+                .groupby(cols[:1], observed=observed, dropna=dropna)
                 .agg(aggfunc, **kwargs)
                 .T
             )
@@ -610,7 +614,9 @@ def _all_key(key):
 
     if len(cols) > 0:
         row_margin = (
-            data[cols + values].groupby(cols, observed=observed).agg(aggfunc, **kwargs)
+            data[cols + values]
+            .groupby(cols, observed=observed, dropna=dropna)
+            .agg(aggfunc, **kwargs)
         )
         row_margin = row_margin.stack()
 
@@ -633,6 +639,7 @@ def _generate_marginal_results_without_values(
     kwargs,
     observed: bool,
     margins_name: Hashable = "All",
+    dropna: bool = True,
 ):
     margin_keys: list | Index
     if len(cols) > 0:
@@ -645,7 +652,7 @@ def _all_key():
             return (margins_name,) + ("",) * (len(cols) - 1)
 
         if len(rows) > 0:
-            margin = data.groupby(rows, observed=observed)[rows].apply(
+            margin = data.groupby(rows, observed=observed, dropna=dropna)[rows].apply(
                 aggfunc, **kwargs
             )
             all_key = _all_key()
@@ -654,7 +661,9 @@ def _all_key():
             margin_keys.append(all_key)
 
         else:
-            margin = data.groupby(level=0, observed=observed).apply(aggfunc, **kwargs)
+            margin = data.groupby(level=0, observed=observed, dropna=dropna).apply(
+                aggfunc, **kwargs
+            )
             all_key = _all_key()
             table[all_key] = margin
             result = table
@@ -665,7 +674,7 @@ def _all_key():
         margin_keys = table.columns
 
     if len(cols):
-        row_margin = data.groupby(cols, observed=observed)[cols].apply(
+        row_margin = data.groupby(cols, observed=observed, dropna=dropna)[cols].apply(
             aggfunc, **kwargs
         )
     else:
diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py
@@ -289,7 +289,7 @@ def test_margin_dropna4(self):
         # GH: 10772: Keep np.nan in result with dropna=False
         df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
         actual = crosstab(df.a, df.b, margins=True, dropna=False)
-        expected = DataFrame([[1, 0, 1.0], [1, 3, 4.0], [0, 1, np.nan], [2, 4, 6.0]])
+        expected = DataFrame([[1, 0, 1], [1, 3, 4], [0, 1, 1], [2, 4, 6]])
         expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
         expected.columns = Index([3, 4, "All"], name="b")
         tm.assert_frame_equal(actual, expected)
@@ -301,11 +301,11 @@ def test_margin_dropna5(self):
         )
         actual = crosstab(df.a, df.b, margins=True, dropna=False)
         expected = DataFrame(
-            [[1, 0, 0, 1.0], [0, 1, 0, 1.0], [0, 3, 1, np.nan], [1, 4, 0, 6.0]]
+            [[1, 0, 0, 1.0], [0, 1, 0, 1.0], [0, 3, 1, 4.0], [1, 4, 1, 6.0]]
         )
         expected.index = Index([1.0, 2.0, np.nan, "All"], name="a")
         expected.columns = Index([3.0, 4.0, np.nan, "All"], name="b")
-        tm.assert_frame_equal(actual, expected)
+        tm.assert_frame_equal(actual, expected, check_dtype=False)
 
     def test_margin_dropna6(self):
         # GH: 10772: Keep np.nan in result with dropna=False
@@ -326,7 +326,7 @@ def test_margin_dropna6(self):
             names=["b", "c"],
         )
         expected = DataFrame(
-            [[1, 0, 1, 0, 0, 0, 2], [2, 0, 1, 1, 0, 1, 5], [3, 0, 2, 1, 0, 0, 7]],
+            [[1, 0, 1, 0, 0, 0, 2], [2, 0, 1, 1, 0, 1, 5], [3, 0, 2, 1, 0, 1, 7]],
             columns=m,
         )
         expected.index = Index(["bar", "foo", "All"], name="a")
@@ -349,7 +349,7 @@ def test_margin_dropna6(self):
                 [0, 0, np.nan],
                 [2, 0, 2.0],
                 [1, 1, 2.0],
-                [0, 1, np.nan],
+                [0, 1, 1.0],
                 [5, 2, 7.0],
             ],
             index=m,
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -2585,6 +2585,36 @@ def test_pivot_table_values_as_two_params(
         expected = DataFrame(data=e_data, index=e_index, columns=e_cols)
         tm.assert_frame_equal(result, expected)
 
+    def test_pivot_table_margins_include_nan_groups(self):
+        # GH#61509: with dropna=False, the "All" margins must count the NaN group
+        df = DataFrame(
+            {
+                "i": [1, 2, 3],
+                "g1": ["a", "b", "b"],
+                "g2": ["x", None, None],
+            }
+        )
+
+        result = df.pivot_table(
+            index="g1",
+            columns="g2",
+            values="i",
+            aggfunc="count",
+            dropna=False,
+            margins=True,
+        )
+
+        expected = DataFrame(
+            {
+                "x": {"a": 1.0, "b": np.nan, "All": 1.0},
+                np.nan: {"a": np.nan, "b": 2.0, "All": 2.0},
+                "All": {"a": 1.0, "b": 2.0, "All": 3.0},
+            }
+        )
+        expected.index.name = "g1"
+        expected.columns.name = "g2"
+        tm.assert_frame_equal(result, expected, check_dtype=False)
+
 
 class TestPivot:
     def test_pivot(self):