From 72d66adc45cd4b48828b7ed1ae0229f3c0e64724 Mon Sep 17 00:00:00 2001
From: Brock
Date: Sat, 6 Sep 2025 10:16:41 -0700
Subject: [PATCH] BUG: MultiIndex.factorize raising with length-0

---
 doc/source/whatsnew/v3.0.0.rst |  1 +
 pandas/core/base.py            |  6 +++++-
 pandas/tests/test_algos.py     | 10 ++++++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 7ec50137c3039..8a1f97056b010 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -986,6 +986,7 @@ I/O
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)
+- Bug in :meth:`MultiIndex.factorize` incorrectly raising on length-0 indexes (:issue:`57517`)
 - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 1d571df400e49..6d2ab581470bc 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1298,7 +1298,11 @@ def factorize(
 
         if isinstance(self, ABCMultiIndex):
             # preserve MultiIndex
-            uniques = self._constructor(uniques)
+            if len(self) == 0:
+                # GH#57517
+                uniques = self[:0]
+            else:
+                uniques = self._constructor(uniques)
         else:
             from pandas import Index
 
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 1525f648e2d5a..5eef7cddfee41 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -582,6 +582,16 @@ def test_index_returned(self, index):
                 expected = expected.normalize()
         tm.assert_index_equal(result, expected, exact=True)
 
+    def test_factorize_multiindex_empty(self):
+        # GH#57517
+        mi = MultiIndex.from_product(
+            [Index([], name="a", dtype=object), Index([], name="i", dtype="f4")]
+        )
+        codes, uniques = mi.factorize()
+        exp_codes = np.array([], dtype=np.intp)
+        tm.assert_numpy_array_equal(codes, exp_codes)
+        tm.assert_index_equal(uniques, mi[:0])
+
     def test_dtype_preservation(self, any_numpy_dtype):
         # GH 15442
         if any_numpy_dtype in (tm.BYTES_DTYPES + tm.STRING_DTYPES):