pandas-dev · mroeschke · Jan 30, 2024 · Jan 25, 2024 · Jan 25, 2024 · Jan 25, 2024
diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst
@@ -14,6 +14,7 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
+- Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where the result has more than 10,000 rows (:issue:`57027`)
 - Fixed regression in :meth:`Series.pct_change` raising a ``ValueError`` for an empty :class:`Series` (:issue:`57056`)
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -1220,6 +1220,7 @@ cdef class MaskedIndexEngine(IndexEngine):
                             n_alloc *= 2
                             if n_alloc > max_alloc:
                                 n_alloc = max_alloc
+                            result = np.resize(result, n_alloc)
 
                         result[count] = na_idx
                         count += 1
@@ -1236,14 +1237,17 @@ cdef class MaskedIndexEngine(IndexEngine):
                         n_alloc *= 2
                         if n_alloc > max_alloc:
                             n_alloc = max_alloc
+                        result = np.resize(result, n_alloc)
 
                     result[count] = j
                     count += 1
                 continue
 
             # value not found
             if count >= n_alloc:
-                n_alloc += 10_000
+                n_alloc *= 2
+                if n_alloc > max_alloc:
+                    n_alloc = max_alloc
                 result = np.resize(result, n_alloc)
             result[count] = -1
             count += 1

diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -3347,3 +3347,15 @@ def test_getitem_loc_str_periodindex(self):
             index = pd.period_range(start="2000", periods=20, freq="B")
             series = Series(range(20), index=index)
             assert series.loc["2000-01-14"] == 9
+
+    def test_loc_nonunique_masked_index(self):
+        # GH 57027 - non-unique Int64 index; .loc result has 11,000 rows (> 10,000)
+        ids = list(range(11))
+        index = Index(ids * 1000, dtype="Int64")
+        df = DataFrame({"val": np.arange(len(index))}, index=index)
+        result = df.loc[ids]
+        expected = DataFrame(
+            {"val": index.argsort(kind="stable")},
+            index=Index(np.array(ids).repeat(1000), dtype="Int64"),
+        )
+        tm.assert_frame_equal(result, expected)