pandas-dev
diff --git a/‎doc/source/whatsnew/v2.2.1.rst
Lines changed: 4 additions & 2 deletions b/‎doc/source/whatsnew/v2.2.1.rst
Lines changed: 4 additions & 2 deletions
diff --git a/‎pandas/_libs/groupby.pyi
Lines changed: 2 additions & 0 deletions b/‎pandas/_libs/groupby.pyi
Lines changed: 2 additions & 0 deletions
diff --git a/‎pandas/_libs/groupby.pyx
Lines changed: 26 additions & 15 deletions b/‎pandas/_libs/groupby.pyx
Lines changed: 26 additions & 15 deletions
diff --git a/‎pandas/_libs/index.pyx
Lines changed: 31 additions & 32 deletions b/‎pandas/_libs/index.pyx
Lines changed: 31 additions & 32 deletions
diff --git a/‎pandas/_libs/ops.pyx
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/ops.pyx
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_testing/__init__.py
Lines changed: 7 additions & 0 deletions b/‎pandas/_testing/__init__.py
Lines changed: 7 additions & 0 deletions
diff --git a/‎pandas/conftest.py
Lines changed: 32 additions & 0 deletions b/‎pandas/conftest.py
Lines changed: 32 additions & 0 deletions
diff --git a/‎pandas/core/common.py
Lines changed: 2 additions & 0 deletions b/‎pandas/core/common.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎pandas/core/groupby/groupby.py
Lines changed: 28 additions & 8 deletions b/‎pandas/core/groupby/groupby.py
Lines changed: 28 additions & 8 deletions
@@ -17,6 +17,7 @@ Fixed regressions
 - Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
 - Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
 - Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
+- Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`)
 - Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
 - Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
 - Fixed regression in :meth:`Index.join` raising ``TypeError`` when joining an empty index to a non-empty index containing mixed dtype values (:issue:`57048`)
@@ -27,14 +28,15 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
--
+- Fixed bug in :meth:`DataFrame.__getitem__` for empty :class:`DataFrame` with Copy-on-Write enabled (:issue:`57130`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_221.other:
 
 Other
 ~~~~~
--
+- Added the argument ``skipna`` to :meth:`DataFrameGroupBy.first`, :meth:`DataFrameGroupBy.last`, :meth:`SeriesGroupBy.first`, and :meth:`SeriesGroupBy.last`; achieving ``skipna=False`` used to be available via :meth:`DataFrameGroupBy.nth`, but the behavior was changed in pandas 2.0.0 (:issue:`57019`)
+- Added the argument ``skipna`` to :meth:`Resampler.first`, :meth:`Resampler.last` (:issue:`57019`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_221.contributors:
 
@@ -136,6 +136,7 @@ def group_last(
     result_mask: npt.NDArray[np.bool_] | None = ...,
     min_count: int = ...,  # Py_ssize_t
     is_datetimelike: bool = ...,
+    skipna: bool = ...,
 ) -> None: ...
 def group_nth(
     out: np.ndarray,  # rank_t[:, ::1]
@@ -147,6 +148,7 @@ def group_nth(
     min_count: int = ...,  # int64_t
     rank: int = ...,  # int64_t
     is_datetimelike: bool = ...,
+    skipna: bool = ...,
 ) -> None: ...
 def group_rank(
     out: np.ndarray,  # float64_t[:, ::1]
 
@@ -1428,6 +1428,7 @@ def group_last(
     uint8_t[:, ::1] result_mask=None,
     Py_ssize_t min_count=-1,
     bint is_datetimelike=False,
+    bint skipna=True,
 ) -> None:
     """
     Only aggregates on axis=0
@@ -1462,14 +1463,19 @@ def group_last(
             for j in range(K):
                 val = values[i, j]
 
-                if uses_mask:
-                    isna_entry = mask[i, j]
-                else:
-                    isna_entry = _treat_as_na(val, is_datetimelike)
+                if skipna:
+                    if uses_mask:
+                        isna_entry = mask[i, j]
+                    else:
+                        isna_entry = _treat_as_na(val, is_datetimelike)
+                    if isna_entry:
+                        continue
 
-                if not isna_entry:
-                    nobs[lab, j] += 1
-                    resx[lab, j] = val
+                nobs[lab, j] += 1
+                resx[lab, j] = val
+
+                if uses_mask and not skipna:
+                    result_mask[lab, j] = mask[i, j]
 
     _check_below_mincount(
         out, uses_mask, result_mask, ncounts, K, nobs, min_count, resx
@@ -1490,6 +1496,7 @@ def group_nth(
     int64_t min_count=-1,
     int64_t rank=1,
     bint is_datetimelike=False,
+    bint skipna=True,
 ) -> None:
     """
     Only aggregates on axis=0
@@ -1524,15 +1531,19 @@ def group_nth(
             for j in range(K):
                 val = values[i, j]
 
-                if uses_mask:
-                    isna_entry = mask[i, j]
-                else:
-                    isna_entry = _treat_as_na(val, is_datetimelike)
+                if skipna:
+                    if uses_mask:
+                        isna_entry = mask[i, j]
+                    else:
+                        isna_entry = _treat_as_na(val, is_datetimelike)
+                    if isna_entry:
+                        continue
 
-                if not isna_entry:
-                    nobs[lab, j] += 1
-                    if nobs[lab, j] == rank:
-                        resx[lab, j] = val
+                nobs[lab, j] += 1
+                if nobs[lab, j] == rank:
+                    resx[lab, j] = val
+                    if uses_mask and not skipna:
+                        result_mask[lab, j] = mask[i, j]
 
     _check_below_mincount(
         out, uses_mask, result_mask, ncounts, K, nobs, min_count, resx
 
@@ -98,6 +98,20 @@ cdef ndarray _get_bool_indexer(ndarray values, object val, ndarray mask = None):
     return indexer.view(bool)
 
 
+cdef _maybe_resize_array(ndarray values, Py_ssize_t loc, Py_ssize_t max_length):
+    """
+    Resize array if loc is out of bounds.
+    """
+    cdef:
+        Py_ssize_t n = len(values)
+
+    if loc >= n:
+        while loc >= n:
+            n *= 2
+        values = np.resize(values, min(n, max_length))
+    return values
+
+
 # Don't populate hash tables in monotonic indexes larger than this
 _SIZE_CUTOFF = 1_000_000
 
@@ -456,27 +470,18 @@ cdef class IndexEngine:
             # found
             if val in d:
                 key = val
-
+                result = _maybe_resize_array(
+                    result,
+                    count + len(d[key]) - 1,
+                    max_alloc
+                )
                 for j in d[key]:
-
-                    # realloc if needed
-                    if count >= n_alloc:
-                        n_alloc *= 2
-                        if n_alloc > max_alloc:
-                            n_alloc = max_alloc
-                        result = np.resize(result, n_alloc)
-
                     result[count] = j
                     count += 1
 
             # value not found
             else:
-
-                if count >= n_alloc:
-                    n_alloc *= 2
-                    if n_alloc > max_alloc:
-                        n_alloc = max_alloc
-                    result = np.resize(result, n_alloc)
+                result = _maybe_resize_array(result, count, max_alloc)
                 result[count] = -1
                 count += 1
                 missing[count_missing] = i
@@ -1214,37 +1219,31 @@ cdef class MaskedIndexEngine(IndexEngine):
 
             if PySequence_GetItem(target_mask, i):
                 if na_pos:
+                    result = _maybe_resize_array(
+                        result,
+                        count + len(na_pos) - 1,
+                        max_alloc,
+                    )
                     for na_idx in na_pos:
-                        # realloc if needed
-                        if count >= n_alloc:
-                            n_alloc *= 2
-                            if n_alloc > max_alloc:
-                                n_alloc = max_alloc
-
                         result[count] = na_idx
                         count += 1
                     continue
 
             elif val in d:
                 # found
                 key = val
-
+                result = _maybe_resize_array(
+                    result,
+                    count + len(d[key]) - 1,
+                    max_alloc,
+                )
                 for j in d[key]:
-
-                    # realloc if needed
-                    if count >= n_alloc:
-                        n_alloc *= 2
-                        if n_alloc > max_alloc:
-                            n_alloc = max_alloc
-
                     result[count] = j
                     count += 1
                 continue
 
             # value not found
-            if count >= n_alloc:
-                n_alloc += 10_000
-                result = np.resize(result, n_alloc)
+            result = _maybe_resize_array(result, count, max_alloc)
             result[count] = -1
             count += 1
             missing[count_missing] = i
 
@@ -29,7 +29,7 @@ from pandas._libs.util cimport is_nan
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def scalar_compare(object[:] values, object val, object op) -> ndarray:
+def scalar_compare(ndarray[object] values, object val, object op) -> ndarray:
     """
     Compare each element of `values` array with the scalar `val`, with
     the comparison operation described by `op`.
 
@@ -235,11 +235,18 @@
         + TIMEDELTA_PYARROW_DTYPES
         + BOOL_PYARROW_DTYPES
     )
+    ALL_REAL_PYARROW_DTYPES_STR_REPR = (
+        ALL_INT_PYARROW_DTYPES_STR_REPR + FLOAT_PYARROW_DTYPES_STR_REPR
+    )
 else:
     FLOAT_PYARROW_DTYPES_STR_REPR = []
     ALL_INT_PYARROW_DTYPES_STR_REPR = []
     ALL_PYARROW_DTYPES = []
+    ALL_REAL_PYARROW_DTYPES_STR_REPR = []
 
+ALL_REAL_NULLABLE_DTYPES = (
+    FLOAT_NUMPY_DTYPES + ALL_REAL_EXTENSION_DTYPES + ALL_REAL_PYARROW_DTYPES_STR_REPR
+)
 
 arithmetic_dunder_methods = [
     "__add__",
 
@@ -1703,6 +1703,38 @@ def any_numpy_dtype(request):
     return request.param
 
 
+@pytest.fixture(params=tm.ALL_REAL_NULLABLE_DTYPES)
+def any_real_nullable_dtype(request):
+    """
+    Parameterized fixture for all real dtypes that can hold NA.
+
+    * float
+    * 'float32'
+    * 'float64'
+    * 'Float32'
+    * 'Float64'
+    * 'UInt8'
+    * 'UInt16'
+    * 'UInt32'
+    * 'UInt64'
+    * 'Int8'
+    * 'Int16'
+    * 'Int32'
+    * 'Int64'
+    * 'uint8[pyarrow]'
+    * 'uint16[pyarrow]'
+    * 'uint32[pyarrow]'
+    * 'uint64[pyarrow]'
+    * 'int8[pyarrow]'
+    * 'int16[pyarrow]'
+    * 'int32[pyarrow]'
+    * 'int64[pyarrow]'
+    * 'float[pyarrow]'
+    * 'double[pyarrow]'
+    """
+    return request.param
+
+
 @pytest.fixture(params=tm.ALL_NUMERIC_DTYPES)
 def any_numeric_dtype(request):
     """
 
@@ -236,6 +236,8 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLi
         values = list(values)
     elif isinstance(values, ABCIndex):
         return values._values
+    elif isinstance(values, ABCSeries):
+        return values._values
 
     if isinstance(values, list) and dtype in [np.object_, object]:
         return construct_1d_object_array_from_listlike(values)
 
@@ -3364,22 +3364,31 @@ def max(
             )
 
     @final
-    def first(self, numeric_only: bool = False, min_count: int = -1) -> NDFrameT:
+    def first(
+        self, numeric_only: bool = False, min_count: int = -1, skipna: bool = True
+    ) -> NDFrameT:
         """
-        Compute the first non-null entry of each column.
+        Compute the first entry of each column within each group.
+
+        Defaults to skipping NA elements.
 
         Parameters
         ----------
         numeric_only : bool, default False
             Include only float, int, boolean columns.
         min_count : int, default -1
             The required number of valid values to perform the operation. If fewer
-            than ``min_count`` non-NA values are present the result will be NA.
+            than ``min_count`` valid values are present the result will be NA.
+        skipna : bool, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be NA.
+
+            .. versionadded:: 2.2.1
 
         Returns
         -------
         Series or DataFrame
-            First non-null of values within each group.
+            First values within each group.
 
         See Also
         --------
@@ -3431,12 +3440,17 @@ def first(x: Series):
             min_count=min_count,
             alias="first",
             npfunc=first_compat,
+            skipna=skipna,
         )
 
     @final
-    def last(self, numeric_only: bool = False, min_count: int = -1) -> NDFrameT:
+    def last(
+        self, numeric_only: bool = False, min_count: int = -1, skipna: bool = True
+    ) -> NDFrameT:
         """
-        Compute the last non-null entry of each column.
+        Compute the last entry of each column within each group.
+
+        Defaults to skipping NA elements.
 
         Parameters
         ----------
@@ -3445,12 +3459,17 @@ def last(self, numeric_only: bool = False, min_count: int = -1) -> NDFrameT:
             everything, then use only numeric data.
         min_count : int, default -1
             The required number of valid values to perform the operation. If fewer
-            than ``min_count`` non-NA values are present the result will be NA.
+            than ``min_count`` valid values are present the result will be NA.
+        skipna : bool, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be NA.
+
+            .. versionadded:: 2.2.1
 
         Returns
         -------
         Series or DataFrame
-            Last non-null of values within each group.
+            Last of values within each group.
 
         See Also
         --------
@@ -3490,6 +3509,7 @@ def last(x: Series):
             min_count=min_count,
             alias="last",
             npfunc=last_compat,
+            skipna=skipna,
         )
 
     @final