Commit 2cb2340

Merge branch 'pandas-dev:main' into bug-assert-series-equal-categorical-nulls
2 parents: 6358565 + eb489f2


72 files changed: +654 −324 lines

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
@@ -162,7 +162,7 @@ jobs:
       run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"

       - name: Build wheels
-        uses: pypa/[email protected].1
+        uses: pypa/[email protected].3
         with:
           package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
         env:

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
@@ -19,7 +19,7 @@ ci:
     skip: [pyright, mypy]
 repos:
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.12.2
+    rev: v0.12.7
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -95,14 +95,14 @@ repos:
     -   id: sphinx-lint
         args: ["--enable", "all", "--disable", "line-too-long"]
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v20.1.7
+    rev: v20.1.8
     hooks:
     -   id: clang-format
         files: ^pandas/_libs/src|^pandas/_libs/include
         args: [-i]
         types_or: [c, c++]
 -   repo: https://github.com/trim21/pre-commit-mirror-meson
-    rev: v1.8.2
+    rev: v1.8.3
     hooks:
     -   id: meson-fmt
         args: ['--inplace']

doc/source/development/contributing_documentation.rst

Lines changed: 5 additions & 0 deletions
@@ -157,6 +157,11 @@ If you want to do a full clean build, do::
     python make.py clean
     python make.py html

+.. tip::
+   If ``python make.py html`` exits with an error status,
+   try running the command ``python make.py html --num-jobs=1``
+   to identify the cause of the error.
+
 You can tell ``make.py`` to compile only a single section of the docs, greatly
 reducing the turn-around time for checking your changes.

doc/source/user_guide/indexing.rst

Lines changed: 46 additions & 0 deletions
@@ -1732,3 +1732,49 @@ Why does assignment fail when using chained indexing?
 This means that chained indexing will never work.
 See :ref:`this section <copy_on_write_chained_assignment>`
 for more context.
+
+.. _indexing.series_assignment:
+
+Series assignment and index alignment
+-------------------------------------
+
+When assigning a Series to a DataFrame column, pandas performs automatic
+alignment based on index labels. This is fundamental behavior that can
+surprise new users who expect positional assignment.
+
+Key points
+~~~~~~~~~~
+
+* Series values are matched to DataFrame rows by index label.
+* Position/order in the Series does not matter.
+* Missing index labels result in NaN values.
+* This behavior is consistent across ``df[col] = series`` and
+  ``df.loc[:, col] = series``.
+
+Examples:
+
+.. ipython:: python
+
+   import pandas as pd
+
+   # Create a DataFrame
+   df = pd.DataFrame({'values': [1, 2, 3]}, index=['x', 'y', 'z'])
+
+   # Series with matching indices (different order)
+   s1 = pd.Series([10, 20, 30], index=['z', 'x', 'y'])
+   df['aligned'] = s1  # aligns by index, not position
+   print(df)
+
+   # Series with a partial index match
+   s2 = pd.Series([100, 200], index=['x', 'z'])
+   df['partial'] = s2  # missing 'y' gets NaN
+   print(df)
+
+   # Series with non-matching indices
+   s3 = pd.Series([1000, 2000], index=['a', 'b'])
+   df['nomatch'] = s3  # all values become NaN
+   print(df)
+
+   # To get positional assignment instead of index alignment,
+   # relabel the Series with the DataFrame's index first
+   # (``reindex`` would align by label and is not positional):
+   df['s1_values'] = s1.set_axis(df.index)

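As a quick sanity check on the new section's claim that ``df[col] = series`` and ``df.loc[:, col] = series`` align identically, a minimal standalone sketch (column names are illustrative):

    import pandas as pd

    df = pd.DataFrame({'values': [1, 2, 3]}, index=['x', 'y', 'z'])
    s = pd.Series([10, 20, 30], index=['z', 'x', 'y'])

    df['a'] = s          # label-aligned: x -> 20, y -> 30, z -> 10
    df.loc[:, 'b'] = s   # same alignment semantics
    assert df['a'].equals(df['b'])
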
doc/source/whatsnew/v3.0.0.rst

Lines changed: 5 additions & 1 deletion
@@ -81,6 +81,7 @@ Other enhancements
 - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`Series.map` now accepts an ``engine`` parameter to allow execution with a third-party execution engine (:issue:`61125`)
+- :meth:`Series.rank` and :meth:`DataFrame.rank` with numpy-nullable dtypes preserve ``NA`` values and return ``UInt64`` dtype where appropriate instead of casting ``NA`` to ``NaN`` with ``float64`` dtype (:issue:`62043`)
 - :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
@@ -89,12 +90,14 @@ Other enhancements
 - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
 - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
 - Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`)
+- Improve the resulting dtypes in :meth:`DataFrame.where` and :meth:`DataFrame.mask` with :class:`ExtensionDtype` ``other`` (:issue:`62038`)
 - Improved deprecation message for offset aliases (:issue:`60820`)
 - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
 - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
 - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
+-

 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:
@@ -504,7 +507,7 @@ Renamed the following offset aliases (:issue:`57986`):

 Other Removals
 ^^^^^^^^^^^^^^
-- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`)
+- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when a group has all NA values, or when used with ``skipna=False`` and any NA value is encountered (:issue:`10694`, :issue:`57745`)
 - :func:`concat` no longer ignores empty objects when determining output dtypes (:issue:`39122`)
 - :func:`concat` with all-NA entries no longer ignores the dtype of those entries when determining the result dtype (:issue:`40893`)
 - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
@@ -687,6 +690,7 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
+- Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
 - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
 - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
 -

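A hedged sketch of the behavior the ``Series.rank`` entry above describes (dtypes per the entry; not independently verified here):

    import pandas as pd

    s = pd.Series([3, None, 1], dtype="Int64")
    s.rank()              # NA preserved; masked Float64 ranks
    s.rank(method="min")  # integral ranks; UInt64 per the entry
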
pandas/_libs/groupby.pyx

Lines changed: 2 additions & 3 deletions
@@ -2048,9 +2048,8 @@ def group_idxmin_idxmax(
     group_min_or_max = np.empty_like(out, dtype=values.dtype)
     seen = np.zeros_like(out, dtype=np.uint8)

-    # When using transform, we need a valid value for take in the case
-    # a category is not observed; these values will be dropped
-    out[:] = 0
+    # Sentinel for no valid values.
+    out[:] = -1

     with nogil(numeric_object_t is not object):
         for i in range(N):

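The ``-1`` now written into ``out`` is a sentinel for groups with no valid value; per the whatsnew entry above, such all-NA groups surface as a ``ValueError`` from ``idxmin``/``idxmax``. An illustrative sketch of that user-facing behavior (assumes pandas 3.0 semantics as described):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a"], "val": [np.nan, np.nan]})
    try:
        df.groupby("key")["val"].idxmin()
    except ValueError:
        pass  # group "a" is all-NA (GH 10694 / GH 57745)
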
pandas/_libs/index.pyx

Lines changed: 1 addition & 1 deletion
@@ -803,7 +803,7 @@ cdef class BaseMultiIndexCodesEngine:
         int_keys : 1-dimensional array of dtype uint64 or object
             Integers representing one combination each
         """
-        level_codes = list(target._recode_for_new_levels(self.levels))
+        level_codes = list(target._recode_for_new_levels(self.levels, copy=True))
         for i, codes in enumerate(level_codes):
             if self.levels[i].hasnans:
                 na_index = self.levels[i].isna().nonzero()[0][0]

pandas/conftest.py

Lines changed: 10 additions & 16 deletions
@@ -176,25 +176,19 @@ def pytest_collection_modifyitems(items, config) -> None:
             ignore_doctest_warning(item, path, message)


-hypothesis_health_checks = [
-    hypothesis.HealthCheck.too_slow,
-    hypothesis.HealthCheck.differing_executors,
-]
-
-# Hypothesis
+# Similar to "ci" config in
+# https://hypothesis.readthedocs.io/en/latest/reference/api.html#built-in-profiles
 hypothesis.settings.register_profile(
-    "ci",
-    # Hypothesis timing checks are tuned for scalars by default, so we bump
-    # them from 200ms to 500ms per test case as the global default. If this
-    # is too short for a specific test, (a) try to make it faster, and (b)
-    # if it really is slow add `@settings(deadline=...)` with a working value,
-    # or `deadline=None` to entirely disable timeouts for that test.
-    # 2022-02-09: Changed deadline from 500 -> None. Deadline leads to
-    # non-actionable, flaky CI failures (# GH 24641, 44969, 45118, 44969)
+    "pandas_ci",
+    database=None,
     deadline=None,
-    suppress_health_check=tuple(hypothesis_health_checks),
+    max_examples=15,
+    suppress_health_check=(
+        hypothesis.HealthCheck.too_slow,
+        hypothesis.HealthCheck.differing_executors,
+    ),
 )
-hypothesis.settings.load_profile("ci")
+hypothesis.settings.load_profile("pandas_ci")

 # Registering these strategies makes them globally available via st.from_type,
 # which is use for offsets in tests/tseries/offsets/test_offsets_properties.py

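For reference, the Hypothesis profile API used above follows this general pattern (a standalone sketch; the profile name and values are illustrative, not pandas' own):

    import hypothesis

    # Register a named bundle of settings...
    hypothesis.settings.register_profile(
        "sketch_ci",
        deadline=None,    # disable per-example deadlines
        max_examples=15,  # cap examples for faster runs
    )
    # ...then activate it globally.
    hypothesis.settings.load_profile("sketch_ci")
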
pandas/core/arrays/categorical.py

Lines changed: 12 additions & 8 deletions
@@ -575,7 +575,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
             # GH 10696/18593/18630
             dtype = self.dtype.update_dtype(dtype)
             self = self.copy() if copy else self
-            result = self._set_dtype(dtype)
+            result = self._set_dtype(dtype, copy=False)

         elif isinstance(dtype, ExtensionDtype):
             return super().astype(dtype, copy=copy)
@@ -670,13 +670,15 @@ def _from_inferred_categories(
         if known_categories:
             # Recode from observation order to dtype.categories order.
             categories = dtype.categories
-            codes = recode_for_categories(inferred_codes, cats, categories)
+            codes = recode_for_categories(inferred_codes, cats, categories, copy=False)
         elif not cats.is_monotonic_increasing:
             # Sort categories and recode for unknown categories.
             unsorted = cats.copy()
             categories = cats.sort_values()

-            codes = recode_for_categories(inferred_codes, unsorted, categories)
+            codes = recode_for_categories(
+                inferred_codes, unsorted, categories, copy=False
+            )
             dtype = CategoricalDtype(categories, ordered=False)
         else:
             dtype = CategoricalDtype(cats, ordered=False)
@@ -945,7 +947,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:

         super().__init__(self._ndarray, new_dtype)

-    def _set_dtype(self, dtype: CategoricalDtype) -> Self:
+    def _set_dtype(self, dtype: CategoricalDtype, *, copy: bool) -> Self:
         """
         Internal method for directly updating the CategoricalDtype

@@ -958,7 +960,9 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Self:
         We don't do any validation here. It's assumed that the dtype is
         a (valid) instance of `CategoricalDtype`.
         """
-        codes = recode_for_categories(self.codes, self.categories, dtype.categories)
+        codes = recode_for_categories(
+            self.codes, self.categories, dtype.categories, copy=copy
+        )
         return type(self)._simple_new(codes, dtype=dtype)

     def set_ordered(self, value: bool) -> Self:
@@ -1152,7 +1156,7 @@ def set_categories(
             codes = cat._codes
         else:
             codes = recode_for_categories(
-                cat.codes, cat.categories, new_dtype.categories
+                cat.codes, cat.categories, new_dtype.categories, copy=False
             )
         NDArrayBacked.__init__(cat, codes, new_dtype)
         return cat
@@ -3004,7 +3008,7 @@ def _get_codes_for_values(


 def recode_for_categories(
-    codes: np.ndarray, old_categories, new_categories, copy: bool = True
+    codes: np.ndarray, old_categories, new_categories, *, copy: bool
 ) -> np.ndarray:
     """
     Convert a set of codes for to a new set of categories
@@ -3025,7 +3029,7 @@ def recode_for_categories(
     >>> old_cat = pd.Index(["b", "a", "c"])
     >>> new_cat = pd.Index(["a", "b"])
     >>> codes = np.array([0, 1, 1, 2])
-    >>> recode_for_categories(codes, old_cat, new_cat)
+    >>> recode_for_categories(codes, old_cat, new_cat, copy=True)
     array([ 1,  0,  0, -1], dtype=int8)
     """
     if len(old_categories) == 0:

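One way to observe the effect of threading ``copy=False`` through ``astype`` (a hedged sketch; assumes the no-op-recode fast path when the dtype is unchanged, per GH 62000):

    import numpy as np
    import pandas as pd

    cat = pd.Categorical(["a", "b", "a"])
    out = cat.astype(cat.dtype, copy=False)
    # With the fix, no defensive copy of the codes is expected:
    assert np.shares_memory(out.codes, cat.codes)
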
pandas/core/arrays/masked.py

Lines changed: 44 additions & 0 deletions
@@ -12,6 +12,7 @@
 import numpy as np

 from pandas._libs import (
+    algos as libalgos,
     lib,
     missing as libmissing,
 )
@@ -992,6 +993,49 @@ def copy(self) -> Self:
         mask = self._mask.copy()
         return self._simple_new(data, mask)

+    def _rank(
+        self,
+        *,
+        axis: AxisInt = 0,
+        method: str = "average",
+        na_option: str = "keep",
+        ascending: bool = True,
+        pct: bool = False,
+    ):
+        # GH#62043 Avoid going through copy-making ensure_data in algorithms.rank
+        if axis != 0 or self.ndim != 1:
+            raise NotImplementedError
+
+        from pandas.core.arrays import FloatingArray
+
+        data = self._data
+        if data.dtype.kind == "b":
+            data = data.view("uint8")
+
+        result = libalgos.rank_1d(
+            data,
+            is_datetimelike=False,
+            ties_method=method,
+            ascending=ascending,
+            na_option=na_option,
+            pct=pct,
+            mask=self.isna(),
+        )
+        if na_option in ["top", "bottom"]:
+            mask = np.zeros(self.shape, dtype=bool)
+        else:
+            mask = self._mask.copy()
+
+        if method != "average" and not pct:
+            if na_option not in ["top", "bottom"]:
+                result[self._mask] = 0  # avoid warning on casting
+            result = result.astype("uint64", copy=False)
+            from pandas.core.arrays import IntegerArray
+
+            return IntegerArray(result, mask=mask)
+
+        return FloatingArray(result, mask=mask)
+
     @doc(ExtensionArray.duplicated)
     def duplicated(
         self, keep: Literal["first", "last", False] = "first"

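Two small details in ``_rank`` worth noting: boolean data is ranked through a ``uint8`` view (no copy), and integral results are produced by zero-filling masked slots before casting. The view trick in isolation (a minimal sketch):

    import numpy as np

    data = np.array([True, False, True])
    as_uint8 = data.view("uint8")  # array([1, 0, 1], dtype=uint8)
    assert np.shares_memory(data, as_uint8)  # a view, not a copy
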