
Commit dfc7f71

updated documentation build branch with commits from upstream/main

2 parents 9c5b2d3 + 2ad2abd


56 files changed: +302 -228 lines

.github/workflows/wheels.yml

Lines changed: 1 addition & 1 deletion
@@ -162,7 +162,7 @@ jobs:
       run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"

     - name: Build wheels
-      uses: pypa/cibuildwheel@v3.1.1
+      uses: pypa/cibuildwheel@v3.1.3
       with:
         package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
       env:

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
@@ -19,7 +19,7 @@ ci:
     skip: [pyright, mypy]
 repos:
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.12.2
+    rev: v0.12.7
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -95,14 +95,14 @@ repos:
     -   id: sphinx-lint
         args: ["--enable", "all", "--disable", "line-too-long"]
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v20.1.7
+    rev: v20.1.8
     hooks:
     -   id: clang-format
         files: ^pandas/_libs/src|^pandas/_libs/include
         args: [-i]
         types_or: [c, c++]
 -   repo: https://github.com/trim21/pre-commit-mirror-meson
-    rev: v1.8.2
+    rev: v1.8.3
     hooks:
     -   id: meson-fmt
         args: ['--inplace']

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
@@ -81,6 +81,7 @@ Other enhancements
 - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`Series.map` now accepts an ``engine`` parameter to allow execution with a third-party execution engine (:issue:`61125`)
+- :meth:`Series.rank` and :meth:`DataFrame.rank` with numpy-nullable dtypes preserve ``NA`` values and return ``UInt64`` dtype where appropriate instead of casting ``NA`` to ``NaN`` with ``float64`` dtype (:issue:`62043`)
 - :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
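
A hedged sketch of the rank entry added above, exercised through the public API; the dtypes in the comments follow from the whatsnew text and the masked.py change later in this commit, so treat them as expectations rather than verified output:

import pandas as pd

s = pd.Series([3, 1, pd.NA, 2], dtype="Int64")

# Default ranking (method="average") now keeps NA as NA and is expected to
# return a nullable Float64 result instead of float64 with NaN.
s.rank()

# With an integer-valued method and pct=False, the result is expected to be
# UInt64, again with NA preserved.
s.rank(method="min")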

pandas/_libs/index.pyx

Lines changed: 1 addition & 1 deletion
@@ -803,7 +803,7 @@ cdef class BaseMultiIndexCodesEngine:
         int_keys : 1-dimensional array of dtype uint64 or object
             Integers representing one combination each
         """
-        level_codes = list(target._recode_for_new_levels(self.levels))
+        level_codes = list(target._recode_for_new_levels(self.levels, copy=True))
         for i, codes in enumerate(level_codes):
             if self.levels[i].hasnans:
                 na_index = self.levels[i].isna().nonzero()[0][0]

pandas/conftest.py

Lines changed: 10 additions & 16 deletions
@@ -176,25 +176,19 @@ def pytest_collection_modifyitems(items, config) -> None:
                 ignore_doctest_warning(item, path, message)


-hypothesis_health_checks = [
-    hypothesis.HealthCheck.too_slow,
-    hypothesis.HealthCheck.differing_executors,
-]
-
-# Hypothesis
+# Similar to "ci" config in
+# https://hypothesis.readthedocs.io/en/latest/reference/api.html#built-in-profiles
 hypothesis.settings.register_profile(
-    "ci",
-    # Hypothesis timing checks are tuned for scalars by default, so we bump
-    # them from 200ms to 500ms per test case as the global default. If this
-    # is too short for a specific test, (a) try to make it faster, and (b)
-    # if it really is slow add `@settings(deadline=...)` with a working value,
-    # or `deadline=None` to entirely disable timeouts for that test.
-    # 2022-02-09: Changed deadline from 500 -> None. Deadline leads to
-    # non-actionable, flaky CI failures (# GH 24641, 44969, 45118, 44969)
+    "pandas_ci",
+    database=None,
     deadline=None,
-    suppress_health_check=tuple(hypothesis_health_checks),
+    max_examples=15,
+    suppress_health_check=(
+        hypothesis.HealthCheck.too_slow,
+        hypothesis.HealthCheck.differing_executors,
+    ),
 )
-hypothesis.settings.load_profile("ci")
+hypothesis.settings.load_profile("pandas_ci")

 # Registering these strategies makes them globally available via st.from_type,
 # which is use for offsets in tests/tseries/offsets/test_offsets_properties.py
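
For orientation, a minimal sketch of how a Hypothesis profile like the new "pandas_ci" one is registered, activated, and then picked up by a property-based test; the test function and strategy below are invented for illustration, only the profile settings mirror the diff:

import hypothesis
from hypothesis import given, strategies as st

# Mirror the profile from the diff: no example database, no per-test deadline,
# and a small example budget per test.
hypothesis.settings.register_profile(
    "pandas_ci",
    database=None,
    deadline=None,
    max_examples=15,
)
hypothesis.settings.load_profile("pandas_ci")


@given(st.integers())
def test_int_str_roundtrip(x):
    # Runs under the active "pandas_ci" profile: at most 15 examples, no deadline.
    assert int(str(x)) == x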

pandas/core/arrays/categorical.py

Lines changed: 9 additions & 7 deletions
@@ -670,13 +670,15 @@ def _from_inferred_categories(
         if known_categories:
             # Recode from observation order to dtype.categories order.
             categories = dtype.categories
-            codes = recode_for_categories(inferred_codes, cats, categories)
+            codes = recode_for_categories(inferred_codes, cats, categories, copy=False)
         elif not cats.is_monotonic_increasing:
             # Sort categories and recode for unknown categories.
             unsorted = cats.copy()
             categories = cats.sort_values()

-            codes = recode_for_categories(inferred_codes, unsorted, categories)
+            codes = recode_for_categories(
+                inferred_codes, unsorted, categories, copy=False
+            )
             dtype = CategoricalDtype(categories, ordered=False)
         else:
             dtype = CategoricalDtype(cats, ordered=False)
@@ -945,7 +947,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:

         super().__init__(self._ndarray, new_dtype)

-    def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
+    def _set_dtype(self, dtype: CategoricalDtype, *, copy: bool) -> Self:
         """
         Internal method for directly updating the CategoricalDtype

@@ -959,7 +961,7 @@ def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
             a (valid) instance of `CategoricalDtype`.
         """
         codes = recode_for_categories(
-            self.codes, self.categories, dtype.categories, copy
+            self.codes, self.categories, dtype.categories, copy=copy
         )
         return type(self)._simple_new(codes, dtype=dtype)

@@ -1154,7 +1156,7 @@ def set_categories(
             codes = cat._codes
         else:
             codes = recode_for_categories(
-                cat.codes, cat.categories, new_dtype.categories
+                cat.codes, cat.categories, new_dtype.categories, copy=False
             )
         NDArrayBacked.__init__(cat, codes, new_dtype)
         return cat
@@ -3006,7 +3008,7 @@ def _get_codes_for_values(


 def recode_for_categories(
-    codes: np.ndarray, old_categories, new_categories, copy: bool = True
+    codes: np.ndarray, old_categories, new_categories, *, copy: bool
 ) -> np.ndarray:
     """
     Convert a set of codes for to a new set of categories
@@ -3027,7 +3029,7 @@ def recode_for_categories(
     >>> old_cat = pd.Index(["b", "a", "c"])
     >>> new_cat = pd.Index(["a", "b"])
     >>> codes = np.array([0, 1, 1, 2])
-    >>> recode_for_categories(codes, old_cat, new_cat)
+    >>> recode_for_categories(codes, old_cat, new_cat, copy=True)
     array([ 1,  0,  0, -1], dtype=int8)
     """
     if len(old_categories) == 0:

pandas/core/arrays/masked.py

Lines changed: 44 additions & 0 deletions
@@ -12,6 +12,7 @@
 import numpy as np

 from pandas._libs import (
+    algos as libalgos,
     lib,
     missing as libmissing,
 )
@@ -992,6 +993,49 @@ def copy(self) -> Self:
         mask = self._mask.copy()
         return self._simple_new(data, mask)

+    def _rank(
+        self,
+        *,
+        axis: AxisInt = 0,
+        method: str = "average",
+        na_option: str = "keep",
+        ascending: bool = True,
+        pct: bool = False,
+    ):
+        # GH#62043 Avoid going through copy-making ensure_data in algorithms.rank
+        if axis != 0 or self.ndim != 1:
+            raise NotImplementedError
+
+        from pandas.core.arrays import FloatingArray
+
+        data = self._data
+        if data.dtype.kind == "b":
+            data = data.view("uint8")
+
+        result = libalgos.rank_1d(
+            data,
+            is_datetimelike=False,
+            ties_method=method,
+            ascending=ascending,
+            na_option=na_option,
+            pct=pct,
+            mask=self.isna(),
+        )
+        if na_option in ["top", "bottom"]:
+            mask = np.zeros(self.shape, dtype=bool)
+        else:
+            mask = self._mask.copy()
+
+        if method != "average" and not pct:
+            if na_option not in ["top", "bottom"]:
+                result[self._mask] = 0  # avoid warning on casting
+            result = result.astype("uint64", copy=False)
+            from pandas.core.arrays import IntegerArray
+
+            return IntegerArray(result, mask=mask)
+
+        return FloatingArray(result, mask=mask)
+
     @doc(ExtensionArray.duplicated)
     def duplicated(
         self, keep: Literal["first", "last", False] = "first"

pandas/core/common.py

Lines changed: 2 additions & 0 deletions
@@ -91,8 +91,10 @@ def consensus_name_attr(objs):
         try:
             if obj.name != name:
                 name = None
+                break
         except ValueError:
             name = None
+            break
     return name

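The two added break statements only short-circuit the scan once a mismatch is found; a rough standalone sketch of the behavior (simplified from pandas/core/common.py, using a made-up duck-typed input instead of real pandas objects):

class Named:
    def __init__(self, name):
        self.name = name


def consensus_name_attr_sketch(objs):
    # Keep the first object's name only if every other object agrees;
    # stop scanning at the first disagreement (the new `break`).
    name = objs[0].name
    for obj in objs[1:]:
        try:
            if obj.name != name:
                name = None
                break
        except ValueError:
            # e.g. array-valued names where `!=` is ambiguous
            name = None
            break
    return name


print(consensus_name_attr_sketch([Named("a"), Named("a")]))  # "a"
print(consensus_name_attr_sketch([Named("a"), Named("b"), Named("a")]))  # None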

pandas/core/dtypes/concat.py

Lines changed: 2 additions & 1 deletion
@@ -318,7 +318,8 @@ def _maybe_unwrap(x):
             categories = categories.sort_values()

         new_codes = [
-            recode_for_categories(c.codes, c.categories, categories) for c in to_union
+            recode_for_categories(c.codes, c.categories, categories, copy=False)
+            for c in to_union
         ]
         new_codes = np.concatenate(new_codes)
     else:
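
This recode appears to run inside union_categoricals when the inputs do not share identical categories; a hedged usage sketch, where the commented results reflect the documented behavior rather than output captured from this commit:

import pandas as pd
from pandas.api.types import union_categoricals

a = pd.Categorical(["b", "c"])
b = pd.Categorical(["a", "b"])

# Categories of the first input come first, then any new ones:
# values ['b', 'c', 'a', 'b'] with categories ['b', 'c', 'a']
print(union_categoricals([a, b]))

# sort_categories=True goes through the categories.sort_values() path shown
# above, so every input's codes get recoded to the sorted category order.
print(union_categoricals([a, b], sort_categories=True))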

pandas/core/groupby/categorical.py

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica

     # we recode according to the uniques
     categories = c.categories.take(take_codes)
-    codes = recode_for_categories(c.codes, c.categories, categories)
+    codes = recode_for_categories(c.codes, c.categories, categories, copy=False)

     # return a new categorical that maps our new codes
     # and categories
