pandas-dev
diff --git a/‎.github/workflows/unit-tests.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/unit-tests.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/wheels.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/wheels.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 3 additions & 3 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎doc/source/development/contributing_documentation.rst‎
Lines changed: 5 additions & 0 deletions b/‎doc/source/development/contributing_documentation.rst‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎doc/source/user_guide/indexing.rst‎
Lines changed: 46 additions & 0 deletions b/‎doc/source/user_guide/indexing.rst‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎doc/source/whatsnew/v2.3.2.rst‎
Lines changed: 1 addition & 1 deletion b/‎doc/source/whatsnew/v2.3.2.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/whatsnew/v3.0.0.rst‎
Lines changed: 6 additions & 1 deletion b/‎doc/source/whatsnew/v3.0.0.rst‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎pandas/_libs/groupby.pyx‎
Lines changed: 2 additions & 3 deletions b/‎pandas/_libs/groupby.pyx‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎pandas/_libs/index.pyx‎
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/index.pyx‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/conftest.py‎
Lines changed: 10 additions & 16 deletions b/‎pandas/conftest.py‎
Lines changed: 10 additions & 16 deletions
@@ -71,7 +71,7 @@ jobs:
             # It will be temporarily activated during tests with locale.setlocale
             extra_loc: "zh_CN"
             platform: ubuntu-24.04
-          - name: "Past no infer strings"
+          - name: "PANDAS_FUTURE_INFER_STRING=0"
             env_file: actions-312.yaml
             pandas_future_infer_string: "0"
             platform: ubuntu-24.04
 
@@ -162,7 +162,7 @@ jobs:
         run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v3.1.1
+        uses: pypa/cibuildwheel@v3.1.3
         with:
          package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
         env:
 
@@ -19,7 +19,7 @@ ci:
     skip: [pyright, mypy]
 repos:
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.12.2
+    rev: v0.12.7
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -95,14 +95,14 @@ repos:
     - id: sphinx-lint
       args: ["--enable", "all", "--disable", "line-too-long"]
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v20.1.7
+    rev: v20.1.8
     hooks:
     - id: clang-format
       files: ^pandas/_libs/src|^pandas/_libs/include
       args: [-i]
       types_or: [c, c++]
 -   repo: https://github.com/trim21/pre-commit-mirror-meson
-    rev: v1.8.2
+    rev: v1.8.3
     hooks:
     - id: meson-fmt
       args: ['--inplace']
 
@@ -157,6 +157,11 @@ If you want to do a full clean build, do::
     python make.py clean
     python make.py html
 
+.. tip::
+  If ``python make.py html`` exits with an error status,
+  try running the command ``python make.py html --num-jobs=1``
+  to identify the cause of the error.
+
 You can tell ``make.py`` to compile only a single section of the docs, greatly
 reducing the turn-around time for checking your changes.
 
 
@@ -1732,3 +1732,49 @@ Why does assignment fail when using chained indexing?
 This means that chained indexing will never work.
 See :ref:`this section <copy_on_write_chained_assignment>`
 for more context.
+
+.. _indexing.series_assignment:
+
+Series Assignment and Index Alignment
+-------------------------------------
+
+When assigning a Series to a DataFrame column, pandas performs automatic alignment
+based on index labels. This is a fundamental behavior that can be surprising to
+new users who might expect positional assignment.
+
+Key Points:
+~~~~~~~~~~~
+
+* Series values are matched to DataFrame rows by index label
+* Position/order in the Series doesn't matter
+* Missing index labels result in NaN values
+* This behavior is consistent across ``df[col] = series`` and ``df.loc[:, col] = series``
+
+Examples:
+
+.. ipython:: python
+
+   import pandas as pd
+
+   # Create a DataFrame
+   df = pd.DataFrame({'values': [1, 2, 3]}, index=['x', 'y', 'z'])
+
+   # Series with matching indices (different order)
+   s1 = pd.Series([10, 20, 30], index=['z', 'x', 'y'])
+   df['aligned'] = s1  # Aligns by index, not position
+   print(df)
+
+   # Series with partial index match
+   s2 = pd.Series([100, 200], index=['x', 'z'])
+   df['partial'] = s2  # Missing 'y' gets NaN
+   print(df)
+
+   # Series with non-matching indices
+   s3 = pd.Series([1000, 2000], index=['a', 'b'])
+   df['nomatch'] = s3  # All values become NaN
+   print(df)
+
+
+   # Avoiding confusion:
+   # If you want positional assignment instead of index alignment,
+   # assign the underlying values so that the Series index is ignored
+   df['s1_values'] = s1.to_numpy()
@@ -25,7 +25,7 @@ Bug fixes
 - Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the
   "string" type in the JSON Table Schema for :class:`StringDtype` columns
   (:issue:`61889`)
-
+- Boolean operations (``|``, ``&``, ``^``) with bool-dtype objects on the left and :class:`StringDtype` objects on the right now cast the string to bool, with a deprecation warning (:issue:`60234`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_232.contributors:
 
@@ -81,6 +81,7 @@ Other enhancements
 - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`Series.map` now accepts an ``engine`` parameter to allow execution with a third-party execution engine (:issue:`61125`)
+- :meth:`Series.rank` and :meth:`DataFrame.rank` with numpy-nullable dtypes preserve ``NA`` values and return ``UInt64`` dtype where appropriate instead of casting ``NA`` to ``NaN`` with ``float64`` dtype (:issue:`62043`)
 - :meth:`Series.str.get_dummies` now accepts a  ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
@@ -89,12 +90,14 @@ Other enhancements
 - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
 - Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
 - Implemented :meth:`Series.str.isascii` and :meth:`Index.str.isascii` (:issue:`59091`)
+- Improve the resulting dtypes in :meth:`DataFrame.where` and :meth:`DataFrame.mask` with :class:`ExtensionDtype` ``other`` (:issue:`62038`)
 - Improved deprecation message for offset aliases (:issue:`60820`)
 - Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
 - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
 - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
+-
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:
@@ -539,7 +542,7 @@ Renamed the following offset aliases (:issue:`57986`):
 
 Other Removals
 ^^^^^^^^^^^^^^
-- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`)
+- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when a group has all NA values, or when used with ``skipna=False`` and any NA value is encountered (:issue:`10694`, :issue:`57745`)
 - :func:`concat` no longer ignores empty objects when determining output dtypes (:issue:`39122`)
 - :func:`concat` with all-NA entries no longer ignores the dtype of those entries when determining the result dtype (:issue:`40893`)
 - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
@@ -722,6 +725,7 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
+- Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
 - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
 - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
 -
@@ -887,6 +891,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
 - Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where the index name was not kept when the index had :class:`ArrowDtype` timestamp dtype (:issue:`61222`)
 - Bug in :meth:`DataFrame.resample` changing index type to :class:`MultiIndex` when the dataframe is empty and using an upsample method (:issue:`55572`)
+- Bug in :meth:`DataFrameGroupBy.agg` and :meth:`SeriesGroupBy.agg` that was returning numpy dtype values when input values are pyarrow dtype values, instead of returning pyarrow dtype values. (:issue:`53030`)
 - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
 - Bug in :meth:`DataFrameGroupBy.agg` where applying a user-defined function to an empty DataFrame returned a Series instead of an empty DataFrame. (:issue:`61503`)
 - Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`)
 
@@ -2048,9 +2048,8 @@ def group_idxmin_idxmax(
         group_min_or_max = np.empty_like(out, dtype=values.dtype)
         seen = np.zeros_like(out, dtype=np.uint8)
 
-    # When using transform, we need a valid value for take in the case
-    # a category is not observed; these values will be dropped
-    out[:] = 0
+    # Sentinel for no valid values.
+    out[:] = -1
 
     with nogil(numeric_object_t is not object):
         for i in range(N):
 
@@ -803,7 +803,7 @@ cdef class BaseMultiIndexCodesEngine:
         int_keys : 1-dimensional array of dtype uint64 or object
             Integers representing one combination each
         """
-        level_codes = list(target._recode_for_new_levels(self.levels))
+        level_codes = list(target._recode_for_new_levels(self.levels, copy=True))
         for i, codes in enumerate(level_codes):
             if self.levels[i].hasnans:
                 na_index = self.levels[i].isna().nonzero()[0][0]
 
@@ -176,25 +176,19 @@ def pytest_collection_modifyitems(items, config) -> None:
                 ignore_doctest_warning(item, path, message)
 
 
-hypothesis_health_checks = [
-    hypothesis.HealthCheck.too_slow,
-    hypothesis.HealthCheck.differing_executors,
-]
-
-# Hypothesis
+# Similar to "ci" config in
+# https://hypothesis.readthedocs.io/en/latest/reference/api.html#built-in-profiles
 hypothesis.settings.register_profile(
-    "ci",
-    # Hypothesis timing checks are tuned for scalars by default, so we bump
-    # them from 200ms to 500ms per test case as the global default.  If this
-    # is too short for a specific test, (a) try to make it faster, and (b)
-    # if it really is slow add `@settings(deadline=...)` with a working value,
-    # or `deadline=None` to entirely disable timeouts for that test.
-    # 2022-02-09: Changed deadline from 500 -> None. Deadline leads to
-    # non-actionable, flaky CI failures (# GH 24641, 44969, 45118, 44969)
+    "pandas_ci",
+    database=None,
     deadline=None,
-    suppress_health_check=tuple(hypothesis_health_checks),
+    max_examples=15,
+    suppress_health_check=(
+        hypothesis.HealthCheck.too_slow,
+        hypothesis.HealthCheck.differing_executors,
+    ),
 )
-hypothesis.settings.load_profile("ci")
+hypothesis.settings.load_profile("pandas_ci")
 
 # Registering these strategies makes them globally available via st.from_type,
 # which is used for offsets in tests/tseries/offsets/test_offsets_properties.py