resolved merge from main for ssl error pr for doc build page

jeffersbaxter · jeffersbaxter · commit 94716b535e45 · 2025-08-02T21:47:04.000-07:00
diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst
@@ -1732,3 +1732,49 @@ Why does assignment fail when using chained indexing?
 This means that chained indexing will never work.
 See :ref:`this section <copy_on_write_chained_assignment>`
 for more context.
+
+.. _indexing.series_assignment:
+
+Series Assignment and Index Alignment
+-------------------------------------
+
+When assigning a Series to a DataFrame column, pandas performs automatic alignment
+based on index labels. This is a fundamental behavior that can be surprising to
+new users who might expect positional assignment.
+
+Key Points:
+~~~~~~~~~~~
+
+* Series values are matched to DataFrame rows by index label
+* Position/order in the Series doesn't matter
+* Missing index labels result in NaN values
+* This behavior is consistent across ``df[col] = series`` and ``df.loc[:, col] = series``
+
+Examples:
+
+.. ipython:: python
+
+   import pandas as pd
+
+   # Create a DataFrame
+   df = pd.DataFrame({'values': [1, 2, 3]}, index=['x', 'y', 'z'])
+
+   # Series with matching indices (different order)
+   s1 = pd.Series([10, 20, 30], index=['z', 'x', 'y'])
+   df['aligned'] = s1  # Aligns by index, not position
+   print(df)
+
+   # Series with partial index match
+   s2 = pd.Series([100, 200], index=['x', 'z'])
+   df['partial'] = s2  # Missing 'y' gets NaN
+   print(df)
+
+   # Series with non-matching indices
+   s3 = pd.Series([1000, 2000], index=['a', 'b'])
+   df['nomatch'] = s3  # All values become NaN
+   print(df)
+
+
+   # Avoiding confusion:
+   # If you want positional assignment instead of index alignment,
+   # pass the underlying values so that the Series index is ignored
+   df['s1_values'] = s1.to_numpy()
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -687,6 +687,7 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
+- Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
 - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
 - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
 -
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -575,7 +575,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
             # GH 10696/18593/18630
             dtype = self.dtype.update_dtype(dtype)
             self = self.copy() if copy else self
-            result = self._set_dtype(dtype)
+            result = self._set_dtype(dtype, copy=False)
 
         elif isinstance(dtype, ExtensionDtype):
             return super().astype(dtype, copy=copy)
@@ -945,7 +945,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:
 
         super().__init__(self._ndarray, new_dtype)
 
-    def _set_dtype(self, dtype: CategoricalDtype) -> Self:
+    def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
         """
         Internal method for directly updating the CategoricalDtype
 
@@ -958,7 +958,9 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Self:
         We don't do any validation here. It's assumed that the dtype is
         a (valid) instance of `CategoricalDtype`.
         """
-        codes = recode_for_categories(self.codes, self.categories, dtype.categories)
+        codes = recode_for_categories(
+            self.codes, self.categories, dtype.categories, copy
+        )
         return type(self)._simple_new(codes, dtype=dtype)
 
     def set_ordered(self, value: bool) -> Self:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4213,6 +4213,89 @@ def isetitem(self, loc, value) -> None:
         self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs)
 
     def __setitem__(self, key, value) -> None:
+        """
+        Set item(s) in DataFrame by key.
+
+        This method allows you to set the values of one or more columns in the
+        DataFrame using a key. If the key does not exist, a new
+        column will be created.
+
+        Parameters
+        ----------
+        key : label, list of labels, or tuple
+            Column label(s) to set. Can be a single column name, list of column names,
+            or tuple for MultiIndex columns.
+        value : scalar, array-like, Series, or DataFrame
+            Value(s) to set for the specified key(s).
+
+        Returns
+        -------
+        None
+            This method does not return a value.
+
+        See Also
+        --------
+        DataFrame.loc : Access and set values by label-based indexing.
+        DataFrame.iloc : Access and set values by position-based indexing.
+        DataFrame.assign : Assign new columns to a DataFrame.
+
+        Notes
+        -----
+        When assigning a Series to a DataFrame column, pandas aligns the Series
+        by index labels, not by position. This means:
+
+        * Values from the Series are matched to DataFrame rows by index label
+        * If a Series index label doesn't exist in the DataFrame index, it's ignored
+        * If a DataFrame index label doesn't exist in the Series index, NaN is assigned
+        * The order of values in the Series doesn't matter; only the index labels matter
+
+        Examples
+        --------
+        Basic column assignment:
+
+        >>> df = pd.DataFrame({"A": [1, 2, 3]})
+        >>> df["B"] = [4, 5, 6]  # Assigns by position
+        >>> df
+           A  B
+        0  1  4
+        1  2  5
+        2  3  6
+
+        Series assignment with index alignment:
+
+        >>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2])
+        >>> s = pd.Series([10, 20], index=[1, 3])  # Note: index 3 doesn't exist in df
+        >>> df["B"] = s  # Assigns by index label, not position
+        >>> df
+           A     B
+        0  1   NaN
+        1  2  10.0
+        2  3   NaN
+
+        Series assignment with partial index match:
+
+        >>> df = pd.DataFrame({"A": [1, 2, 3, 4]}, index=["a", "b", "c", "d"])
+        >>> s = pd.Series([100, 200], index=["b", "d"])
+        >>> df["B"] = s
+        >>> df
+           A      B
+        a  1    NaN
+        b  2  100.0
+        c  3    NaN
+        d  4  200.0
+
+        Series index labels that are not in the DataFrame index are ignored
+        (here, the values for "a" and "b" are dropped):
+
+        >>> df = pd.DataFrame({"A": [1, 2, 3]}, index=["x", "y", "z"])
+        >>> s = pd.Series([10, 20, 30, 40, 50], index=["x", "y", "a", "b", "z"])
+        >>> df["B"] = s
+        >>> df
+           A   B
+        x  1  10
+        y  2  20
+        z  3  50
+        """
         if not PYPY:
             if sys.getrefcount(self) <= 3:
                 warnings.warn(
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -609,6 +609,22 @@ def loc(self) -> _LocIndexer:
 
         Please see the :ref:`user guide<advanced.advanced_hierarchical>`
         for more details and explanations of advanced indexing.
+
+        **Assignment with Series**
+
+        When assigning a Series to .loc[row_indexer, col_indexer], pandas aligns
+        the Series by index labels, not by order or position.
+
+        Series assignment with .loc and index alignment:
+
+        >>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2])
+        >>> s = pd.Series([10, 20], index=[1, 0])  # Note reversed order
+        >>> df.loc[:, "B"] = s  # Aligns by index, not order
+        >>> df
+           A     B
+        0  1  20.0
+        1  2  10.0
+        2  3   NaN
         """
         return _LocIndexer("loc", self)
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1972,7 +1972,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        observed: bool = False,
+        observed: bool = True,
         dropna: bool = True,
     ) -> SeriesGroupBy:
         from pandas.core.groupby.generic import SeriesGroupBy
diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py
@@ -130,6 +130,14 @@ def test_astype_category(self, dtype_ordered, ordered):
             expected = cat
             tm.assert_categorical_equal(result, expected)
 
+    def test_astype_category_copy_false_nocopy_codes(self):
+        # GH#62000
+        cat = Categorical([3, 2, 4, 1])
+        new = cat.astype("category", copy=False)
+        assert tm.shares_memory(new.codes, cat.codes)
+        new = cat.astype("category", copy=True)
+        assert not tm.shares_memory(new.codes, cat.codes)
+
     def test_astype_object_datetime_categories(self):
         # GH#40754
         cat = Categorical(to_datetime(["2021-03-27", NaT]))

Original file line number	Diff line number	Diff line change
`@@ -687,6 +687,7 @@ Bug fixes`
`687`	`687`	`Categorical`
`688`	`688`	`^^^^^^^^^^^`
`689`	`689`	- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
	`690`	+- Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
`690`	`691`	- Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
`691`	`692`	- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
`692`	`693`	`-`