Skip to content

Commit 94716b5

Browse files
committed
resolved merge from main for ssl error pr for doc build page
2 parents 090c0b7 + 364ca58 commit 94716b5

File tree

7 files changed

+160
-4
lines changed

7 files changed

+160
-4
lines changed

doc/source/user_guide/indexing.rst

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,3 +1732,49 @@ Why does assignment fail when using chained indexing?
17321732
This means that chained indexing will never work.
17331733
See :ref:`this section <copy_on_write_chained_assignment>`
17341734
for more context.
1735+
1736+
.. _indexing.series_assignment:
1737+
1738+
Series Assignment and Index Alignment
1739+
-------------------------------------
1740+
1741+
When assigning a Series to a DataFrame column, pandas performs automatic alignment
1742+
based on index labels. This is a fundamental behavior that can be surprising to
1743+
new users who might expect positional assignment.
1744+
1745+
Key Points:
1746+
~~~~~~~~~~~
1747+
1748+
* Series values are matched to DataFrame rows by index label
1749+
* Position/order in the Series doesn't matter
1750+
* Missing index labels result in NaN values
1751+
* This behavior is consistent across ``df[col] = series`` and ``df.loc[:, col] = series``
1752+
1753+
Examples:
1754+
.. ipython:: python
1755+
1756+
import pandas as pd
1757+
1758+
# Create a DataFrame
1759+
df = pd.DataFrame({'values': [1, 2, 3]}, index=['x', 'y', 'z'])
1760+
1761+
# Series with matching indices (different order)
1762+
s1 = pd.Series([10, 20, 30], index=['z', 'x', 'y'])
1763+
df['aligned'] = s1 # Aligns by index, not position
1764+
print(df)
1765+
1766+
# Series with partial index match
1767+
s2 = pd.Series([100, 200], index=['x', 'z'])
1768+
df['partial'] = s2 # Missing 'y' gets NaN
1769+
print(df)
1770+
1771+
# Series with non-matching indices
1772+
s3 = pd.Series([1000, 2000], index=['a', 'b'])
1773+
df['nomatch'] = s3 # All values become NaN
1774+
print(df)
1775+
1776+
1777+
# Avoiding confusion:
1778+
# For positional assignment instead of index alignment, assign the underlying
1779+
# values (e.g. s1.to_numpy()); note that reindex, used below, still aligns by label
1780+
df['s1_values'] = s1.reindex(df.index)

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,7 @@ Bug fixes
687687
Categorical
688688
^^^^^^^^^^^
689689
- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
690+
- Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
690691
- Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
691692
- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
692693
-

pandas/core/arrays/categorical.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
575575
# GH 10696/18593/18630
576576
dtype = self.dtype.update_dtype(dtype)
577577
self = self.copy() if copy else self
578-
result = self._set_dtype(dtype)
578+
result = self._set_dtype(dtype, copy=False)
579579

580580
elif isinstance(dtype, ExtensionDtype):
581581
return super().astype(dtype, copy=copy)
@@ -945,7 +945,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:
945945

946946
super().__init__(self._ndarray, new_dtype)
947947

948-
def _set_dtype(self, dtype: CategoricalDtype) -> Self:
948+
def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
949949
"""
950950
Internal method for directly updating the CategoricalDtype
951951
@@ -958,7 +958,9 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Self:
958958
We don't do any validation here. It's assumed that the dtype is
959959
a (valid) instance of `CategoricalDtype`.
960960
"""
961-
codes = recode_for_categories(self.codes, self.categories, dtype.categories)
961+
codes = recode_for_categories(
962+
self.codes, self.categories, dtype.categories, copy
963+
)
962964
return type(self)._simple_new(codes, dtype=dtype)
963965

964966
def set_ordered(self, value: bool) -> Self:

pandas/core/frame.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4213,6 +4213,89 @@ def isetitem(self, loc, value) -> None:
42134213
self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs)
42144214

42154215
def __setitem__(self, key, value) -> None:
4216+
"""
4217+
Set item(s) in DataFrame by key.
4218+
4219+
This method allows you to set the values of one or more columns in the
4220+
DataFrame using a key. If the key does not exist, a new
4221+
column will be created.
4222+
4223+
Parameters
4224+
----------
4225+
key : label, list of labels, or tuple
4226+
Column label(s) to set. Can be a single column name, list of column names,
4227+
or tuple for MultiIndex columns.
4228+
value : scalar, array-like, Series, or DataFrame
4229+
Value(s) to set for the specified key(s).
4230+
4231+
Returns
4232+
-------
4233+
None
4234+
This method does not return a value.
4235+
4236+
See Also
4237+
--------
4238+
DataFrame.loc : Access and set values by label-based indexing.
4239+
DataFrame.iloc : Access and set values by position-based indexing.
4240+
DataFrame.assign : Assign new columns to a DataFrame.
4241+
4242+
Notes
4243+
-----
4244+
When assigning a Series to a DataFrame column, pandas aligns the Series
4245+
by index labels, not by position. This means:
4246+
4247+
* Values from the Series are matched to DataFrame rows by index label
4248+
* If a Series index label doesn't exist in the DataFrame index, it's ignored
4249+
* If a DataFrame index label doesn't exist in the Series index, NaN is assigned
4250+
* The order of values in the Series doesn't matter; only the index labels matter
4251+
4252+
Examples
4253+
--------
4254+
Basic column assignment:
4255+
4256+
>>> df = pd.DataFrame({"A": [1, 2, 3]})
4257+
>>> df["B"] = [4, 5, 6] # Assigns by position
4258+
>>> df
4259+
A B
4260+
0 1 4
4261+
1 2 5
4262+
2 3 6
4263+
4264+
Series assignment with index alignment:
4265+
4266+
>>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2])
4267+
>>> s = pd.Series([10, 20], index=[1, 3]) # Note: index 3 doesn't exist in df
4268+
>>> df["B"] = s # Assigns by index label, not position
4269+
>>> df
4270+
A B
4271+
0 1 NaN
4272+
1 2 10.0
4273+
2 3 NaN
4274+
4275+
Series assignment with partial index match:
4276+
4277+
>>> df = pd.DataFrame({"A": [1, 2, 3, 4]}, index=["a", "b", "c", "d"])
4278+
>>> s = pd.Series([100, 200], index=["b", "d"])
4279+
>>> df["B"] = s
4280+
>>> df
4281+
A B
4282+
a 1 NaN
4283+
b 2 100.0
4284+
c 3 NaN
4285+
d 4 200.0
4286+
4287+
Series index labels that are not in the DataFrame index are ignored:
4288+
4289+
>>> df = pd.DataFrame({"A": [1, 2, 3]}, index=["x", "y", "z"])
4290+
>>> s = pd.Series([10, 20, 30, 40, 50], index=["x", "y", "a", "b", "z"])
4291+
>>> df["B"] = s
4292+
>>> df
4293+
A B
4294+
x 1 10
4295+
y 2 20
4296+
z 3 50
4297+
# Values for 'a' and 'b' are completely ignored!
4298+
"""
42164299
if not PYPY:
42174300
if sys.getrefcount(self) <= 3:
42184301
warnings.warn(

pandas/core/indexing.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,22 @@ def loc(self) -> _LocIndexer:
609609
610610
Please see the :ref:`user guide<advanced.advanced_hierarchical>`
611611
for more details and explanations of advanced indexing.
612+
613+
**Assignment with Series**
614+
615+
When assigning a Series to .loc[row_indexer, col_indexer], pandas aligns
616+
the Series by index labels, not by order or position.
617+
618+
Series assignment with .loc and index alignment:
619+
620+
>>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2])
621+
>>> s = pd.Series([10, 20], index=[1, 0]) # Note reversed order
622+
>>> df.loc[:, "B"] = s # Aligns by index, not order
623+
>>> df
624+
A B
625+
0 1 20.0
626+
1 2 10.0
627+
2 3 NaN
612628
"""
613629
return _LocIndexer("loc", self)
614630

pandas/core/series.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1972,7 +1972,7 @@ def groupby(
19721972
as_index: bool = True,
19731973
sort: bool = True,
19741974
group_keys: bool = True,
1975-
observed: bool = False,
1975+
observed: bool = True,
19761976
dropna: bool = True,
19771977
) -> SeriesGroupBy:
19781978
from pandas.core.groupby.generic import SeriesGroupBy

pandas/tests/arrays/categorical/test_astype.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,14 @@ def test_astype_category(self, dtype_ordered, ordered):
130130
expected = cat
131131
tm.assert_categorical_equal(result, expected)
132132

133+
def test_astype_category_copy_false_nocopy_codes(self):
134+
# GH#62000
135+
cat = Categorical([3, 2, 4, 1])
136+
new = cat.astype("category", copy=False)
137+
assert tm.shares_memory(new.codes, cat.codes)
138+
new = cat.astype("category", copy=True)
139+
assert not tm.shares_memory(new.codes, cat.codes)
140+
133141
def test_astype_object_datetime_categories(self):
134142
# GH#40754
135143
cat = Categorical(to_datetime(["2021-03-27", NaT]))

0 commit comments

Comments
 (0)