Skip to content

Commit 699a9db

Browse files
committed
Doc improvement for setitem
1 parent cfa767f commit 699a9db

File tree

4 files changed

+158
-0
lines changed

4 files changed

+158
-0
lines changed

doc/source/user_guide/indexing.rst

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,3 +1732,53 @@ Why does assignment fail when using chained indexing?
17321732
This means that chained indexing will never work.
17331733
See :ref:`this section <copy_on_write_chained_assignment>`
17341734
for more context.
1735+
1736+
.. _indexing.series_assignment:
1737+
1738+
Series Assignment and Index Alignment
1739+
-------------------------------------
1740+
1741+
When assigning a Series to a DataFrame column, pandas performs automatic alignment
1742+
based on index labels. This is a fundamental behavior that can be surprising to
1743+
new users who might expect positional assignment.
1744+
1745+
Key Points:
1746+
~~~~~~~~~~~
1747+
1748+
* Series values are matched to DataFrame rows by index label
1749+
* Position/order in the Series doesn't matter
1750+
* Missing index labels result in NaN values
1751+
* This behavior is consistent across ``df[col] = series`` and ``df.loc[:, col] = series``
1752+
1753+
Examples:
1754+
.. ipython:: python
1755+
1756+
import pandas as pd
1757+
1758+
# Create a DataFrame
1759+
df = pd.DataFrame({'values': [1, 2, 3]}, index=['x', 'y', 'z'])
1760+
1761+
# Series with matching indices (different order)
1762+
s1 = pd.Series([10, 20, 30], index=['z', 'x', 'y'])
1763+
df['aligned'] = s1 # Aligns by index, not position
1764+
print(df)
1765+
1766+
# Series with partial index match
1767+
s2 = pd.Series([100, 200], index=['x', 'z'])
1768+
df['partial'] = s2 # Missing 'y' gets NaN
1769+
print(df)
1770+
1771+
# Series with non-matching indices
1772+
s3 = pd.Series([1000, 2000], index=['a', 'b'])
1773+
df['nomatch'] = s3 # All values become NaN
1774+
print(df)
1775+
1776+
1777+
# Avoiding confusion:
1778+
# If you want positional assignment instead of index alignment:
1779+
# Convert Series to array/list for positional assignment
1780+
1781+
df['positional'] = s1.to_numpy() # or s1.tolist()
1782+
1783+
# Or relabel the Series with the DataFrame index so values keep their position
1784+
df['reset_index'] = s1.set_axis(df.index)

pandas/core/frame.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4214,6 +4214,78 @@ def isetitem(self, loc, value) -> None:
42144214
self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs)
42154215

42164216
def __setitem__(self, key, value) -> None:
4217+
"""
4218+
Set item(s) in DataFrame by key.
4219+
4220+
This method allows you to set the values of one or more columns in the
4221+
DataFrame using a key. The key can be a single column label, a list of
4222+
labels, or a boolean array. If the key does not exist, a new
4223+
column will be created.
4224+
4225+
Parameters
4226+
----------
4227+
key : str, list of str, or tuple
4228+
Column label(s) to set. Can be a single column name, list of column names,
4229+
or tuple for MultiIndex columns.
4230+
value : scalar, array-like, Series, or DataFrame
4231+
Value(s) to set for the specified key(s).
4232+
4233+
Returns
4234+
-------
4235+
None
4236+
This method does not return a value.
4237+
4238+
See Also
4239+
--------
4240+
DataFrame.loc : Access and set values by label-based indexing.
4241+
DataFrame.iloc : Access and set values by position-based indexing.
4242+
DataFrame.assign : Assign new columns to a DataFrame.
4243+
4244+
Notes
4245+
-----
4246+
When assigning a Series to a DataFrame column, pandas aligns the Series
4247+
by index labels, not by position. This means:
4248+
4249+
* Values from the Series are matched to DataFrame rows by index label
4250+
* If a Series index label doesn't exist in the DataFrame index, it's ignored
4251+
* If a DataFrame index label doesn't exist in the Series index, NaN is assigned
4252+
* The order of values in the Series doesn't matter; only the index labels matter
4253+
4254+
Examples
4255+
--------
4256+
Basic column assignment:
4257+
4258+
>>> df = pd.DataFrame({"A": [1, 2, 3]})
4259+
>>> df["B"] = [4, 5, 6] # Assigns by position
4260+
>>> df
4261+
A B
4262+
0 1 4
4263+
1 2 5
4264+
2 3 6
4265+
4266+
Series assignment with index alignment:
4267+
4268+
>>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2])
4269+
>>> s = pd.Series([10, 20], index=[1, 3]) # Note: index 3 doesn't exist in df
4270+
>>> df["B"] = s # Assigns by index label, not position
4271+
>>> df
4272+
A B
4273+
0 1 NaN
4274+
1 2 10.0
4275+
2 3 NaN
4276+
4277+
Series assignment with partial index match:
4278+
4279+
>>> df = pd.DataFrame({"A": [1, 2, 3, 4]}, index=["a", "b", "c", "d"])
4280+
>>> s = pd.Series([100, 200], index=["b", "d"])
4281+
>>> df["B"] = s
4282+
>>> df
4283+
A B
4284+
a 1 NaN
4285+
b 2 100.0
4286+
c 3 NaN
4287+
d 4 200.0
4288+
"""
42174289
if not PYPY:
42184290
if sys.getrefcount(self) <= 3:
42194291
warnings.warn(

pandas/core/indexing.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,23 @@ def loc(self) -> _LocIndexer:
610610
611611
Please see the :ref:`user guide<advanced.advanced_hierarchical>`
612612
for more details and explanations of advanced indexing.
613+
614+
**Assignment with Series**
615+
616+
When assigning a Series to .loc[row_indexer, col_indexer], pandas aligns
617+
the Series by index labels, not by order or position. This is consistent
618+
with pandas' general alignment behavior.
619+
620+
Series assignment with .loc and index alignment:
621+
622+
>>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2])
623+
>>> s = pd.Series([10, 20], index=[1, 0]) # Note reversed order
624+
>>> df.loc[:, "B"] = s # Aligns by index, not order
625+
>>> df
626+
A B
627+
0 1 20.0
628+
1 2 10.0
629+
2 3 NaN
613630
"""
614631
return _LocIndexer("loc", self)
615632

pandas/tests/frame/test_api.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,3 +378,22 @@ def test_inspect_getmembers(self):
378378
# GH38740
379379
df = DataFrame()
380380
inspect.getmembers(df)
381+
382+
def test_setitem_series_alignment_documentation(self):
383+
# Test that Series assignment aligns by index as documented.
384+
df = DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2])
385+
s = Series([10, 20], index=[1, 3])
386+
df["B"] = s
387+
expected = DataFrame({"A": [1, 2, 3], "B": [np.nan, 10, np.nan]})
388+
tm.assert_frame_equal(df, expected)
389+
390+
def test_setitem_series_partial_alignment(self):
391+
# Test Series assignment with partial index match.
392+
df = DataFrame({"A": [1, 2, 3, 4]}, index=["a", "b", "c", "d"])
393+
s = Series([100, 200], index=["b", "d"])
394+
df["B"] = s
395+
expected = DataFrame(
396+
{"A": [1, 2, 3, 4], "B": [np.nan, 100, np.nan, 200]},
397+
index=["a", "b", "c", "d"],
398+
)
399+
tm.assert_frame_equal(df, expected)

0 commit comments

Comments
 (0)