Skip to content

Commit c397839

Browse files
committed
Revert "DOC: update DF.set_index (pandas-dev#24762)"
This reverts commit e984947.
1 parent 5cb622a commit c397839

File tree

3 files changed

+48
-73
lines changed

3 files changed

+48
-73
lines changed

doc/source/whatsnew/v0.24.0.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,8 +1195,8 @@ Other API Changes
11951195
- :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`)
11961196
- :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`)
11971197
- :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`)
1198-
- :meth:`DataFrame.set_index` now gives a better (and less frequent) KeyError, raises a ``ValueError`` for incorrect types,
1199-
and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
1198+
- :meth:`DataFrame.set_index` now allows all one-dimensional list-likes, raises a ``TypeError`` for incorrect types,
1199+
has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
12001200
- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
12011201
- :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`)
12021202
- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`).

pandas/core/frame.py

Lines changed: 25 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -4052,16 +4052,12 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
40524052
Set the DataFrame index using existing columns.
40534053
40544054
Set the DataFrame index (row labels) using one or more existing
4055-
columns or arrays (of the correct length). The index can replace the
4056-
existing index or expand on it.
4055+
columns. The index can replace the existing index or expand on it.
40574056
40584057
Parameters
40594058
----------
4060-
keys : label or array-like or list of labels/arrays
4061-
This parameter can be either a single column key, a single array of
4062-
the same length as the calling DataFrame, or a list containing an
4063-
arbitrary combination of column keys and arrays. Here, "array"
4064-
encompasses :class:`Series`, :class:`Index` and ``np.ndarray``.
4059+
keys : label or list of labels
4060+
Name or names of the columns that will be used as the index.
40654061
drop : bool, default True
40664062
Delete columns to be used as the new index.
40674063
append : bool, default False
@@ -4106,7 +4102,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
41064102
7 2013 84
41074103
10 2014 31
41084104
4109-
Create a MultiIndex using columns 'year' and 'month':
4105+
Create a multi-index using columns 'year' and 'month':
41104106
41114107
>>> df.set_index(['year', 'month'])
41124108
sale
@@ -4116,51 +4112,35 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
41164112
2013 7 84
41174113
2014 10 31
41184114
4119-
Create a MultiIndex using an Index and a column:
4115+
Create a multi-index using a set of values and a column:
41204116
4121-
>>> df.set_index([pd.Index([1, 2, 3, 4]), 'year'])
4117+
>>> df.set_index([[1, 2, 3, 4], 'year'])
41224118
month sale
41234119
year
41244120
1 2012 1 55
41254121
2 2014 4 40
41264122
3 2013 7 84
41274123
4 2014 10 31
4128-
4129-
Create a MultiIndex using two Series:
4130-
4131-
>>> s = pd.Series([1, 2, 3, 4])
4132-
>>> df.set_index([s, s**2])
4133-
month year sale
4134-
1 1 1 2012 55
4135-
2 4 4 2014 40
4136-
3 9 7 2013 84
4137-
4 16 10 2014 31
41384124
"""
41394125
inplace = validate_bool_kwarg(inplace, 'inplace')
4140-
4141-
err_msg = ('The parameter "keys" may be a column key, one-dimensional '
4142-
'array, or a list containing only valid column keys and '
4143-
'one-dimensional arrays.')
4144-
4145-
if (is_scalar(keys) or isinstance(keys, tuple)
4146-
or isinstance(keys, (ABCIndexClass, ABCSeries, np.ndarray))):
4147-
# make sure we have a container of keys/arrays we can iterate over
4148-
# tuples can appear as valid column keys!
4126+
if not isinstance(keys, list):
41494127
keys = [keys]
4150-
elif not isinstance(keys, list):
4151-
raise ValueError(err_msg)
41524128

41534129
missing = []
41544130
for col in keys:
4155-
if (is_scalar(col) or isinstance(col, tuple)):
4156-
# if col is a valid column key, everything is fine
4157-
# tuples are always considered keys, never as list-likes
4158-
if col not in self:
4159-
missing.append(col)
4160-
elif (not isinstance(col, (ABCIndexClass, ABCSeries,
4161-
np.ndarray, list))
4131+
if (is_scalar(col) or isinstance(col, tuple)) and col in self:
4132+
# tuples can be both column keys or list-likes
4133+
# if they are valid column keys, everything is fine
4134+
continue
4135+
elif is_scalar(col) and col not in self:
4136+
# tuples that are not column keys are considered list-like,
4137+
# not considered missing
4138+
missing.append(col)
4139+
elif (not is_list_like(col, allow_sets=False)
41624140
or getattr(col, 'ndim', 1) > 1):
4163-
raise ValueError(err_msg)
4141+
raise TypeError('The parameter "keys" may only contain a '
4142+
'combination of valid column keys and '
4143+
'one-dimensional list-likes')
41644144

41654145
if missing:
41664146
raise KeyError('{}'.format(missing))
@@ -4193,6 +4173,12 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
41934173
elif isinstance(col, (list, np.ndarray)):
41944174
arrays.append(col)
41954175
names.append(None)
4176+
elif (is_list_like(col)
4177+
and not (isinstance(col, tuple) and col in self)):
4178+
# all other list-likes (but avoid valid column keys)
4179+
col = list(col) # ensure iterators do not get read twice etc.
4180+
arrays.append(col)
4181+
names.append(None)
41964182
# from here, col can only be a column label
41974183
else:
41984184
arrays.append(frame[col]._values)

pandas/tests/frame/test_alter_axes.py

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def test_set_index_after_mutation(self):
118118
# Add list-of-list constructor because list is ambiguous -> lambda
119119
# also test index name if append=True (name is duplicate here for B)
120120
@pytest.mark.parametrize('box', [Series, Index, np.array,
121-
list, lambda x: [list(x)],
121+
list, tuple, iter, lambda x: [list(x)],
122122
lambda x: MultiIndex.from_arrays([x])])
123123
@pytest.mark.parametrize('append, index_name', [(True, None),
124124
(True, 'B'), (True, 'test'), (False, None)])
@@ -135,7 +135,7 @@ def test_set_index_pass_single_array(self, frame_of_index_cols,
135135
with pytest.raises(KeyError, match=msg):
136136
df.set_index(key, drop=drop, append=append)
137137
else:
138-
# np.array/list-of-list "forget" the name of B
138+
# np.array/tuple/iter/list-of-list "forget" the name of B
139139
name_mi = getattr(key, 'names', None)
140140
name = [getattr(key, 'name', None)] if name_mi is None else name_mi
141141

@@ -150,7 +150,8 @@ def test_set_index_pass_single_array(self, frame_of_index_cols,
150150

151151
# MultiIndex constructor does not work directly on Series -> lambda
152152
# also test index name if append=True (name is duplicate here for A & B)
153-
@pytest.mark.parametrize('box', [Series, Index, np.array, list,
153+
@pytest.mark.parametrize('box', [Series, Index, np.array,
154+
list, tuple, iter,
154155
lambda x: MultiIndex.from_arrays([x])])
155156
@pytest.mark.parametrize('append, index_name',
156157
[(True, None), (True, 'A'), (True, 'B'),
@@ -162,7 +163,7 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
162163
df.index.name = index_name
163164

164165
keys = ['A', box(df['B'])]
165-
# np.array/list "forget" the name of B
166+
# np.array/list/tuple/iter "forget" the name of B
166167
names = ['A', None if box in [np.array, list, tuple, iter] else 'B']
167168

168169
result = df.set_index(keys, drop=drop, append=append)
@@ -178,10 +179,12 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
178179
# MultiIndex constructor does not work directly on Series -> lambda
179180
# We also emulate a "constructor" for the label -> lambda
180181
# also test index name if append=True (name is duplicate here for A)
181-
@pytest.mark.parametrize('box2', [Series, Index, np.array, list,
182+
@pytest.mark.parametrize('box2', [Series, Index, np.array,
183+
list, tuple, iter,
182184
lambda x: MultiIndex.from_arrays([x]),
183185
lambda x: x.name])
184-
@pytest.mark.parametrize('box1', [Series, Index, np.array, list,
186+
@pytest.mark.parametrize('box1', [Series, Index, np.array,
187+
list, tuple, iter,
185188
lambda x: MultiIndex.from_arrays([x]),
186189
lambda x: x.name])
187190
@pytest.mark.parametrize('append, index_name', [(True, None),
@@ -195,14 +198,17 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
195198
keys = [box1(df['A']), box2(df['A'])]
196199
result = df.set_index(keys, drop=drop, append=append)
197200

201+
# if either box was iter, the content has been consumed; re-read it
202+
keys = [box1(df['A']), box2(df['A'])]
203+
198204
# need to adapt first drop for case that both keys are 'A' --
199205
# cannot drop the same column twice;
200206
# use "is" because == would give ambiguous Boolean error for containers
201207
first_drop = False if (keys[0] is 'A' and keys[1] is 'A') else drop
202208

203209
# to test against already-tested behaviour, we add sequentially,
204210
# hence second append always True; must wrap keys in list, otherwise
205-
# box = list would be interpreted as keys
211+
# box = list would be illegal
206212
expected = df.set_index([keys[0]], drop=first_drop, append=append)
207213
expected = expected.set_index([keys[1]], drop=drop, append=True)
208214
tm.assert_frame_equal(result, expected)
@@ -232,7 +238,7 @@ def test_set_index_verify_integrity(self, frame_of_index_cols):
232238

233239
@pytest.mark.parametrize('append', [True, False])
234240
@pytest.mark.parametrize('drop', [True, False])
235-
def test_set_index_raise_keys(self, frame_of_index_cols, drop, append):
241+
def test_set_index_raise(self, frame_of_index_cols, drop, append):
236242
df = frame_of_index_cols
237243

238244
with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"):
@@ -243,31 +249,14 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append):
243249
with pytest.raises(KeyError, match='X'):
244250
df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append)
245251

246-
msg = "[('foo', 'foo', 'foo', 'bar', 'bar')]"
247-
# tuples always raise KeyError
248-
with pytest.raises(KeyError, match=msg):
249-
df.set_index(tuple(df['A']), drop=drop, append=append)
250-
251-
# also within a list
252-
with pytest.raises(KeyError, match=msg):
253-
df.set_index(['A', df['A'], tuple(df['A'])],
254-
drop=drop, append=append)
255-
256-
@pytest.mark.parametrize('append', [True, False])
257-
@pytest.mark.parametrize('drop', [True, False])
258-
@pytest.mark.parametrize('box', [set, iter])
259-
def test_set_index_raise_on_type(self, frame_of_index_cols, box,
260-
drop, append):
261-
df = frame_of_index_cols
262-
263-
msg = 'The parameter "keys" may be a column key, .*'
264-
# forbidden type, e.g. set/tuple/iter
265-
with pytest.raises(ValueError, match=msg):
266-
df.set_index(box(df['A']), drop=drop, append=append)
252+
msg = 'The parameter "keys" may only contain a combination of.*'
253+
# forbidden type, e.g. set
254+
with pytest.raises(TypeError, match=msg):
255+
df.set_index(set(df['A']), drop=drop, append=append)
267256

268-
# forbidden type in list, e.g. set/tuple/iter
269-
with pytest.raises(ValueError, match=msg):
270-
df.set_index(['A', df['A'], box(df['A'])],
257+
# forbidden type in list, e.g. set
258+
with pytest.raises(TypeError, match=msg):
259+
df.set_index(['A', df['A'], set(df['A'])],
271260
drop=drop, append=append)
272261

273262
def test_construction_with_categorical_index(self):

0 commit comments

Comments
 (0)