Skip to content

Commit 3446950

Browse files
feat: Add level, inplace params to reset_index (#1988)
1 parent ebaa244 commit 3446950

File tree

9 files changed

+204
-23
lines changed

9 files changed

+204
-23
lines changed

bigframes/core/blocks.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -387,25 +387,39 @@ def reversed(self) -> Block:
387387
index_labels=self.index.names,
388388
)
389389

390-
def reset_index(self, drop: bool = True) -> Block:
390+
def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
391391
"""Reset the index of the block, promoting the old index to a value column.
392392
393393
Arguments:
394+
level: label(s) or integer position(s) of the index levels to remove; all levels are removed if not given.
394395
name: this is the column id for the new value id derived from the old index
395396
396397
Returns:
397398
A new Block because dropping index columns can break references
398399
from Index classes that point to this block.
399400
"""
401+
if level:
402+
# preserve original order, not user provided order
403+
level_ids: Sequence[str] = [
404+
id for id in self.index_columns if id in self.index.resolve_level(level)
405+
]
406+
else:
407+
level_ids = self.index_columns
408+
400409
expr = self._expr
401-
if (
410+
if set(self.index_columns) > set(level_ids):
411+
new_index_cols = [col for col in self.index_columns if col not in level_ids]
412+
new_index_labels = [self.col_id_to_index_name[id] for id in new_index_cols]
413+
elif (
402414
self.session._default_index_type
403415
== bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
404416
):
405417
expr, new_index_col_id = expr.promote_offsets()
406418
new_index_cols = [new_index_col_id]
419+
new_index_labels = [None]
407420
elif self.session._default_index_type == bigframes.enums.DefaultIndexKind.NULL:
408421
new_index_cols = []
422+
new_index_labels = []
409423
else:
410424
raise ValueError(
411425
f"Unrecognized default index kind: {self.session._default_index_type}"
@@ -415,22 +429,23 @@ def reset_index(self, drop: bool = True) -> Block:
415429
# Even though the index might be part of the ordering, keep that
416430
# ordering expression as reset_index shouldn't change the row
417431
# order.
418-
expr = expr.drop_columns(self.index_columns)
432+
expr = expr.drop_columns(level_ids)
419433
return Block(
420434
expr,
421435
index_columns=new_index_cols,
436+
index_labels=new_index_labels,
422437
column_labels=self.column_labels,
423438
)
424439
else:
425440
# Add index names to column index
426-
index_labels = self.index.names
427441
column_labels_modified = self.column_labels
428-
for level, label in enumerate(index_labels):
442+
for position, level_id in enumerate(level_ids):
443+
label = self.col_id_to_index_name[level_id]
429444
if label is None:
430-
if "index" not in self.column_labels and len(index_labels) <= 1:
445+
if "index" not in self.column_labels and self.index.nlevels <= 1:
431446
label = "index"
432447
else:
433-
label = f"level_{level}"
448+
label = f"level_{self.index_columns.index(level_id)}"
434449

435450
if label in self.column_labels:
436451
raise ValueError(f"cannot insert {label}, already exists")
@@ -439,11 +454,12 @@ def reset_index(self, drop: bool = True) -> Block:
439454
label = tuple(label if i == 0 else "" for i in range(nlevels))
440455
# Create index copy with label inserted
441456
# See: https://pandas.pydata.org/docs/reference/api/pandas.Index.insert.html
442-
column_labels_modified = column_labels_modified.insert(level, label)
457+
column_labels_modified = column_labels_modified.insert(position, label)
443458

444459
return Block(
445-
expr,
460+
expr.select_columns((*new_index_cols, *level_ids, *self.value_columns)),
446461
index_columns=new_index_cols,
462+
index_labels=new_index_labels,
447463
column_labels=column_labels_modified,
448464
)
449465

bigframes/dataframe.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2315,9 +2315,39 @@ def _assign_series_join_on_index(
23152315

23162316
return DataFrame(block.with_index_labels(self._block.index.names))
23172317

2318-
def reset_index(self, *, drop: bool = False) -> DataFrame:
2319-
block = self._block.reset_index(drop)
2320-
return DataFrame(block)
2318+
@overload # type: ignore[override]
2319+
def reset_index(
2320+
self,
2321+
level: blocks.LevelsType = ...,
2322+
drop: bool = ...,
2323+
inplace: Literal[False] = ...,
2324+
) -> DataFrame:
2325+
...
2326+
2327+
@overload
2328+
def reset_index(
2329+
self,
2330+
level: blocks.LevelsType = ...,
2331+
drop: bool = ...,
2332+
inplace: Literal[True] = ...,
2333+
) -> None:
2334+
...
2335+
2336+
@overload
2337+
def reset_index(
2338+
self, level: blocks.LevelsType = None, drop: bool = False, inplace: bool = ...
2339+
) -> Optional[DataFrame]:
2340+
...
2341+
2342+
def reset_index(
2343+
self, level: blocks.LevelsType = None, drop: bool = False, inplace: bool = False
2344+
) -> Optional[DataFrame]:
2345+
block = self._block.reset_index(level, drop)
2346+
if inplace:
2347+
self._set_block(block)
2348+
return None
2349+
else:
2350+
return DataFrame(block)
23212351

23222352
def set_index(
23232353
self,

bigframes/series.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -406,17 +406,59 @@ def equals(
406406
return False
407407
return block_ops.equals(self._block, other._block)
408408

409+
@overload # type: ignore[override]
410+
def reset_index(
411+
self,
412+
level: blocks.LevelsType = ...,
413+
*,
414+
name: typing.Optional[str] = ...,
415+
drop: Literal[False] = ...,
416+
inplace: Literal[False] = ...,
417+
) -> bigframes.dataframe.DataFrame:
418+
...
419+
420+
@overload
421+
def reset_index(
422+
self,
423+
level: blocks.LevelsType = ...,
424+
*,
425+
name: typing.Optional[str] = ...,
426+
drop: Literal[True] = ...,
427+
inplace: Literal[False] = ...,
428+
) -> Series:
429+
...
430+
431+
@overload
432+
def reset_index(
433+
self,
434+
level: blocks.LevelsType = ...,
435+
*,
436+
name: typing.Optional[str] = ...,
437+
drop: bool = ...,
438+
inplace: Literal[True] = ...,
439+
) -> None:
440+
...
441+
409442
@validations.requires_ordering()
410443
def reset_index(
411444
self,
445+
level: blocks.LevelsType = None,
412446
*,
413447
name: typing.Optional[str] = None,
414448
drop: bool = False,
415-
) -> bigframes.dataframe.DataFrame | Series:
416-
block = self._block.reset_index(drop)
449+
inplace: bool = False,
450+
) -> bigframes.dataframe.DataFrame | Series | None:
451+
block = self._block.reset_index(level, drop)
417452
if drop:
453+
if inplace:
454+
self._set_block(block)
455+
return None
418456
return Series(block)
419457
else:
458+
if inplace:
459+
raise ValueError(
460+
"Series.reset_index cannot combine inplace=True and drop=False"
461+
)
420462
if name:
421463
block = block.assign_label(self._value_column, name)
422464
return bigframes.dataframe.DataFrame(block)

tests/system/small/test_dataframe.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2070,6 +2070,26 @@ def test_reset_index(scalars_df_index, scalars_pandas_df_index, drop):
20702070
pandas.testing.assert_frame_equal(bf_result, pd_result)
20712071

20722072

2073+
@pytest.mark.parametrize(
2074+
("drop",),
2075+
((True,), (False,)),
2076+
)
2077+
def test_reset_index_inplace(scalars_df_index, scalars_pandas_df_index, drop):
2078+
df = scalars_df_index.copy()
2079+
df.reset_index(drop=drop, inplace=True)
2080+
assert df.index.name is None
2081+
2082+
bf_result = df.to_pandas()
2083+
pd_result = scalars_pandas_df_index.copy()
2084+
pd_result.reset_index(drop=drop, inplace=True)
2085+
2086+
# Pandas uses int64 instead of Int64 (nullable) dtype.
2087+
pd_result.index = pd_result.index.astype(pd.Int64Dtype())
2088+
2089+
# reset_index should maintain the original ordering.
2090+
pandas.testing.assert_frame_equal(bf_result, pd_result)
2091+
2092+
20732093
def test_reset_index_then_filter(
20742094
scalars_df_index,
20752095
scalars_pandas_df_index,

tests/system/small/test_multiindex.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,20 +101,69 @@ def test_set_multi_index(scalars_df_index, scalars_pandas_df_index):
101101
pandas.testing.assert_frame_equal(bf_result, pd_result)
102102

103103

104-
def test_reset_multi_index(scalars_df_index, scalars_pandas_df_index):
104+
@pytest.mark.parametrize(
105+
("level", "drop"),
106+
[
107+
(None, True),
108+
(None, False),
109+
(1, True),
110+
("bool_col", True),
111+
(["float64_col", "int64_too"], True),
112+
([2, 0], False),
113+
],
114+
)
115+
def test_df_reset_multi_index(scalars_df_index, scalars_pandas_df_index, level, drop):
105116
bf_result = (
106-
scalars_df_index.set_index(["bool_col", "int64_too"]).reset_index().to_pandas()
117+
scalars_df_index.set_index(["bool_col", "int64_too", "float64_col"])
118+
.reset_index(level=level, drop=drop)
119+
.to_pandas()
107120
)
108121
pd_result = scalars_pandas_df_index.set_index(
109-
["bool_col", "int64_too"]
110-
).reset_index()
122+
["bool_col", "int64_too", "float64_col"]
123+
).reset_index(level=level, drop=drop)
111124

112125
# Pandas uses int64 instead of Int64 (nullable) dtype.
113-
pd_result.index = pd_result.index.astype(pandas.Int64Dtype())
126+
if pd_result.index.dtype != bf_result.index.dtype:
127+
pd_result.index = pd_result.index.astype(pandas.Int64Dtype())
114128

115129
pandas.testing.assert_frame_equal(bf_result, pd_result)
116130

117131

132+
@pytest.mark.parametrize(
133+
("level", "drop"),
134+
[
135+
(None, True),
136+
(None, False),
137+
(1, True),
138+
("bool_col", True),
139+
(["float64_col", "int64_too"], True),
140+
([2, 0], False),
141+
],
142+
)
143+
def test_series_reset_multi_index(
144+
scalars_df_index, scalars_pandas_df_index, level, drop
145+
):
146+
bf_result = (
147+
scalars_df_index.set_index(["bool_col", "int64_too", "float64_col"])[
148+
"string_col"
149+
]
150+
.reset_index(level=level, drop=drop)
151+
.to_pandas()
152+
)
153+
pd_result = scalars_pandas_df_index.set_index(
154+
["bool_col", "int64_too", "float64_col"]
155+
)["string_col"].reset_index(level=level, drop=drop)
156+
157+
# Pandas uses int64 instead of Int64 (nullable) dtype.
158+
if pd_result.index.dtype != bf_result.index.dtype:
159+
pd_result.index = pd_result.index.astype(pandas.Int64Dtype())
160+
161+
if drop:
162+
pandas.testing.assert_series_equal(bf_result, pd_result)
163+
else:
164+
pandas.testing.assert_frame_equal(bf_result, pd_result)
165+
166+
118167
def test_series_multi_index_idxmin(scalars_df_index, scalars_pandas_df_index):
119168
bf_result = scalars_df_index.set_index(["bool_col", "int64_too"])[
120169
"float64_col"

tests/system/small/test_series.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1339,6 +1339,18 @@ def test_reset_index_drop(scalars_df_index, scalars_pandas_df_index):
13391339
pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result)
13401340

13411341

1342+
def test_series_reset_index_inplace(scalars_df_index, scalars_pandas_df_index):
1343+
bf_result = scalars_df_index.sort_index(ascending=False)["float64_col"]
1344+
bf_result.reset_index(drop=True, inplace=True)
1345+
pd_result = scalars_pandas_df_index.sort_index(ascending=False)["float64_col"]
1346+
pd_result.reset_index(drop=True, inplace=True)
1347+
1348+
# BigQuery DataFrames default indices use nullable Int64 always
1349+
pd_result.index = pd_result.index.astype("Int64")
1350+
1351+
pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result)
1352+
1353+
13421354
@pytest.mark.parametrize(
13431355
("name",),
13441356
[

tests/unit/test_dataframe_polars.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,13 +1657,11 @@ def test_reset_index_with_unnamed_index(
16571657
pandas.testing.assert_frame_equal(bf_result, pd_result)
16581658

16591659

1660-
def test_reset_index_with_unnamed_multiindex(
1661-
scalars_df_index,
1662-
scalars_pandas_df_index,
1663-
):
1660+
def test_reset_index_with_unnamed_multiindex(session):
16641661
bf_df = dataframe.DataFrame(
16651662
([1, 2, 3], [2, 5, 7]),
16661663
index=pd.MultiIndex.from_tuples([("a", "aa"), ("a", "aa")]),
1664+
session=session,
16671665
)
16681666
pd_df = pd.DataFrame(
16691667
([1, 2, 3], [2, 5, 7]),

third_party/bigframes_vendored/pandas/core/frame.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,8 +1601,10 @@ def droplevel(self, level, axis: str | int = 0):
16011601

16021602
def reset_index(
16031603
self,
1604+
level=None,
16041605
*,
16051606
drop: bool = False,
1607+
inplace: bool = False,
16061608
) -> DataFrame | None:
16071609
"""Reset the index.
16081610
@@ -1696,9 +1698,14 @@ class name speed max
16961698
16971699
16981700
Args:
1701+
level (int, str, tuple, or list, default None):
1702+
Only remove the given levels from the index. Removes all levels by
1703+
default.
16991704
drop (bool, default False):
17001705
Do not try to insert index into dataframe columns. This resets
17011706
the index to the default integer index.
1707+
inplace (bool, default False):
1708+
Whether to modify the DataFrame rather than creating a new one.
17021709
17031710
Returns:
17041711
bigframes.pandas.DataFrame or None: DataFrame with the new index, or None if ``inplace=True``.

third_party/bigframes_vendored/pandas/core/series.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,9 +321,11 @@ def transpose(self) -> Series:
321321

322322
def reset_index(
323323
self,
324+
level=None,
324325
*,
325326
drop: bool = False,
326327
name=pd_ext.no_default,
328+
inplace: bool = False,
327329
) -> DataFrame | Series | None:
328330
"""
329331
Generate a new DataFrame or Series with the index reset.
@@ -399,13 +401,18 @@ def reset_index(
399401
[4 rows x 3 columns]
400402
401403
Args:
404+
level (int, str, tuple, or list, default None):
405+
For a Series with a MultiIndex, only remove the specified levels
406+
from the index. Removes all levels by default.
402407
drop (bool, default False):
403408
Just reset the index, without inserting it as a column in
404409
the new DataFrame.
405410
name (object, optional):
406411
The name to use for the column containing the original Series
407412
values. Uses ``self.name`` by default. This argument is ignored
408413
when `drop` is True.
414+
inplace (bool, default False):
415+
Modify the Series in place (do not create a new object).
409416
410417
Returns:
411418
bigframes.pandas.Series or bigframes.pandas.DataFrame or None:

0 commit comments

Comments
 (0)