Skip to content

Commit 710ad67

Browse files
feat: Add reset_index names, col_level, col_fill, allow_duplicates args
1 parent 09b67da commit 710ad67

File tree

8 files changed

+162
-11
lines changed

8 files changed

+162
-11
lines changed

bigframes/core/blocks.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -387,12 +387,21 @@ def reversed(self) -> Block:
387387
index_labels=self.index.names,
388388
)
389389

390-
def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
390+
def reset_index(
391+
self,
392+
level: LevelsType = None,
393+
drop: bool = True,
394+
*,
395+
col_level: Union[str, int] = 0,
396+
col_fill: typing.Hashable = "",
397+
allow_duplicates: bool = False,
398+
) -> Block:
391399
"""Reset the index of the block, promoting the old index to a value column.
392400
393401
Arguments:
394402
level: the label or index level of the index levels to remove.
395403
name: this is the column id for the new value id derived from the old index
404+
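allow_duplicates: if False, raise ValueError when a promoted index label already exists among the column labels; if True, insert it anyway.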
396405
397406
Returns:
398407
A new Block because dropping index columns can break references
@@ -438,6 +447,11 @@ def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
438447
)
439448
else:
440449
# Add index names to column index
450+
col_level_n = (
451+
col_level
452+
if isinstance(col_level, int)
453+
else self.column_labels.names.index(col_level)
454+
)
441455
column_labels_modified = self.column_labels
442456
for position, level_id in enumerate(level_ids):
443457
label = self.col_id_to_index_name[level_id]
@@ -447,11 +461,15 @@ def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
447461
else:
448462
label = f"level_{self.index_columns.index(level_id)}"
449463

450-
if label in self.column_labels:
464+
if (not allow_duplicates) and (label in self.column_labels):
451465
raise ValueError(f"cannot insert {label}, already exists")
466+
452467
if isinstance(self.column_labels, pd.MultiIndex):
453468
nlevels = self.column_labels.nlevels
454-
label = tuple(label if i == 0 else "" for i in range(nlevels))
469+
label = tuple(
470+
label if i == col_level_n else col_fill for i in range(nlevels)
471+
)
472+
455473
# Create index copy with label inserted
456474
# See: https://pandas.pydata.org/docs/reference/api/pandas.Index.insert.html
457475
column_labels_modified = column_labels_modified.insert(position, label)

bigframes/dataframe.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2321,6 +2321,10 @@ def reset_index(
23212321
level: blocks.LevelsType = ...,
23222322
drop: bool = ...,
23232323
inplace: Literal[False] = ...,
2324+
col_level: Union[int, str] = ...,
2325+
col_fill: Hashable = ...,
2326+
allow_duplicates: Optional[bool] = ...,
2327+
names: Union[None, Hashable, Sequence[Hashable]] = ...,
23242328
) -> DataFrame:
23252329
...
23262330

@@ -2330,19 +2334,56 @@ def reset_index(
23302334
level: blocks.LevelsType = ...,
23312335
drop: bool = ...,
23322336
inplace: Literal[True] = ...,
2337+
col_level: Union[int, str] = ...,
2338+
col_fill: Hashable = ...,
2339+
allow_duplicates: Optional[bool] = ...,
2340+
names: Union[None, Hashable, Sequence[Hashable]] = ...,
23332341
) -> None:
23342342
...
23352343

23362344
@overload
23372345
def reset_index(
2338-
self, level: blocks.LevelsType = None, drop: bool = False, inplace: bool = ...
2346+
self,
2347+
level: blocks.LevelsType = None,
2348+
drop: bool = False,
2349+
inplace: bool = ...,
2350+
col_level: Union[int, str] = ...,
2351+
col_fill: Hashable = ...,
2352+
allow_duplicates: Optional[bool] = ...,
2353+
names: Union[None, Hashable, Sequence[Hashable]] = ...,
23392354
) -> Optional[DataFrame]:
23402355
...
23412356

23422357
def reset_index(
2343-
self, level: blocks.LevelsType = None, drop: bool = False, inplace: bool = False
2358+
self,
2359+
level: blocks.LevelsType = None,
2360+
drop: bool = False,
2361+
inplace: bool = False,
2362+
col_level: Union[int, str] = 0,
2363+
col_fill: Hashable = "",
2364+
allow_duplicates: Optional[bool] = None,
2365+
names: Union[None, Hashable, Sequence[Hashable]] = None,
23442366
) -> Optional[DataFrame]:
2345-
block = self._block.reset_index(level, drop)
2367+
block = self._block
2368+
if names:
2369+
if isinstance(names, blocks.Label) and not isinstance(names, tuple):
2370+
names = [names]
2371+
else:
2372+
names = list(names)
2373+
2374+
if len(names) != self.index.nlevels:
2375+
raise ValueError("'names' must be same length as levels")
2376+
2377+
block = block.with_index_labels(names)
2378+
if allow_duplicates is None:
2379+
allow_duplicates = False
2380+
block = block.reset_index(
2381+
level,
2382+
drop,
2383+
col_level=col_level,
2384+
col_fill=col_fill,
2385+
allow_duplicates=allow_duplicates,
2386+
)
23462387
if inplace:
23472388
self._set_block(block)
23482389
return None

bigframes/series.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,7 @@ def reset_index(
414414
name: typing.Optional[str] = ...,
415415
drop: Literal[False] = ...,
416416
inplace: Literal[False] = ...,
417+
allow_duplicates: Optional[bool] = ...,
417418
) -> bigframes.dataframe.DataFrame:
418419
...
419420

@@ -425,6 +426,7 @@ def reset_index(
425426
name: typing.Optional[str] = ...,
426427
drop: Literal[True] = ...,
427428
inplace: Literal[False] = ...,
429+
allow_duplicates: Optional[bool] = ...,
428430
) -> Series:
429431
...
430432

@@ -436,6 +438,7 @@ def reset_index(
436438
name: typing.Optional[str] = ...,
437439
drop: bool = ...,
438440
inplace: Literal[True] = ...,
441+
allow_duplicates: Optional[bool] = ...,
439442
) -> None:
440443
...
441444

@@ -447,8 +450,11 @@ def reset_index(
447450
name: typing.Optional[str] = None,
448451
drop: bool = False,
449452
inplace: bool = False,
453+
allow_duplicates: Optional[bool] = None,
450454
) -> bigframes.dataframe.DataFrame | Series | None:
451-
block = self._block.reset_index(level, drop)
455+
if allow_duplicates is None:
456+
allow_duplicates = False
457+
block = self._block.reset_index(level, drop, allow_duplicates=allow_duplicates)
452458
if drop:
453459
if inplace:
454460
self._set_block(block)

tests/system/small/test_dataframe.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2085,6 +2085,32 @@ def test_reset_index(scalars_df_index, scalars_pandas_df_index, drop):
20852085
pandas.testing.assert_frame_equal(bf_result, pd_result)
20862086

20872087

2088+
def test_reset_index_allow_duplicates(scalars_df_index, scalars_pandas_df_index):
2089+
scalars_df_index = scalars_df_index.copy()
2090+
scalars_df_index.index.name = "int64_col"
2091+
df = scalars_df_index.reset_index(allow_duplicates=True, drop=False)
2092+
assert df.index.name is None
2093+
2094+
bf_result = df.to_pandas()
2095+
2096+
scalars_pandas_df_index = scalars_pandas_df_index.copy()
2097+
scalars_pandas_df_index.index.name = "int64_col"
2098+
pd_result = scalars_pandas_df_index.reset_index(allow_duplicates=True, drop=False)
2099+
2100+
# Pandas uses int64 instead of Int64 (nullable) dtype.
2101+
pd_result.index = pd_result.index.astype(pd.Int64Dtype())
2102+
2103+
# reset_index should maintain the original ordering.
2104+
pandas.testing.assert_frame_equal(bf_result, pd_result)
2105+
2106+
2107+
def test_reset_index_duplicates_error(scalars_df_index):
2108+
scalars_df_index = scalars_df_index.copy()
2109+
scalars_df_index.index.name = "int64_col"
2110+
with pytest.raises(ValueError):
2111+
scalars_df_index.reset_index(allow_duplicates=False, drop=False)
2112+
2113+
20882114
@pytest.mark.parametrize(
20892115
("drop",),
20902116
((True,), (False,)),

tests/system/small/test_multiindex.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -929,16 +929,30 @@ def test_column_multi_index_rename(scalars_df_index, scalars_pandas_df_index):
929929
pandas.testing.assert_frame_equal(bf_result, pd_result)
930930

931931

932-
def test_column_multi_index_reset_index(scalars_df_index, scalars_pandas_df_index):
932+
@pytest.mark.parametrize(
933+
("names", "col_fill", "col_level"),
934+
[
935+
(None, "", "l2"),
936+
(("new_name"), "fill", 1),
937+
("new_name", "fill", 0),
938+
],
939+
)
940+
def test_column_multi_index_reset_index(
941+
scalars_df_index, scalars_pandas_df_index, names, col_fill, col_level
942+
):
933943
columns = ["int64_too", "int64_col", "float64_col"]
934-
multi_columns = pandas.MultiIndex.from_tuples(zip(["a", "b", "a"], ["a", "b", "b"]))
944+
multi_columns = pandas.MultiIndex.from_tuples(
945+
zip(["a", "b", "a"], ["a", "b", "b"]), names=["l1", "l2"]
946+
)
935947
bf_df = scalars_df_index[columns].copy()
936948
bf_df.columns = multi_columns
937949
pd_df = scalars_pandas_df_index[columns].copy()
938950
pd_df.columns = multi_columns
939951

940-
bf_result = bf_df.reset_index().to_pandas()
941-
pd_result = pd_df.reset_index()
952+
bf_result = bf_df.reset_index(
953+
names=names, col_fill=col_fill, col_level=col_level
954+
).to_pandas()
955+
pd_result = pd_df.reset_index(names=names, col_fill=col_fill, col_level=col_level)
942956

943957
# Pandas uses int64 instead of Int64 (nullable) dtype.
944958
pd_result.index = pd_result.index.astype(pandas.Int64Dtype())

tests/system/small/test_series.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1339,6 +1339,32 @@ def test_reset_index_drop(scalars_df_index, scalars_pandas_df_index):
13391339
pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result)
13401340

13411341

1342+
def test_series_reset_index_allow_duplicates(scalars_df_index, scalars_pandas_df_index):
1343+
bf_series = scalars_df_index["int64_col"].copy()
1344+
bf_series.index.name = "int64_col"
1345+
df = bf_series.reset_index(allow_duplicates=True, drop=False)
1346+
assert df.index.name is None
1347+
1348+
bf_result = df.to_pandas()
1349+
1350+
pd_series = scalars_pandas_df_index["int64_col"].copy()
1351+
pd_series.index.name = "int64_col"
1352+
pd_result = pd_series.reset_index(allow_duplicates=True, drop=False)
1353+
1354+
# Pandas uses int64 instead of Int64 (nullable) dtype.
1355+
pd_result.index = pd_result.index.astype(pd.Int64Dtype())
1356+
1357+
# reset_index should maintain the original ordering.
1358+
pd.testing.assert_frame_equal(bf_result, pd_result)
1359+
1360+
1361+
def test_series_reset_index_duplicates_error(scalars_df_index):
1362+
scalars_df_index = scalars_df_index["int64_col"].copy()
1363+
scalars_df_index.index.name = "int64_col"
1364+
with pytest.raises(ValueError):
1365+
scalars_df_index.reset_index(allow_duplicates=False, drop=False)
1366+
1367+
13421368
def test_series_reset_index_inplace(scalars_df_index, scalars_pandas_df_index):
13431369
bf_result = scalars_df_index.sort_index(ascending=False)["float64_col"]
13441370
bf_result.reset_index(drop=True, inplace=True)

third_party/bigframes_vendored/pandas/core/frame.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,6 +1605,10 @@ def reset_index(
16051605
*,
16061606
drop: bool = False,
16071607
inplace: bool = False,
1608+
col_level: Hashable = 0,
1609+
col_fill: Hashable = "",
1610+
allow_duplicates: Optional[bool] = None,
1611+
names: Hashable | Sequence[Hashable] | None = None,
16081612
) -> DataFrame | None:
16091613
"""Reset the index.
16101614
@@ -1706,6 +1710,19 @@ class name speed max
17061710
the index to the default integer index.
17071711
inplace (bool, default False):
17081712
Whether to modify the DataFrame rather than creating a new one.
1713+
col_level (int or str, default 0):
1714+
If the columns have multiple levels, determines which level the
1715+
labels are inserted into. By default it is inserted into the first
1716+
level.
1717+
col_fill (object, default ''):
1718+
If the columns have multiple levels, determines how the other
1719+
levels are named. If None then the index name is repeated.
1720+
allow_duplicates (bool, optional, default None):
1721+
Allow duplicate column labels to be created.
1722+
names (str or 1-dimensional list, default None):
1723+
Using the given string, rename the DataFrame column which contains the
1724+
index data. If the DataFrame has a MultiIndex, this has to be a list or
1725+
tuple with length equal to the number of levels.
17091726
17101727
Returns:
17111728
bigframes.pandas.DataFrame: DataFrame with the new index.

third_party/bigframes_vendored/pandas/core/series.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ def reset_index(
326326
drop: bool = False,
327327
name=pd_ext.no_default,
328328
inplace: bool = False,
329+
allow_duplicates: Optional[bool] = None,
329330
) -> DataFrame | Series | None:
330331
"""
331332
Generate a new DataFrame or Series with the index reset.
@@ -413,6 +414,8 @@ def reset_index(
413414
when `drop` is True.
414415
inplace (bool, default False):
415416
Modify the Series in place (do not create a new object).
417+
allow_duplicates (bool, optional, default None):
418+
Allow duplicate column labels to be created.
416419
417420
Returns:
418421
bigframes.pandas.Series or bigframes.pandas.DataFrame or None:

0 commit comments

Comments
 (0)