feat: Add reset_index names, col_level, col_fill, allow_duplicates args

TrevorBergeron · TrevorBergeron · commit 710ad67950bf · 2025-08-22T01:08:42.000Z
diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
@@ -387,12 +387,21 @@ def reversed(self) -> Block:
             index_labels=self.index.names,
         )
 
-    def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
+    def reset_index(
+        self,
+        level: LevelsType = None,
+        drop: bool = True,
+        *,
+        col_level: Union[str, int] = 0,
+        col_fill: typing.Hashable = "",
+        allow_duplicates: bool = False,
+    ) -> Block:
         """Reset the index of the block, promoting the old index to a value column.
 
         Arguments:
             level: the label or index level of the index levels to remove.
             name: this is the column id for the new value id derived from the old index
+            allow_duplicates: whether index labels may duplicate existing column labels
 
         Returns:
             A new Block because dropping index columns can break references
@@ -438,6 +447,11 @@ def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
             )
         else:
             # Add index names to column index
+            col_level_n = (
+                col_level
+                if isinstance(col_level, int)
+                else self.column_labels.names.index(col_level)
+            )
             column_labels_modified = self.column_labels
             for position, level_id in enumerate(level_ids):
                 label = self.col_id_to_index_name[level_id]
@@ -447,11 +461,15 @@ def reset_index(self, level: LevelsType = None, drop: bool = True) -> Block:
                     else:
                         label = f"level_{self.index_columns.index(level_id)}"
 
-                if label in self.column_labels:
+                if (not allow_duplicates) and (label in self.column_labels):
                     raise ValueError(f"cannot insert {label}, already exists")
+
                 if isinstance(self.column_labels, pd.MultiIndex):
                     nlevels = self.column_labels.nlevels
-                    label = tuple(label if i == 0 else "" for i in range(nlevels))
+                    fill = label if col_fill is None else col_fill  # None: repeat label
+                    label = tuple(
+                        label if i == col_level_n else fill for i in range(nlevels)
+                    )
                 # Create index copy with label inserted
                 # See: https://pandas.pydata.org/docs/reference/api/pandas.Index.insert.html
                 column_labels_modified = column_labels_modified.insert(position, label)
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
@@ -2321,6 +2321,10 @@ def reset_index(
         level: blocks.LevelsType = ...,
         drop: bool = ...,
         inplace: Literal[False] = ...,
+        col_level: Union[int, str] = ...,
+        col_fill: Hashable = ...,
+        allow_duplicates: Optional[bool] = ...,
+        names: Union[None, Hashable, Sequence[Hashable]] = ...,
     ) -> DataFrame:
         ...
 
@@ -2330,19 +2334,56 @@ def reset_index(
         level: blocks.LevelsType = ...,
         drop: bool = ...,
         inplace: Literal[True] = ...,
+        col_level: Union[int, str] = ...,
+        col_fill: Hashable = ...,
+        allow_duplicates: Optional[bool] = ...,
+        names: Union[None, Hashable, Sequence[Hashable]] = ...,
     ) -> None:
         ...
 
     @overload
     def reset_index(
-        self, level: blocks.LevelsType = None, drop: bool = False, inplace: bool = ...
+        self,
+        level: blocks.LevelsType = None,
+        drop: bool = False,
+        inplace: bool = ...,
+        col_level: Union[int, str] = ...,
+        col_fill: Hashable = ...,
+        allow_duplicates: Optional[bool] = ...,
+        names: Union[None, Hashable, Sequence[Hashable]] = ...,
     ) -> Optional[DataFrame]:
         ...
 
     def reset_index(
-        self, level: blocks.LevelsType = None, drop: bool = False, inplace: bool = False
+        self,
+        level: blocks.LevelsType = None,
+        drop: bool = False,
+        inplace: bool = False,
+        col_level: Union[int, str] = 0,
+        col_fill: Hashable = "",
+        allow_duplicates: Optional[bool] = None,
+        names: Union[None, Hashable, Sequence[Hashable]] = None,
     ) -> Optional[DataFrame]:
-        block = self._block.reset_index(level, drop)
+        block = self._block
+        if names:
+            if isinstance(names, blocks.Label) and not isinstance(names, tuple):
+                names = [names]
+            else:
+                names = list(names)
+
+            if len(names) != self.index.nlevels:
+                raise ValueError("'names' must be same length as levels")
+
+            block = block.with_index_labels(names)
+        if allow_duplicates is None:
+            allow_duplicates = False
+        block = block.reset_index(
+            level,
+            drop,
+            col_level=col_level,
+            col_fill=col_fill,
+            allow_duplicates=allow_duplicates,
+        )
         if inplace:
             self._set_block(block)
             return None
diff --git a/bigframes/series.py b/bigframes/series.py
@@ -414,6 +414,7 @@ def reset_index(
         name: typing.Optional[str] = ...,
         drop: Literal[False] = ...,
         inplace: Literal[False] = ...,
+        allow_duplicates: Optional[bool] = ...,
     ) -> bigframes.dataframe.DataFrame:
         ...
 
@@ -425,6 +426,7 @@ def reset_index(
         name: typing.Optional[str] = ...,
         drop: Literal[True] = ...,
         inplace: Literal[False] = ...,
+        allow_duplicates: Optional[bool] = ...,
     ) -> Series:
         ...
 
@@ -436,6 +438,7 @@ def reset_index(
         name: typing.Optional[str] = ...,
         drop: bool = ...,
         inplace: Literal[True] = ...,
+        allow_duplicates: Optional[bool] = ...,
     ) -> None:
         ...
 
@@ -447,8 +450,11 @@ def reset_index(
         name: typing.Optional[str] = None,
         drop: bool = False,
         inplace: bool = False,
+        allow_duplicates: Optional[bool] = None,
     ) -> bigframes.dataframe.DataFrame | Series | None:
-        block = self._block.reset_index(level, drop)
+        if allow_duplicates is None:
+            allow_duplicates = False
+        block = self._block.reset_index(level, drop, allow_duplicates=allow_duplicates)
         if drop:
             if inplace:
                 self._set_block(block)
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
@@ -2085,6 +2085,32 @@ def test_reset_index(scalars_df_index, scalars_pandas_df_index, drop):
     pandas.testing.assert_frame_equal(bf_result, pd_result)
 
 
+def test_reset_index_allow_duplicates(scalars_df_index, scalars_pandas_df_index):
+    """Index name colliding with a column is allowed when allow_duplicates=True."""
+    bf_df = scalars_df_index.copy()
+    bf_df.index.name = "int64_col"
+    bf_reset = bf_df.reset_index(drop=False, allow_duplicates=True)
+    assert bf_reset.index.name is None
+    bf_result = bf_reset.to_pandas()
+
+    pd_df = scalars_pandas_df_index.copy()
+    pd_df.index.name = "int64_col"
+    pd_result = pd_df.reset_index(drop=False, allow_duplicates=True)
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype, so cast before comparing.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+
+    # Row order must be unchanged by reset_index.
+    pandas.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_reset_index_duplicates_error(scalars_df_index):
+    bf_df = scalars_df_index.copy()
+    bf_df.index.name = "int64_col"
+    with pytest.raises(ValueError):
+        bf_df.reset_index(drop=False, allow_duplicates=False)
+
+
 @pytest.mark.parametrize(
     ("drop",),
     ((True,), (False,)),
diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py
@@ -929,16 +929,30 @@ def test_column_multi_index_rename(scalars_df_index, scalars_pandas_df_index):
     pandas.testing.assert_frame_equal(bf_result, pd_result)
 
 
-def test_column_multi_index_reset_index(scalars_df_index, scalars_pandas_df_index):
+@pytest.mark.parametrize(
+    ("names", "col_fill", "col_level"),
+    [
+        (None, "", "l2"),
+        (["new_name"], "fill", 1),  # list form of names
+        ("new_name", "fill", 0),  # scalar form of names
+    ],
+)
+def test_column_multi_index_reset_index(
+    scalars_df_index, scalars_pandas_df_index, names, col_fill, col_level
+):
     columns = ["int64_too", "int64_col", "float64_col"]
-    multi_columns = pandas.MultiIndex.from_tuples(zip(["a", "b", "a"], ["a", "b", "b"]))
+    multi_columns = pandas.MultiIndex.from_tuples(
+        zip(["a", "b", "a"], ["a", "b", "b"]), names=["l1", "l2"]
+    )
     bf_df = scalars_df_index[columns].copy()
     bf_df.columns = multi_columns
     pd_df = scalars_pandas_df_index[columns].copy()
     pd_df.columns = multi_columns
 
-    bf_result = bf_df.reset_index().to_pandas()
-    pd_result = pd_df.reset_index()
+    bf_result = bf_df.reset_index(
+        names=names, col_fill=col_fill, col_level=col_level
+    ).to_pandas()
+    pd_result = pd_df.reset_index(names=names, col_fill=col_fill, col_level=col_level)
 
     # Pandas uses int64 instead of Int64 (nullable) dtype.
     pd_result.index = pd_result.index.astype(pandas.Int64Dtype())
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
@@ -1339,6 +1339,32 @@ def test_reset_index_drop(scalars_df_index, scalars_pandas_df_index):
     pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result)
 
 
+def test_series_reset_index_allow_duplicates(scalars_df_index, scalars_pandas_df_index):
+    """Index name "int64_col" on the "int64_col" series with allow_duplicates=True."""
+    bf_series = scalars_df_index["int64_col"].copy()
+    bf_series.index.name = "int64_col"
+    bf_reset = bf_series.reset_index(drop=False, allow_duplicates=True)
+    assert bf_reset.index.name is None
+    bf_result = bf_reset.to_pandas()
+
+    pd_series = scalars_pandas_df_index["int64_col"].copy()
+    pd_series.index.name = "int64_col"
+    pd_result = pd_series.reset_index(drop=False, allow_duplicates=True)
+
+    # Pandas uses int64 instead of Int64 (nullable) dtype, so cast before comparing.
+    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
+
+    # Row order must be unchanged by reset_index.
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
+def test_series_reset_index_duplicates_error(scalars_df_index):
+    bf_series = scalars_df_index["int64_col"].copy()
+    bf_series.index.name = "int64_col"
+    with pytest.raises(ValueError):
+        bf_series.reset_index(drop=False, allow_duplicates=False)
+
+
 def test_series_reset_index_inplace(scalars_df_index, scalars_pandas_df_index):
     bf_result = scalars_df_index.sort_index(ascending=False)["float64_col"]
     bf_result.reset_index(drop=True, inplace=True)
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -1605,6 +1605,10 @@ def reset_index(
         *,
         drop: bool = False,
         inplace: bool = False,
+        col_level: Hashable = 0,
+        col_fill: Hashable = "",
+        allow_duplicates: Optional[bool] = None,
+        names: Hashable | Sequence[Hashable] | None = None,
     ) -> DataFrame | None:
         """Reset the index.
 
@@ -1706,6 +1710,19 @@ class    name  speed   max
                 the index to the default integer index.
             inplace (bool, default False):
                 Whether to modify the DataFrame rather than creating a new one.
+            col_level (int or str, default 0):
+                If the columns have multiple levels, determines which level the
+                labels are inserted into. By default it is inserted into the first
+                level.
+            col_fill (object, default ''):
+                If the columns have multiple levels, determines how the other
+                levels are named. If None then the index name is repeated.
+            allow_duplicates (bool, optional, default None):
+                Allow duplicate column labels to be created.
+            names (str or 1-dimensional list, default None):
+                Using the given string, rename the DataFrame column which contains the
+                index data. If the DataFrame has a MultiIndex, this has to be a list or
+                tuple with length equal to the number of levels.
 
         Returns:
             bigframes.pandas.DataFrame: DataFrame with the new index.
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
@@ -326,6 +326,7 @@ def reset_index(
         drop: bool = False,
         name=pd_ext.no_default,
         inplace: bool = False,
+        allow_duplicates: Optional[bool] = None,
     ) -> DataFrame | Series | None:
         """
         Generate a new DataFrame or Series with the index reset.
@@ -413,6 +414,8 @@ def reset_index(
                 when `drop` is True.
             inplace (bool, default False):
                 Modify the Series in place (do not create a new object).
+            allow_duplicates (bool, optional, default None):
+                Allow duplicate column labels to be created.
 
         Returns:
             bigframes.pandas.Series or bigframes.pandas.DataFrame or None: