diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 283f56fd39..f7d456bf9d 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -2129,9 +2129,17 @@ def _get_unique_values(
         import bigframes.core.block_transforms as block_tf
         import bigframes.dataframe as df
 
-        unique_value_block = block_tf.drop_duplicates(
-            self.select_columns(columns), columns
-        )
+        if self.explicitly_ordered:
+            unique_value_block = block_tf.drop_duplicates(
+                self.select_columns(columns), columns
+            )
+        else:
+            unique_value_block, _ = self.aggregate(by_column_ids=columns, dropna=False)
+            col_labels = self._get_labels_for_columns(columns)
+            unique_value_block = unique_value_block.reset_index(
+                drop=False
+            ).with_column_labels(col_labels)
+
         pd_values = (
             df.DataFrame(unique_value_block).head(max_unique_values + 1).to_pandas()
         )
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 85760d94bc..f2035441ec 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -3346,8 +3346,6 @@ def _pivot(
         )
         return DataFrame(pivot_block)
 
-    @validations.requires_index
-    @validations.requires_ordering()
     def pivot(
         self,
         *,
@@ -3361,8 +3359,6 @@ def pivot(
     ) -> DataFrame:
         return self._pivot(columns=columns, index=index, values=values)
 
-    @validations.requires_index
-    @validations.requires_ordering()
     def pivot_table(
         self,
         values: typing.Optional[
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index a75918ed23..70a379fe0e 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -585,6 +585,18 @@ def scalars_df_null_index(
     ).sort_values("rowindex")
 
 
+@pytest.fixture(scope="session")
+def scalars_df_unordered(
+    scalars_table_id: str, unordered_session: bigframes.Session
+) -> bigframes.dataframe.DataFrame:
+    """Scalars test table read via the unordered session with a NULL index."""
+    df = unordered_session.read_gbq(
+        scalars_table_id, index_col=bigframes.enums.DefaultIndexKind.NULL
+    )
+    assert not df._block.explicitly_ordered
+    return df
+
+
@pytest.fixture(scope="session")
 def scalars_df_2_default_index(
     scalars_df_2_index: bigframes.dataframe.DataFrame,
diff --git a/tests/system/small/test_unordered.py b/tests/system/small/test_unordered.py
index 0825b78037..ccb2140799 100644
--- a/tests/system/small/test_unordered.py
+++ b/tests/system/small/test_unordered.py
@@ -265,3 +265,27 @@ def test__resample_with_index(unordered_session, rule, origin, data):
     pd.testing.assert_frame_equal(
         bf_result, pd_result, check_dtype=False, check_index_type=False
     )
+
+
+@pytest.mark.parametrize(
+    ("values", "index", "columns"),
+    [
+        ("int64_col", "int64_too", ["string_col"]),
+        (["int64_col"], "int64_too", ["string_col"]),
+        (["int64_col", "float64_col"], "int64_too", ["string_col"]),
+    ],
+)
+def test_unordered_df_pivot(
+    scalars_df_unordered, scalars_pandas_df_index, values, index, columns
+):
+    bf_result = scalars_df_unordered.pivot(
+        values=values, index=index, columns=columns
+    ).to_pandas()
+    pd_result = scalars_pandas_df_index.pivot(
+        values=values, index=index, columns=columns
+    )
+
+    # pandas produces NaN where BigQuery DataFrames produces pd.NA; normalize both to NaN.
+    bf_result = bf_result.fillna(float("nan"))
+    pd_result = pd_result.fillna(float("nan"))
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)