Skip to content

Commit 84e6927

Browse files
feat: Can pivot unordered, unindexed dataframe
1 parent 209d0d4 commit 84e6927

File tree

4 files changed

+47
-7
lines changed

4 files changed

+47
-7
lines changed

bigframes/core/blocks.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2165,9 +2165,17 @@ def _get_unique_values(
21652165
import bigframes.core.block_transforms as block_tf
21662166
import bigframes.dataframe as df
21672167

2168-
unique_value_block = block_tf.drop_duplicates(
2169-
self.select_columns(columns), columns
2170-
)
2168+
if self.explicitly_ordered:
2169+
unique_value_block = block_tf.drop_duplicates(
2170+
self.select_columns(columns), columns
2171+
)
2172+
else:
2173+
unique_value_block, _ = self.aggregate(by_column_ids=columns, dropna=False)
2174+
col_labels = self._get_labels_for_columns(columns)
2175+
unique_value_block = unique_value_block.reset_index(
2176+
drop=False
2177+
).with_column_labels(col_labels)
2178+
21712179
pd_values = (
21722180
df.DataFrame(unique_value_block).head(max_unique_values + 1).to_pandas()
21732181
)

bigframes/dataframe.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3346,8 +3346,6 @@ def _pivot(
33463346
)
33473347
return DataFrame(pivot_block)
33483348

3349-
@validations.requires_index
3350-
@validations.requires_ordering()
33513349
def pivot(
33523350
self,
33533351
*,
@@ -3361,8 +3359,6 @@ def pivot(
33613359
) -> DataFrame:
33623360
return self._pivot(columns=columns, index=index, values=values)
33633361

3364-
@validations.requires_index
3365-
@validations.requires_ordering()
33663362
def pivot_table(
33673363
self,
33683364
values: typing.Optional[

tests/system/conftest.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -585,6 +585,18 @@ def scalars_df_null_index(
585585
).sort_values("rowindex")
586586

587587

588+
@pytest.fixture(scope="session")
589+
def scalars_df_unordered(
590+
scalars_table_id: str, unordered_session: bigframes.Session
591+
) -> bigframes.dataframe.DataFrame:
592+
"""DataFrame pointing at test data."""
593+
df = unordered_session.read_gbq(
594+
scalars_table_id, index_col=bigframes.enums.DefaultIndexKind.NULL
595+
)
596+
assert not df._block.explicitly_ordered
597+
return df
598+
599+
588600
@pytest.fixture(scope="session")
589601
def scalars_df_2_default_index(
590602
scalars_df_2_index: bigframes.dataframe.DataFrame,

tests/system/small/test_unordered.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -265,3 +265,27 @@ def test__resample_with_index(unordered_session, rule, origin, data):
265265
pd.testing.assert_frame_equal(
266266
bf_result, pd_result, check_dtype=False, check_index_type=False
267267
)
268+
269+
270+
@pytest.mark.parametrize(
271+
("values", "index", "columns"),
272+
[
273+
("int64_col", "int64_too", ["string_col"]),
274+
(["int64_col"], "int64_too", ["string_col"]),
275+
(["int64_col", "float64_col"], "int64_too", ["string_col"]),
276+
],
277+
)
278+
def test_unordered_df_pivot(
279+
scalars_df_unordered, scalars_pandas_df_index, values, index, columns
280+
):
281+
bf_result = scalars_df_unordered.pivot(
282+
values=values, index=index, columns=columns
283+
).to_pandas()
284+
pd_result = scalars_pandas_df_index.pivot(
285+
values=values, index=index, columns=columns
286+
)
287+
288+
# Pandas produces NaN, where bq dataframes produces pd.NA
289+
bf_result = bf_result.fillna(float("nan"))
290+
pd_result = pd_result.fillna(float("nan"))
291+
pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)

0 commit comments

Comments
 (0)