Skip to content

Commit 1a0f710

Browse files
feat: Can pivot unordered, unindexed dataframe (#2040)
1 parent 164c481 commit 1a0f710

File tree

4 files changed

+47
-7
lines changed

4 files changed

+47
-7
lines changed

bigframes/core/blocks.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2129,9 +2129,17 @@ def _get_unique_values(
21292129
import bigframes.core.block_transforms as block_tf
21302130
import bigframes.dataframe as df
21312131

2132-
unique_value_block = block_tf.drop_duplicates(
2133-
self.select_columns(columns), columns
2134-
)
2132+
if self.explicitly_ordered:
2133+
unique_value_block = block_tf.drop_duplicates(
2134+
self.select_columns(columns), columns
2135+
)
2136+
else:
2137+
unique_value_block, _ = self.aggregate(by_column_ids=columns, dropna=False)
2138+
col_labels = self._get_labels_for_columns(columns)
2139+
unique_value_block = unique_value_block.reset_index(
2140+
drop=False
2141+
).with_column_labels(col_labels)
2142+
21352143
pd_values = (
21362144
df.DataFrame(unique_value_block).head(max_unique_values + 1).to_pandas()
21372145
)

bigframes/dataframe.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3347,8 +3347,6 @@ def _pivot(
33473347
)
33483348
return DataFrame(pivot_block)
33493349

3350-
@validations.requires_index
3351-
@validations.requires_ordering()
33523350
def pivot(
33533351
self,
33543352
*,
@@ -3362,8 +3360,6 @@ def pivot(
33623360
) -> DataFrame:
33633361
return self._pivot(columns=columns, index=index, values=values)
33643362

3365-
@validations.requires_index
3366-
@validations.requires_ordering()
33673363
def pivot_table(
33683364
self,
33693365
values: typing.Optional[

tests/system/conftest.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,18 @@ def scalars_df_null_index(
585585
).sort_values("rowindex")
586586

587587

588+
@pytest.fixture(scope="session")
589+
def scalars_df_unordered(
590+
scalars_table_id: str, unordered_session: bigframes.Session
591+
) -> bigframes.dataframe.DataFrame:
592+
"""DataFrame over the scalars test table, read via the unordered session with a NULL index."""
593+
df = unordered_session.read_gbq(
594+
scalars_table_id, index_col=bigframes.enums.DefaultIndexKind.NULL
595+
)
596+
assert not df._block.explicitly_ordered
597+
return df
598+
599+
588600
@pytest.fixture(scope="session")
589601
def scalars_df_2_default_index(
590602
scalars_df_2_index: bigframes.dataframe.DataFrame,

tests/system/small/test_unordered.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,3 +265,27 @@ def test__resample_with_index(unordered_session, rule, origin, data):
265265
pd.testing.assert_frame_equal(
266266
bf_result, pd_result, check_dtype=False, check_index_type=False
267267
)
268+
269+
270+
@pytest.mark.parametrize(
271+
("values", "index", "columns"),
272+
[
273+
("int64_col", "int64_too", ["string_col"]),
274+
(["int64_col"], "int64_too", ["string_col"]),
275+
(["int64_col", "float64_col"], "int64_too", ["string_col"]),
276+
],
277+
)
278+
def test_unordered_df_pivot(
279+
scalars_df_unordered, scalars_pandas_df_index, values, index, columns
280+
):
281+
bf_result = scalars_df_unordered.pivot(
282+
values=values, index=index, columns=columns
283+
).to_pandas()
284+
pd_result = scalars_pandas_df_index.pivot(
285+
values=values, index=index, columns=columns
286+
)
287+
288+
# pandas represents missing values as NaN, whereas BigQuery DataFrames produces pd.NA; normalize both to NaN before comparing.
289+
bf_result = bf_result.fillna(float("nan"))
290+
pd_result = pd_result.fillna(float("nan"))
291+
pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)

0 commit comments

Comments
 (0)