Skip to content

Commit cc339e9

Browse files
fix: Fix bug selecting column repeatedly (#1858)
1 parent ed75cd9 commit cc339e9

File tree

3 files changed

+33
-6
lines changed

3 files changed

+33
-6
lines changed

bigframes/core/array_value.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -330,12 +330,27 @@ def create_constant(
330330

331331
return self.project_to_id(ex.const(value, dtype))
332332

333-
def select_columns(self, column_ids: typing.Sequence[str]) -> ArrayValue:
333+
def select_columns(
334+
self, column_ids: typing.Sequence[str], allow_renames: bool = False
335+
) -> ArrayValue:
334336
# This basically just drops and reorders columns - logically a no-op except as a final step
335-
selections = (
336-
bigframes.core.nodes.AliasedRef.identity(ids.ColumnId(col_id))
337-
for col_id in column_ids
338-
)
337+
selections = []
338+
seen = set()
339+
340+
for id in column_ids:
341+
if id not in seen:
342+
ref = nodes.AliasedRef.identity(ids.ColumnId(id))
343+
elif allow_renames:
344+
ref = nodes.AliasedRef(
345+
ex.deref(id), ids.ColumnId(bigframes.core.guid.generate_guid())
346+
)
347+
else:
348+
raise ValueError(
349+
"Must set allow_renames=True to select columns repeatedly"
350+
)
351+
selections.append(ref)
352+
seen.add(id)
353+
339354
return ArrayValue(
340355
nodes.SelectionNode(
341356
child=self.node,

bigframes/core/blocks.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1210,7 +1210,10 @@ def select_column(self, id: str) -> Block:
12101210
return self.select_columns([id])
12111211

12121212
def select_columns(self, ids: typing.Sequence[str]) -> Block:
1213-
expr = self._expr.select_columns([*self.index_columns, *ids])
1213+
# Allow renames, since callers may end up selecting the same column multiple times
1214+
expr = self._expr.select_columns(
1215+
[*self.index_columns, *ids], allow_renames=True
1216+
)
12141217
col_labels = self._get_labels_for_columns(ids)
12151218
return Block(expr, self.index_columns, col_labels, self.index.names)
12161219

tests/system/small/test_dataframe.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3408,6 +3408,15 @@ def test__dir__with_rename(scalars_dfs):
34083408
assert "drop" in results
34093409

34103410

3411+
def test_loc_select_columns_w_repeats(scalars_df_index, scalars_pandas_df_index):
3412+
bf_result = scalars_df_index[["int64_col", "int64_col", "int64_too"]].to_pandas()
3413+
pd_result = scalars_pandas_df_index[["int64_col", "int64_col", "int64_too"]]
3414+
pd.testing.assert_frame_equal(
3415+
bf_result,
3416+
pd_result,
3417+
)
3418+
3419+
34113420
@pytest.mark.parametrize(
34123421
("start", "stop", "step"),
34133422
[

0 commit comments

Comments
 (0)