Skip to content

Commit 0c55b07

Browse files
fix: translate labels to col ids when copying dataframes (#1372)
* fix: translate labels to col ids when copying dataframes
* polish error message
* 🦉 Updates from OwlBot post-processor. See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* polish doc

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 269f4b4 commit 0c55b07

File tree

3 files changed

+34
-12
lines changed

3 files changed

+34
-12
lines changed

bigframes/core/blocks.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,26 @@ def label_to_col_id(self) -> typing.Mapping[Label, typing.Sequence[str]]:
276276
mapping[label] = (*mapping.get(label, ()), id)
277277
return mapping
278278

279+
def resolve_label_exact(self, label: Label) -> Optional[str]:
    """Return the id of the single column whose label equals ``label``.

    Args:
        label: Column label to look up in ``self.label_to_col_id``.

    Returns:
        The matching column id, or ``None`` if no column carries the label.

    Raises:
        ValueError: If more than one column carries the label.
    """
    # ``label_to_col_id`` maps each label to a sequence of column ids; use an
    # empty tuple as the "no match" default so the type stays consistent.
    matches = self.label_to_col_id.get(label, ())
    if len(matches) > 1:
        raise ValueError(
            f"Multiple columns matching id {label} were found. {constants.FEEDBACK_LINK}"
        )
    return matches[0] if matches else None
289+
290+
def resolve_label_exact_or_error(self, label: Label) -> str:
    """Return the id of the single column whose label equals ``label``.

    Delegates the lookup to ``self.resolve_label_exact`` and turns its
    ``None`` ("no match") result into an error.

    Args:
        label: Column label to resolve.

    Returns:
        The id of the column carrying the label.

    Raises:
        ValueError: If ``self.resolve_label_exact`` returns ``None`` for the
            label. Any error raised by that lookup itself propagates
            unchanged.
    """
    resolved = self.resolve_label_exact(label)
    if resolved is not None:
        return resolved
    raise ValueError(f"Label {label} not found. {constants.FEEDBACK_LINK}")
298+
279299
@functools.cached_property
280300
def col_id_to_index_name(self) -> typing.Mapping[str, Label]:
281301
"""Get column label for value columns, or index name for index columns"""

bigframes/dataframe.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,10 @@ def __init__(
180180
)
181181
block = block.set_index([r_mapping[idx_col] for idx_col in idx_cols])
182182
if columns:
183-
block = block.select_columns(list(columns)) # type:ignore
183+
column_ids = [
184+
block.resolve_label_exact_or_error(label) for label in list(columns)
185+
]
186+
block = block.select_columns(column_ids) # type:ignore
184187
if dtype:
185188
bf_dtype = bigframes.dtypes.bigframes_type(dtype)
186189
block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
@@ -238,15 +241,7 @@ def _find_indices(
238241
return [self._block.value_columns.index(col_id) for col_id in col_ids]
239242

240243
def _resolve_label_exact(self, label) -> Optional[str]:
241-
"""Returns the column id matching the label if there is exactly
242-
one such column. If there are multiple columns with the same name,
243-
raises an error. If there is no such column, returns None."""
244-
matches = self._block.label_to_col_id.get(label, [])
245-
if len(matches) > 1:
246-
raise ValueError(
247-
f"Multiple columns matching id {label} were found. {constants.FEEDBACK_LINK}"
248-
)
249-
return matches[0] if len(matches) != 0 else None
244+
return self._block.resolve_label_exact(label)
250245

251246
def _sql_names(
252247
self,

tests/system/small/test_dataframe.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,15 @@
4444
def test_df_construct_copy(scalars_dfs):
4545
columns = ["int64_col", "string_col", "float64_col"]
4646
scalars_df, scalars_pandas_df = scalars_dfs
47-
bf_result = dataframe.DataFrame(scalars_df, columns=columns).to_pandas()
48-
pd_result = pd.DataFrame(scalars_pandas_df, columns=columns)
47+
# Make the mapping from label to col_id non-trivial
48+
bf_df = scalars_df.copy()
49+
bf_df["int64_col"] = bf_df["int64_col"] / 2
50+
pd_df = scalars_pandas_df.copy()
51+
pd_df["int64_col"] = pd_df["int64_col"] / 2
52+
53+
bf_result = dataframe.DataFrame(bf_df, columns=columns).to_pandas()
54+
55+
pd_result = pd.DataFrame(pd_df, columns=columns)
4956
pandas.testing.assert_frame_equal(bf_result, pd_result)
5057

5158

0 commit comments

Comments
 (0)