Skip to content

Commit c53da80

Browse files
committed
refactor: revert the refactor
1 parent 3affd92 commit c53da80

File tree

1 file changed

+23
-14
lines changed

1 file changed

+23
-14
lines changed

bigframes/display/_flatten.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -248,31 +248,40 @@ def _flatten_struct_columns(
248248
clear_on_continuation_cols: list[str],
249249
nested_originated_columns: set[str],
250250
) -> tuple[pd.DataFrame, list[str]]:
251-
"""Flatten regular STRUCT columns using pandas accessor."""
251+
"""Flatten regular STRUCT columns."""
252252
result_df = dataframe.copy()
253253
for col_name in struct_columns:
254-
# Use pandas struct accessor to explode the struct column into a DataFrame of its fields
255-
exploded_struct = result_df[col_name].struct.explode()
254+
col_data = result_df[col_name]
255+
if isinstance(col_data.dtype, pd.ArrowDtype):
256+
pa_type = cast(pd.ArrowDtype, col_data.dtype).pyarrow_dtype
257+
258+
# Use PyArrow to flatten the struct column without row iteration
259+
# NOTE(review): combine_chunks() is not called here; pa.array() may return a chunked array — confirm flatten() handles that case
260+
arrow_array = pa.array(col_data)
261+
flattened_fields = arrow_array.flatten()
256262

257-
# Rename columns to 'parent.child' format
258-
exploded_struct.columns = [
259-
f"{col_name}.{sub_col}" for sub_col in exploded_struct.columns
260-
]
263+
new_cols_to_add = {}
264+
for field_idx in range(pa_type.num_fields):
265+
field = pa_type.field(field_idx)
266+
new_col_name = f"{col_name}.{field.name}"
267+
nested_originated_columns.add(new_col_name)
268+
clear_on_continuation_cols.append(new_col_name)
261269

262-
# Update metadata
263-
for new_col in exploded_struct.columns:
264-
nested_originated_columns.add(new_col)
265-
clear_on_continuation_cols.append(new_col)
270+
# Create a new Series from the flattened array
271+
new_cols_to_add[new_col_name] = pd.Series(
272+
flattened_fields[field_idx].to_pylist(),
273+
dtype=pd.ArrowDtype(field.type),
274+
index=result_df.index,
275+
)
266276

267-
# Replace the original struct column with the new field columns
268277
col_idx = result_df.columns.to_list().index(col_name)
278+
new_cols_df = pd.DataFrame(new_cols_to_add, index=result_df.index)
269279
result_df = pd.concat(
270280
[
271281
result_df.iloc[:, :col_idx],
272-
exploded_struct,
282+
new_cols_df,
273283
result_df.iloc[:, col_idx + 1 :],
274284
],
275285
axis=1,
276286
)
277-
278287
return result_df, clear_on_continuation_cols

0 commit comments

Comments
 (0)