Skip to content

Commit 2de5a3c

Browse files
committed
fix: resolve NameError for ExplodeResult and formatting
1 parent 09635e6 commit 2de5a3c

File tree

1 file changed

+60 -18 lines changed

1 file changed

+60 -18 lines changed

bigframes/display/_flatten.py

Lines changed: 60 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -70,18 +70,33 @@ class ColumnClassification:
7070

7171

7272
@dataclasses.dataclass(frozen=True)
73-
class ExplodeResult:
74-
"""The result of exploding array columns.
73+
class FlattenArrayOfStructsResult:
74+
"""The result of flattening array-of-struct columns.
7575
7676
Attributes:
77-
dataframe: The exploded DataFrame.
78-
row_labels: Labels for the rows.
79-
continuation_rows: Indices of continuation rows.
77+
dataframe: The flattened DataFrame.
78+
array_columns: The updated list of array columns.
79+
nested_originated_columns: The updated set of columns created from nested data.
8080
"""
8181

8282
dataframe: pd.DataFrame
83-
row_labels: list[str]
84-
continuation_rows: set[int]
83+
array_columns: tuple[str, ...]
84+
nested_originated_columns: frozenset[str]
85+
86+
87+
@dataclasses.dataclass(frozen=True)
88+
class FlattenStructsResult:
89+
"""The result of flattening struct columns.
90+
91+
Attributes:
92+
dataframe: The flattened DataFrame.
93+
clear_on_continuation_cols: The updated list of columns to clear on continuation.
94+
nested_originated_columns: The updated set of columns created from nested data.
95+
"""
96+
97+
dataframe: pd.DataFrame
98+
clear_on_continuation_cols: tuple[str, ...]
99+
nested_originated_columns: frozenset[str]
85100

86101

87102
def flatten_nested_data(
@@ -109,27 +124,31 @@ def flatten_nested_data(
109124
classification = _classify_columns(result_df)
110125

111126
# Process ARRAY-of-STRUCT columns into multiple ARRAY columns (one per struct field).
112-
result_df, array_cols, nested_cols = _flatten_array_of_struct_columns(
127+
flatten_array_structs_result = _flatten_array_of_struct_columns(
113128
result_df,
114129
classification.array_of_struct_columns,
115130
classification.array_columns,
116131
classification.nested_originated_columns,
117132
)
133+
result_df = flatten_array_structs_result.dataframe
118134
classification = dataclasses.replace(
119-
classification, array_columns=array_cols, nested_originated_columns=nested_cols
135+
classification,
136+
array_columns=flatten_array_structs_result.array_columns,
137+
nested_originated_columns=flatten_array_structs_result.nested_originated_columns,
120138
)
121139

122140
# Flatten top-level STRUCT columns into separate columns.
123-
result_df, clear_cols, nested_cols = _flatten_struct_columns(
141+
flatten_structs_result = _flatten_struct_columns(
124142
result_df,
125143
classification.struct_columns,
126144
classification.clear_on_continuation_cols,
127145
classification.nested_originated_columns,
128146
)
147+
result_df = flatten_structs_result.dataframe
129148
classification = dataclasses.replace(
130149
classification,
131-
clear_on_continuation_cols=clear_cols,
132-
nested_originated_columns=nested_cols,
150+
clear_on_continuation_cols=flatten_structs_result.clear_on_continuation_cols,
151+
nested_originated_columns=flatten_structs_result.nested_originated_columns,
133152
)
134153

135154
# Now handle ARRAY columns (including the newly created ones from ARRAY of STRUCT)
@@ -206,7 +225,7 @@ def _flatten_array_of_struct_columns(
206225
array_of_struct_columns: tuple[str, ...],
207226
array_columns: tuple[str, ...],
208227
nested_originated_columns: frozenset[str],
209-
) -> tuple[pd.DataFrame, tuple[str, ...], frozenset[str]]:
228+
) -> FlattenArrayOfStructsResult:
210229
"""Flatten ARRAY of STRUCT columns into separate ARRAY columns for each field.
211230
212231
Args:
@@ -216,7 +235,7 @@ def _flatten_array_of_struct_columns(
216235
nested_originated_columns: Columns tracked as originating from nested data.
217236
218237
Returns:
219-
A tuple containing the modified DataFrame, updated array columns, and updated nested columns.
238+
A FlattenArrayOfStructsResult containing the updated DataFrame and columns.
220239
"""
221240
result_df = dataframe.copy()
222241
current_array_columns = list(array_columns)
@@ -245,7 +264,11 @@ def _flatten_array_of_struct_columns(
245264
current_array_columns.remove(col_name)
246265
current_array_columns.extend(new_cols_df.columns.tolist())
247266

248-
return result_df, tuple(current_array_columns), frozenset(current_nested_columns)
267+
return FlattenArrayOfStructsResult(
268+
dataframe=result_df,
269+
array_columns=tuple(current_array_columns),
270+
nested_originated_columns=frozenset(current_nested_columns),
271+
)
249272

250273

251274
def _transpose_list_of_structs(arrow_array: pa.ListArray) -> dict[str, pa.ListArray]:
@@ -299,6 +322,21 @@ def _replace_column_in_df(
299322
)
300323

301324

325+
@dataclasses.dataclass(frozen=True)
326+
class ExplodeResult:
327+
"""The result of exploding array columns.
328+
329+
Attributes:
330+
dataframe: The exploded DataFrame.
331+
row_labels: Labels for the rows.
332+
continuation_rows: Indices of continuation rows.
333+
"""
334+
335+
dataframe: pd.DataFrame
336+
row_labels: list[str]
337+
continuation_rows: set[int]
338+
339+
302340
def _explode_array_columns(
303341
dataframe: pd.DataFrame, array_columns: list[str]
304342
) -> ExplodeResult:
@@ -407,7 +445,7 @@ def _flatten_struct_columns(
407445
struct_columns: tuple[str, ...],
408446
clear_on_continuation_cols: tuple[str, ...],
409447
nested_originated_columns: frozenset[str],
410-
) -> tuple[pd.DataFrame, tuple[str, ...], frozenset[str]]:
448+
) -> FlattenStructsResult:
411449
"""Flatten regular STRUCT columns into separate columns.
412450
413451
Args:
@@ -417,7 +455,7 @@ def _flatten_struct_columns(
417455
nested_originated_columns: Columns tracked as originating from nested data.
418456
419457
Returns:
420-
A tuple containing the modified DataFrame, updated clear columns, and updated nested columns.
458+
A FlattenStructsResult containing the updated DataFrame and columns.
421459
"""
422460
result_df = dataframe.copy()
423461
current_clear_cols = list(clear_on_continuation_cols)
@@ -450,4 +488,8 @@ def _flatten_struct_columns(
450488
new_cols_df = pd.DataFrame(new_cols_to_add, index=result_df.index)
451489
result_df = _replace_column_in_df(result_df, col_name, new_cols_df)
452490

453-
return result_df, tuple(current_clear_cols), frozenset(current_nested_cols)
491+
return FlattenStructsResult(
492+
dataframe=result_df,
493+
clear_on_continuation_cols=tuple(current_clear_cols),
494+
nested_originated_columns=frozenset(current_nested_cols),
495+
)

0 commit comments

Comments
 (0)