@@ -104,12 +104,6 @@ def flatten_nested_data(
         nested_columns=set(),
     )
 
-    # Coordinates the flattening process:
-    # 1. Classifies columns into STRUCT, ARRAY, ARRAY-of-STRUCT, and standard types.
-    # 2. Flattens ARRAY-of-STRUCT columns into multiple ARRAY columns (one per struct field).
-    #    This simplifies the subsequent explosion step.
-    # 3. Flattens top-level STRUCT columns into separate columns.
-    # 4. Explodes all ARRAY columns (original and those from step 2) into multiple rows.
     result_df = dataframe.copy()
 
     classification = _classify_columns(result_df)
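The comment removed above describes a four-step pipeline: classify columns, split ARRAY-of-STRUCT into per-field ARRAY columns, flatten STRUCT columns, then explode ARRAY columns into rows. As a rough, self-contained illustration of what those steps produce, here is a plain-pandas toy example; the sample columns and the dotted naming are assumptions, and it sidesteps the Arrow-backed dtypes the real code operates on.

```python
import pandas as pd

# Toy nested frame: "customer" plays the role of a STRUCT column and
# "items" the role of an ARRAY-of-STRUCT column. (Step 1, classification,
# is done by eye in this sketch.)
df = pd.DataFrame(
    {
        "order_id": [1, 2],
        "customer": [{"name": "Ada", "city": "Paris"}, {"name": "Bo", "city": "Oslo"}],
        "items": [
            [{"sku": "a", "qty": 2}, {"sku": "b", "qty": 5}],
            [{"sku": "c", "qty": 1}],
        ],
    }
)

# Step 2: split the ARRAY-of-STRUCT column into one ARRAY column per struct field.
df["items.sku"] = df["items"].apply(lambda xs: [x["sku"] for x in xs])
df["items.qty"] = df["items"].apply(lambda xs: [x["qty"] for x in xs])
df = df.drop(columns=["items"])

# Step 3: flatten the STRUCT column into separate columns.
customer = pd.json_normalize(df["customer"].tolist()).add_prefix("customer.")
df = pd.concat([df.drop(columns=["customer"]), customer], axis=1)

# Step 4: explode the parallel ARRAY columns into multiple rows.
df = df.explode(["items.sku", "items.qty"], ignore_index=True)
print(df)
```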
@@ -161,8 +155,6 @@ def _classify_columns(
     Returns:
         A ColumnClassification object containing lists of column names for each category.
     """
-    # Inspects the PyArrow dtype of each column to determine if it is a
-    # STRUCT, LIST (Array), or LIST of STRUCTs.
     initial_columns = list(dataframe.columns)
     struct_columns: list[str] = []
     array_columns: list[str] = []
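For the dtype inspection described by the comment removed in this hunk, a standalone sketch (not this module's code) might bucket Arrow-backed columns as shown below; the function name, return shape, and example data are illustrative only, assuming pandas 2.x `ArrowDtype` columns.

```python
import pandas as pd
import pyarrow as pa


def classify(dataframe: pd.DataFrame) -> dict[str, list[str]]:
    """Bucket columns by their PyArrow type: STRUCT, LIST, LIST-of-STRUCT, or other."""
    struct_cols: list[str] = []
    array_cols: list[str] = []
    array_of_struct_cols: list[str] = []
    other_cols: list[str] = []
    for name in dataframe.columns:
        dtype = dataframe[name].dtype
        if not isinstance(dtype, pd.ArrowDtype):
            other_cols.append(name)
            continue
        pa_type = dtype.pyarrow_dtype
        if pa.types.is_struct(pa_type):
            struct_cols.append(name)
        elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type):
            # A LIST whose element type is a STRUCT gets its own bucket.
            if pa.types.is_struct(pa_type.value_type):
                array_of_struct_cols.append(name)
            else:
                array_cols.append(name)
        else:
            other_cols.append(name)
    return {
        "struct": struct_cols,
        "array": array_cols,
        "array_of_struct": array_of_struct_cols,
        "other": other_cols,
    }


# Example: build Arrow-backed columns and classify them.
table = pa.table({"plain": [1, 2], "tags": [["x"], ["y", "z"]]})
df = table.to_pandas(types_mapper=pd.ArrowDtype)
print(classify(df))  # {'struct': [], 'array': ['tags'], 'array_of_struct': [], 'other': ['plain']}
```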
@@ -291,12 +283,9 @@ def _explode_array_columns(
     if not array_columns:
         return ExplodeResult(dataframe, [], set())
 
-    # Implementation details:
-    # - We group by all non-array columns to maintain context.
-    # - `_row_num` is used to track the index within the exploded array, effectively
-    #   synchronizing multiple arrays if they belong to the same row.
-    # - Continuation rows (index > 0 in the explosion) are tracked so we can clear
-    #   repeated values in the display.
+    # Group by all non-array columns to maintain context.
+    # _row_num tracks the index within the exploded array to synchronize multiple
+    # arrays. Continuation rows (index > 0) are tracked for display clearing.
     original_cols = dataframe.columns.tolist()
     work_df = dataframe
 
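To make the group-by/`_row_num` idea from the rewritten comment concrete, here is a minimal, self-contained sketch of the pattern; the sample data and the single non-array grouping column (`order_id`) are assumptions, and pandas' multi-column explode stands in for whatever this module actually does internally.

```python
import pandas as pd

df = pd.DataFrame(
    {
        "order_id": [1, 2],            # the only non-array column in this sketch
        "items": [["a", "b"], ["c"]],
        "qty": [[2, 5], [1]],
    }
)

# Explode the parallel array columns together so they stay aligned per row.
exploded = df.explode(["items", "qty"], ignore_index=True)

# _row_num is the position within each original row's arrays; grouping by the
# non-array column(s) recovers that per-row index.
exploded["_row_num"] = exploded.groupby("order_id").cumcount()

# Continuation rows (_row_num > 0) are the ones whose repeated context values
# a display layer could blank out.
continuation = exploded["_row_num"] > 0
print(exploded)
print(continuation.tolist())  # [False, True, False]
```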