perf(display): optimize nested data flattening and fix js style

shuoweil · shuoweil · commit 0b73c0adaa3b · 2026-01-08T01:18:23.000Z
diff --git a/bigframes/display/_flatten.py b/bigframes/display/_flatten.py
@@ -30,11 +30,11 @@ class FlattenResult:
     dataframe: pd.DataFrame
     """The flattened DataFrame."""
 
-    row_groups: dict[str, list[int]]
-    """
-    A mapping from original row index to the new row indices that were created
-    from it.
-    """
+    row_labels: list[str] | None
+    """Original row label of each flattened row; None if nothing was exploded."""
+
+    continuation_rows: set[int] | None
+    """Indices of non-first rows in each exploded group; None if not exploded."""
 
     cleared_on_continuation: list[str]
     """A list of column names that should be cleared on continuation rows."""
@@ -50,7 +50,8 @@ def flatten_nested_data(
     if dataframe.empty:
         return FlattenResult(
             dataframe=dataframe.copy(),
-            row_groups={},
+            row_labels=None,
+            continuation_rows=None,
             cleared_on_continuation=[],
             nested_columns=set(),
         )
@@ -77,15 +78,19 @@ def flatten_nested_data(
     if not array_columns:
         return FlattenResult(
             dataframe=result_df,
-            row_groups={},
+            row_labels=None,
+            continuation_rows=None,
             cleared_on_continuation=clear_on_continuation_cols,
             nested_columns=nested_originated_columns,
         )
 
-    result_df, array_row_groups = _explode_array_columns(result_df, array_columns)
+    result_df, row_labels, continuation_rows = _explode_array_columns(
+        result_df, array_columns
+    )
     return FlattenResult(
         dataframe=result_df,
-        row_groups=array_row_groups,
+        row_labels=row_labels,
+        continuation_rows=continuation_rows,
         cleared_on_continuation=clear_on_continuation_cols,
         nested_columns=nested_originated_columns,
     )
@@ -192,10 +197,10 @@ def _flatten_array_of_struct_columns(
 
 def _explode_array_columns(
     dataframe: pd.DataFrame, array_columns: list[str]
-) -> tuple[pd.DataFrame, dict[str, list[int]]]:
+) -> tuple[pd.DataFrame, list[str], set[int]]:
     """Explode array columns into new rows."""
     if not array_columns:
-        return dataframe, {}
+        return dataframe, [], set()
 
     original_cols = dataframe.columns.tolist()
     work_df = dataframe
@@ -243,7 +248,7 @@ def _explode_array_columns(
 
     if not exploded_dfs:
         # This should not be reached if array_columns is not empty
-        return dataframe, {}
+        return dataframe, [], set()
 
     # Merge the exploded columns
     merged_df = exploded_dfs[0]
@@ -260,22 +265,20 @@ def _explode_array_columns(
         drop=True
     )
 
-    # Create row groups
-    array_row_groups = {}
+    # Build row labels and the continuation-row set with vectorized operations
     grouping_col_name = (
         "_original_index" if original_index_name is None else original_index_name
     )
-    if grouping_col_name in merged_df.columns:
-        for orig_idx, group in merged_df.groupby(grouping_col_name):
-            array_row_groups[str(orig_idx)] = group.index.tolist()
+    row_labels = merged_df[grouping_col_name].astype(str).tolist()
+    continuation_rows = set(merged_df.index[merged_df["_row_num"] > 0])
 
     # Restore original columns
     result_df = merged_df[original_cols]
 
     if original_index_name:
         result_df = result_df.set_index(original_index_name)
 
-    return result_df, array_row_groups
+    return result_df, row_labels, continuation_rows
 
 
 def _flatten_struct_columns(
diff --git a/bigframes/display/html.py b/bigframes/display/html.py
@@ -60,7 +60,8 @@ def render_html(
     table_html_parts.append(
         _render_table_body(
             flatten_result.dataframe,
-            flatten_result.row_groups,
+            flatten_result.row_labels,
+            flatten_result.continuation_rows,
             flatten_result.cleared_on_continuation,
             flatten_result.nested_columns,
         )
@@ -87,7 +88,8 @@ def _render_table_header(dataframe: pd.DataFrame, orderable_columns: list[str])
 
 def _render_table_body(
     dataframe: pd.DataFrame,
-    array_row_groups: dict[str, list[int]],
+    row_labels: list[str] | None,
+    continuation_rows: set[int] | None,
     clear_on_continuation: list[str],
     nested_originated_columns: set[str],
 ) -> str:
@@ -99,14 +101,15 @@ def _render_table_body(
         row_class = ""
         orig_row_idx = None
         is_continuation = False
-        for orig_key, row_indices in array_row_groups.items():
-            if i in row_indices and row_indices[0] != i:
-                row_class = "array-continuation"
-                orig_row_idx = orig_key
-                is_continuation = True
-                break
-
-        if row_class:
+
+        if row_labels:
+            orig_row_idx = row_labels[i]
+
+        if continuation_rows and i in continuation_rows:
+            is_continuation = True
+            row_class = "array-continuation"
+
+        if orig_row_idx is not None:
             body_parts.append(
                 f'    <tr class="{row_class}" data-orig-row="{orig_row_idx}">'
             )
diff --git a/tests/js/table_widget.test.js b/tests/js/table_widget.test.js
@@ -20,7 +20,7 @@
 
 import { jest } from '@jest/globals';
 
-/**
+/*
  * Test suite for the TableWidget frontend component.
  */
 describe('TableWidget', () => {
@@ -31,7 +31,7 @@ describe('TableWidget', () => {
   /** @type {Function} */
   let render;
 
-  /**
+  /*
    * Sets up the test environment before each test.
    * This includes resetting modules, creating a DOM element,
    * and mocking the widget model.
@@ -58,7 +58,7 @@ describe('TableWidget', () => {
     expect(render).toBeDefined();
   });
 
-  /**
+  /*
    * Tests for the render function of the widget.
    */
   describe('render', () => {
@@ -91,7 +91,7 @@ describe('TableWidget', () => {
       expect(el.querySelector('div:nth-child(3)')).not.toBeNull();
     });
 
-    /**
+    /*
      * Verifies that clicking a sortable column header triggers a sort action
      * with the correct parameters.
      */
@@ -220,7 +220,7 @@ describe('TableWidget', () => {
       expect(indicator2.textContent).toBe('●');
     });
 
-    /**
+    /*
      * Tests that holding the Shift key while clicking a column header
      * adds the new column to the existing sort context for multi-column sorting.
      */
@@ -362,7 +362,7 @@ describe('TableWidget', () => {
     expect(headers[1].textContent).toBe('value');
   });
 
-  /**
+  /*
    * Verifies that hovering over a cell in a group of flattened rows
    * (i.e., rows originating from the same nested data structure)
    * adds a hover class to all cells in that group.
diff --git a/tests/unit/display/test_html.py b/tests/unit/display/test_html.py
@@ -190,12 +190,11 @@ def test_flatten_nested_data_explodes_arrays():
 
     result = flatten_nested_data(array_data)
     flattened = result.dataframe
-    groups = result.row_groups
+    row_labels = result.row_labels
+    continuation_rows = result.continuation_rows
     nested_originated_columns = result.nested_columns
 
     assert len(flattened) == 5  # 3 + 2 array elements
-    assert "0" in groups  # First original row
-    assert len(groups["0"]) == 3  # Three array elements
-    assert "1" in groups
-    assert len(groups["1"]) == 2
+    assert row_labels == ["0", "0", "0", "1", "1"]
+    assert continuation_rows == {1, 2, 4}
     assert "array_col" in nested_originated_columns