Commit 0bac31b

style: change font color
1 parent d978c97 commit 0bac31b

7 files changed: +1025 −146 lines changed

bigframes/display/anywidget.py

Lines changed: 72 additions & 59 deletions
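
Note: the diff below renames the module-level availability flags to private names and moves the initial data load out of `__init__`. For orientation, here is a minimal usage sketch of the widget (illustrative only; it assumes the `bigframes[anywidget]` extra is installed in a notebook, and the public table name is just an example):

    # Hypothetical notebook cell; the table name is illustrative.
    import bigframes.pandas as bpd
    from bigframes.display.anywidget import TableWidget

    df = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")
    widget = TableWidget(df)  # raises ImportError if anywidget/traitlets are missing
    widget  # display as the last expression of the cell to render the paginated table
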
@@ -39,15 +39,15 @@
     import anywidget
     import traitlets

-    ANYWIDGET_INSTALLED = True
+    _ANYWIDGET_INSTALLED = True
 except Exception:
-    ANYWIDGET_INSTALLED = False
+    _ANYWIDGET_INSTALLED = False

-WIDGET_BASE: type[Any]
-if ANYWIDGET_INSTALLED:
-    WIDGET_BASE = anywidget.AnyWidget
+_WIDGET_BASE: type[Any]
+if _ANYWIDGET_INSTALLED:
+    _WIDGET_BASE = anywidget.AnyWidget
 else:
-    WIDGET_BASE = object
+    _WIDGET_BASE = object


 @dataclasses.dataclass(frozen=True)

@@ -56,7 +56,7 @@ class _SortState:
     ascending: bool


-class TableWidget(WIDGET_BASE):
+class TableWidget(_WIDGET_BASE):
     """An interactive, paginated table widget for BigFrames DataFrames.

     This widget provides a user-friendly way to display and navigate through

@@ -82,7 +82,7 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         Args:
             dataframe: The Bigframes Dataframe to display in the widget.
         """
-        if not ANYWIDGET_INSTALLED:
+        if not _ANYWIDGET_INSTALLED:
             raise ImportError(
                 "Please `pip install anywidget traitlets` or "
                 "`pip install 'bigframes[anywidget]'` to use TableWidget."

@@ -105,6 +105,7 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         # set traitlets properties that trigger observers
         # TODO(b/462525985): Investigate and improve TableWidget UX for DataFrames with a large number of columns.
         self.page_size = initial_page_size
+        # TODO(b/469861913): Nested columns from structs (e.g., 'struct_col.name') are not currently sortable.
         # TODO(b/463754889): Support non-string column labels for sorting.
         if all(isinstance(col, str) for col in dataframe.columns):
             self.orderable_columns = [

@@ -115,6 +116,14 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         else:
             self.orderable_columns = []

+        self._initial_load()
+
+        # Signals to the frontend that the initial data load is complete.
+        # Also used as a guard to prevent observers from firing during initialization.
+        self._initial_load_complete = True
+
+    def _initial_load(self):
+        """Get initial data and row count."""
         # obtain the row counts
         # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
         # before we get here so that the count might already be cached.

@@ -138,10 +147,6 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         # get the initial page
         self._set_table_html()

-        # Signals to the frontend that the initial data load is complete.
-        # Also used as a guard to prevent observers from firing during initialization.
-        self._initial_load_complete = True
-
     @traitlets.observe("_initial_load_complete")
     def _on_initial_load_complete(self, change: dict[str, Any]):
         if change["new"]:
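
Note: `_initial_load_complete` doubles as a guard so observers stay quiet while `__init__` is still populating traits. A stripped-down sketch of that pattern (hypothetical class, not the widget's actual code):

    # Illustrative traitlets guard pattern; the names here are made up.
    import traitlets

    class Paginator(traitlets.HasTraits):
        page = traitlets.Int(0)
        _initial_load_complete = traitlets.Bool(False)

        def __init__(self):
            super().__init__()
            self.page = 1  # assignment fires the observer, but the guard skips the work
            self._initial_load_complete = True  # from here on, observers do real work

        @traitlets.observe("page")
        def _on_page_change(self, change):
            if not self._initial_load_complete:
                return  # still initializing; skip the expensive re-render
            print(f"render page {change['new']}")
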
@@ -294,53 +299,8 @@ def _set_table_html(self) -> None:
                 )
                 self.page = 0  # Reset to first page

-            page_data = pd.DataFrame()
-            # This loop is to handle auto-correction of page number when row count is unknown
-            while True:
-                start = self.page * self.page_size
-                end = start + self.page_size
-
-                # fetch more data if the requested page is outside our cache
-                cached_data = self._cached_data
-                while len(cached_data) < end and not self._all_data_loaded:
-                    if self._get_next_batch():
-                        cached_data = self._cached_data
-                    else:
-                        break
-
-                # Get the data for the current page
-                page_data = cached_data.iloc[start:end].copy()
-
-                # Handle case where user navigated beyond available data with unknown row count
-                is_unknown_count = self.row_count is None
-                is_beyond_data = (
-                    self._all_data_loaded and len(page_data) == 0 and self.page > 0
-                )
-                if is_unknown_count and is_beyond_data:
-                    # Calculate the last valid page (zero-indexed)
-                    total_rows = len(cached_data)
-                    last_valid_page = max(0, math.ceil(total_rows / self.page_size) - 1)
-                    # Navigate back to the last valid page
-                    self.page = last_valid_page
-                    # Continue the loop to re-calculate page data
-                    continue
-
-                # If page is valid, break out of the loop.
-                break
-
-            # Handle index display
-            if self._dataframe._block.has_index:
-                is_unnamed_single_index = (
-                    page_data.index.name is None
-                    and not isinstance(page_data.index, pd.MultiIndex)
-                )
-                page_data = page_data.reset_index()
-                if is_unnamed_single_index and "index" in page_data.columns:
-                    page_data.rename(columns={"index": ""}, inplace=True)
-
-            # Default index - include as "Row" column if no index was present originally
-            if not self._dataframe._block.has_index:
-                page_data.insert(0, "Row", range(start + 1, start + len(page_data) + 1))
+            page_data = self._get_page_data()
+            page_data = self._prepare_dataframe_for_display(page_data)

             # Generate HTML table
             self.table_html = bigframes.display.html.render_html(
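
Note: the page-clamping arithmetic that moves into `_get_page_data` below is easiest to follow with concrete numbers (made-up values):

    # Standalone illustration of the clamp; the numbers are invented.
    import math

    page_size = 10
    total_rows = 23      # rows actually cached once _all_data_loaded is True
    requested_page = 7   # user paged far past the end while the row count was unknown

    last_valid_page = max(0, math.ceil(total_rows / page_size) - 1)
    print(last_valid_page)  # 2 -> the widget snaps back to zero-indexed page 2 (rows 21-23)
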
@@ -350,6 +310,59 @@ def _set_table_html(self) -> None:
         finally:
             delattr(self, "_setting_html")

+    def _get_page_data(self) -> pd.DataFrame:
+        """Get the data for the current page, handling unknown row count."""
+        # This loop is to handle auto-correction of page number when row count is unknown
+        while True:
+            start = self.page * self.page_size
+            end = start + self.page_size
+
+            # fetch more data if the requested page is outside our cache
+            cached_data = self._cached_data
+            while len(cached_data) < end and not self._all_data_loaded:
+                if self._get_next_batch():
+                    cached_data = self._cached_data
+                else:
+                    break
+
+            # Get the data for the current page
+            page_data = cached_data.iloc[start:end].copy()
+
+            # Handle case where user navigated beyond available data with unknown row count
+            is_unknown_count = self.row_count is None
+            is_beyond_data = (
+                self._all_data_loaded and len(page_data) == 0 and self.page > 0
+            )
+            if is_unknown_count and is_beyond_data:
+                # Calculate the last valid page (zero-indexed)
+                total_rows = len(cached_data)
+                last_valid_page = max(0, math.ceil(total_rows / self.page_size) - 1)
+                # Navigate back to the last valid page
+                self.page = last_valid_page
+                # Continue the loop to re-calculate page data
+                continue
+
+            # If page is valid, break out of the loop.
+            return page_data
+
+    def _prepare_dataframe_for_display(self, page_data: pd.DataFrame) -> pd.DataFrame:
+        """Prepare the DataFrame for display, handling index and row numbers."""
+        start = self.page * self.page_size
+        # Handle index display
+        if self._dataframe._block.has_index:
+            is_unnamed_single_index = page_data.index.name is None and not isinstance(
+                page_data.index, pd.MultiIndex
+            )
+            page_data = page_data.reset_index()
+            if is_unnamed_single_index and "index" in page_data.columns:
+                page_data.rename(columns={"index": ""}, inplace=True)
+
+        # Default index - include as "Row" column if no index was present originally
+        if not self._dataframe._block.has_index:
+            page_data.insert(0, "Row", range(start + 1, start + len(page_data) + 1))
+
+        return page_data
+
     @traitlets.observe("sort_column", "sort_ascending")
     def _sort_changed(self, _change: dict[str, Any]):
         """Handler for when sorting parameters change from the frontend."""

bigframes/display/html.py

Lines changed: 84 additions & 52 deletions
@@ -56,18 +56,53 @@ def _flatten_nested_data(
         return dataframe.copy(), {}, [], set()

     result_df = _try_parse_json_strings(dataframe)
-    initial_columns = list(result_df.columns)

-    array_row_groups: dict[str, list[int]] = {}
-    nested_originated_columns: set[str] = set()
+    (
+        struct_columns,
+        array_columns,
+        array_of_struct_columns,
+        clear_on_continuation_cols,
+        nested_originated_columns,
+    ) = _classify_columns(result_df)
+
+    result_df, array_columns = _flatten_array_of_struct_columns(
+        result_df, array_of_struct_columns, array_columns, nested_originated_columns
+    )

-    # First, identify all STRUCT and ARRAY columns
+    result_df, clear_on_continuation_cols = _flatten_struct_columns(
+        result_df, struct_columns, clear_on_continuation_cols, nested_originated_columns
+    )
+
+    # Now handle ARRAY columns (including the newly created ones from ARRAY of STRUCT)
+    if not array_columns:
+        return (
+            result_df,
+            {},
+            clear_on_continuation_cols,
+            nested_originated_columns,
+        )
+
+    result_df, array_row_groups = _explode_array_columns(result_df, array_columns)
+    return (
+        result_df,
+        array_row_groups,
+        clear_on_continuation_cols,
+        nested_originated_columns,
+    )
+
+
+def _classify_columns(
+    dataframe: pd.DataFrame,
+) -> tuple[list[str], list[str], list[str], list[str], set[str]]:
+    """Identify all STRUCT and ARRAY columns."""
+    initial_columns = list(dataframe.columns)
     struct_columns: list[str] = []
     array_columns: list[str] = []
     array_of_struct_columns: list[str] = []
     clear_on_continuation_cols: list[str] = []
+    nested_originated_columns: set[str] = set()

-    for col_name_raw, col_data in result_df.items():
+    for col_name_raw, col_data in dataframe.items():
         col_name = str(col_name_raw)
         dtype = col_data.dtype
         if isinstance(dtype, pd.ArrowDtype):
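
Note: `_classify_columns` keys off `pd.ArrowDtype` to tell STRUCT and ARRAY columns apart. A self-contained sketch of that kind of check using pyarrow's type predicates (toy data; not the library's exact branching):

    # Toy classification of Arrow-backed columns; not BigFrames' exact logic.
    import pandas as pd
    import pyarrow as pa

    df = pd.DataFrame(
        {
            "person": pd.Series(
                [{"name": "a", "age": 1}],
                dtype=pd.ArrowDtype(pa.struct([("name", pa.string()), ("age", pa.int64())])),
            ),
            "tags": pd.Series([["x", "y"]], dtype=pd.ArrowDtype(pa.list_(pa.string()))),
            "score": pd.Series([1.5]),
        }
    )

    for name, col in df.items():
        dtype = col.dtype
        if isinstance(dtype, pd.ArrowDtype):
            pa_type = dtype.pyarrow_dtype
            if pa.types.is_struct(pa_type):
                print(name, "-> STRUCT")
            elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type):
                print(name, "-> ARRAY")
        else:
            print(name, "-> plain scalar column")
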
@@ -86,28 +121,10 @@ def _flatten_nested_data(
                 clear_on_continuation_cols.append(col_name)
             elif col_name in initial_columns:
                 clear_on_continuation_cols.append(col_name)
-
-    result_df, array_columns = _flatten_array_of_struct_columns(
-        result_df, array_of_struct_columns, array_columns, nested_originated_columns
-    )
-
-    result_df, clear_on_continuation_cols = _flatten_struct_columns(
-        result_df, struct_columns, clear_on_continuation_cols, nested_originated_columns
-    )
-
-    # Now handle ARRAY columns (including the newly created ones from ARRAY of STRUCT)
-    if not array_columns:
-        return (
-            result_df,
-            array_row_groups,
-            clear_on_continuation_cols,
-            nested_originated_columns,
-        )
-
-    result_df, array_row_groups = _explode_array_columns(result_df, array_columns)
     return (
-        result_df,
-        array_row_groups,
+        struct_columns,
+        array_columns,
+        array_of_struct_columns,
         clear_on_continuation_cols,
         nested_originated_columns,
     )

@@ -292,25 +309,43 @@ def render_html(
     ) = _flatten_nested_data(dataframe)

     classes = "dataframe table table-striped table-hover"
-    table_html_parts = []
-    precision = options.display.precision
+    table_html_parts = [f'<table border="1" class="{classes}" id="{table_id}">\n']
+    table_html_parts.append(_render_table_header(flattened_df))
+    table_html_parts.append(
+        _render_table_body(
+            flattened_df,
+            array_row_groups,
+            clear_on_continuation,
+            nested_originated_columns,
+        )
+    )
+    table_html_parts.append("</table>")
+    return "".join(table_html_parts)

-    table_html_parts.append(f'<table border="1" class="{classes}" id="{table_id}">\n')

-    # Render table head
-    table_html_parts.append(" <thead>\n")
-    table_html_parts.append(" <tr>\n")
-    for col in flattened_df.columns:
-        table_html_parts.append(
+def _render_table_header(dataframe: pd.DataFrame) -> str:
+    """Render the header of the HTML table."""
+    header_parts = [" <thead>\n", " <tr>\n"]
+    for col in dataframe.columns:
+        header_parts.append(
             f' <th><div class="bf-header-content">'
             f"{html.escape(str(col))}</div></th>\n"
         )
-    table_html_parts.append(" </tr>\n")
-    table_html_parts.append(" </thead>\n")
+    header_parts.extend([" </tr>\n", " </thead>\n"])
+    return "".join(header_parts)

-    # Render table body
-    table_html_parts.append(" <tbody>\n")
-    for i in range(len(flattened_df)):
+
+def _render_table_body(
+    dataframe: pd.DataFrame,
+    array_row_groups: dict[str, list[int]],
+    clear_on_continuation: list[str],
+    nested_originated_columns: set[str],
+) -> str:
+    """Render the body of the HTML table."""
+    body_parts = [" <tbody>\n"]
+    precision = options.display.precision
+
+    for i in range(len(dataframe)):
         row_class = ""
         orig_row_idx = None
         is_continuation = False

@@ -322,21 +357,20 @@
                 break

         if row_class:
-            table_html_parts.append(
+            body_parts.append(
                 f' <tr class="{row_class}" data-orig-row="{orig_row_idx}">\n'
             )
         else:
-            table_html_parts.append(" <tr>\n")
+            body_parts.append(" <tr>\n")

-        row = flattened_df.iloc[i]
+        row = dataframe.iloc[i]
         for col_name, value in row.items():
             col_name_str = str(col_name)
             if is_continuation and col_name_str in clear_on_continuation:
-                table_html_parts.append(" <td></td>\n")
+                body_parts.append(" <td></td>\n")
                 continue
-            dtype = flattened_df.dtypes.loc[col_name]  # type: ignore
+            dtype = dataframe.dtypes.loc[col_name]  # type: ignore

-            # Check if column originated from an array
             if col_name_str in nested_originated_columns:
                 align = "left"
             else:

@@ -345,19 +379,17 @@
             cell_content = ""
             if pandas.api.types.is_scalar(value) and pd.isna(value):
                 cell_content = ""
-                align = "left"  # Force left alignment for empty cells (NA)
+                align = "left"
             elif isinstance(value, float):
                 cell_content = f"{value:.{precision}f}"
             else:
                 cell_content = str(value)

             align_class = f"cell-align-{align}"
-            table_html_parts.append(
+            body_parts.append(
                 f' <td class="{align_class}">'
                 f"{html.escape(cell_content)}</td>\n"
             )
-        table_html_parts.append(" </tr>\n")
-    table_html_parts.append(" </tbody>\n")
-    table_html_parts.append("</table>")
-
-    return "".join(table_html_parts)
+        body_parts.append(" </tr>\n")
+    body_parts.append(" </tbody>\n")
+    return "".join(body_parts)
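
Note: the header/body split keeps `render_html` as a thin assembler. A simplified, self-contained imitation of the same list-of-parts approach (no array row groups or continuation handling; the markup differs from BigFrames' exact output):

    # Simplified imitation of the header/body split; not the library's exact markup.
    import html as html_mod
    import pandas as pd

    def render_header(df: pd.DataFrame) -> str:
        parts = ["  <thead>\n    <tr>\n"]
        parts += [f"      <th>{html_mod.escape(str(c))}</th>\n" for c in df.columns]
        parts.append("    </tr>\n  </thead>\n")
        return "".join(parts)

    def render_body(df: pd.DataFrame, precision: int = 2) -> str:
        parts = ["  <tbody>\n"]
        for _, row in df.iterrows():
            parts.append("    <tr>\n")
            for value in row:
                if pd.isna(value):
                    text = ""
                elif isinstance(value, float):
                    text = f"{value:.{precision}f}"
                else:
                    text = str(value)
                parts.append(f"      <td>{html_mod.escape(text)}</td>\n")
            parts.append("    </tr>\n")
        parts.append("  </tbody>\n")
        return "".join(parts)

    df = pd.DataFrame({"name": ["a", "b"], "score": [1.5, None]})
    print(f'<table class="dataframe">\n{render_header(df)}{render_body(df)}</table>')
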
