Commit 0bac31b

style: change font color
1 parent d978c97 commit 0bac31b

7 files changed: +1025 −146 lines changed

bigframes/display/anywidget.py

Lines changed: 72 additions & 59 deletions
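
Note: the diff below renames the module-level availability flags to private names and moves the initial data load out of `__init__`. For orientation, here is a minimal usage sketch of the widget (illustrative only; it assumes the `bigframes[anywidget]` extra is installed in a notebook, and the public table name is just an example):

    # Hypothetical notebook cell; the table name is illustrative.
    import bigframes.pandas as bpd
    from bigframes.display.anywidget import TableWidget

    df = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")
    widget = TableWidget(df)  # raises ImportError if anywidget/traitlets are missing
    widget  # display as the last expression of the cell to render the paginated table
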
@@ -39,15 +39,15 @@
     import anywidget
     import traitlets

-    ANYWIDGET_INSTALLED = True
+    _ANYWIDGET_INSTALLED = True
 except Exception:
-    ANYWIDGET_INSTALLED = False
+    _ANYWIDGET_INSTALLED = False

-WIDGET_BASE: type[Any]
-if ANYWIDGET_INSTALLED:
-    WIDGET_BASE = anywidget.AnyWidget
+_WIDGET_BASE: type[Any]
+if _ANYWIDGET_INSTALLED:
+    _WIDGET_BASE = anywidget.AnyWidget
 else:
-    WIDGET_BASE = object
+    _WIDGET_BASE = object


 @dataclasses.dataclass(frozen=True)

@@ -56,7 +56,7 @@ class _SortState:
     ascending: bool


-class TableWidget(WIDGET_BASE):
+class TableWidget(_WIDGET_BASE):
     """An interactive, paginated table widget for BigFrames DataFrames.

     This widget provides a user-friendly way to display and navigate through

@@ -82,7 +82,7 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         Args:
             dataframe: The Bigframes Dataframe to display in the widget.
         """
-        if not ANYWIDGET_INSTALLED:
+        if not _ANYWIDGET_INSTALLED:
             raise ImportError(
                 "Please `pip install anywidget traitlets` or "
                 "`pip install 'bigframes[anywidget]'` to use TableWidget."

@@ -105,6 +105,7 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         # set traitlets properties that trigger observers
         # TODO(b/462525985): Investigate and improve TableWidget UX for DataFrames with a large number of columns.
         self.page_size = initial_page_size
+        # TODO(b/469861913): Nested columns from structs (e.g., 'struct_col.name') are not currently sortable.
         # TODO(b/463754889): Support non-string column labels for sorting.
         if all(isinstance(col, str) for col in dataframe.columns):
             self.orderable_columns = [

@@ -115,6 +116,14 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         else:
             self.orderable_columns = []

+        self._initial_load()
+
+        # Signals to the frontend that the initial data load is complete.
+        # Also used as a guard to prevent observers from firing during initialization.
+        self._initial_load_complete = True
+
+    def _initial_load(self):
+        """Get initial data and row count."""
         # obtain the row counts
         # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
         # before we get here so that the count might already be cached.

@@ -138,10 +147,6 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         # get the initial page
         self._set_table_html()

-        # Signals to the frontend that the initial data load is complete.
-        # Also used as a guard to prevent observers from firing during initialization.
-        self._initial_load_complete = True
-
     @traitlets.observe("_initial_load_complete")
     def _on_initial_load_complete(self, change: dict[str, Any]):
         if change["new"]:
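
Note: `_initial_load_complete` doubles as a guard so observers stay quiet while `__init__` is still populating traits. A stripped-down sketch of that pattern (hypothetical class, not the widget's actual code):

    # Illustrative traitlets guard pattern; the names here are made up.
    import traitlets

    class Paginator(traitlets.HasTraits):
        page = traitlets.Int(0)
        _initial_load_complete = traitlets.Bool(False)

        def __init__(self):
            super().__init__()
            self.page = 1  # assignment fires the observer, but the guard skips the work
            self._initial_load_complete = True  # from here on, observers do real work

        @traitlets.observe("page")
        def _on_page_change(self, change):
            if not self._initial_load_complete:
                return  # still initializing; skip the expensive re-render
            print(f"render page {change['new']}")
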
@@ -294,53 +299,8 @@ def _set_table_html(self) -> None:
                 )
                 self.page = 0  # Reset to first page

-            page_data = pd.DataFrame()
-            # This loop is to handle auto-correction of page number when row count is unknown
-            while True:
-                start = self.page * self.page_size
-                end = start + self.page_size
-
-                # fetch more data if the requested page is outside our cache
-                cached_data = self._cached_data
-                while len(cached_data) < end and not self._all_data_loaded:
-                    if self._get_next_batch():
-                        cached_data = self._cached_data
-                    else:
-                        break
-
-                # Get the data for the current page
-                page_data = cached_data.iloc[start:end].copy()
-
-                # Handle case where user navigated beyond available data with unknown row count
-                is_unknown_count = self.row_count is None
-                is_beyond_data = (
-                    self._all_data_loaded and len(page_data) == 0 and self.page > 0
-                )
-                if is_unknown_count and is_beyond_data:
-                    # Calculate the last valid page (zero-indexed)
-                    total_rows = len(cached_data)
-                    last_valid_page = max(0, math.ceil(total_rows / self.page_size) - 1)
-                    # Navigate back to the last valid page
-                    self.page = last_valid_page
-                    # Continue the loop to re-calculate page data
-                    continue
-
-                # If page is valid, break out of the loop.
-                break
-
-            # Handle index display
-            if self._dataframe._block.has_index:
-                is_unnamed_single_index = (
-                    page_data.index.name is None
-                    and not isinstance(page_data.index, pd.MultiIndex)
-                )
-                page_data = page_data.reset_index()
-                if is_unnamed_single_index and "index" in page_data.columns:
-                    page_data.rename(columns={"index": ""}, inplace=True)
-
-            # Default index - include as "Row" column if no index was present originally
-            if not self._dataframe._block.has_index:
-                page_data.insert(0, "Row", range(start + 1, start + len(page_data) + 1))
+            page_data = self._get_page_data()
+            page_data = self._prepare_dataframe_for_display(page_data)

             # Generate HTML table
             self.table_html = bigframes.display.html.render_html(
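
Note: the page-clamping arithmetic that moves into `_get_page_data` below is easiest to follow with concrete numbers (made-up values):

    # Standalone illustration of the clamp; the numbers are invented.
    import math

    page_size = 10
    total_rows = 23      # rows actually cached once _all_data_loaded is True
    requested_page = 7   # user paged far past the end while the row count was unknown

    last_valid_page = max(0, math.ceil(total_rows / page_size) - 1)
    print(last_valid_page)  # 2 -> the widget snaps back to zero-indexed page 2 (rows 21-23)
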
@@ -350,6 +310,59 @@ def _set_table_html(self) -> None:
         finally:
             delattr(self, "_setting_html")

+    def _get_page_data(self) -> pd.DataFrame:
+        """Get the data for the current page, handling unknown row count."""
+        # This loop is to handle auto-correction of page number when row count is unknown
+        while True:
+            start = self.page * self.page_size
+            end = start + self.page_size
+
+            # fetch more data if the requested page is outside our cache
+            cached_data = self._cached_data
+            while len(cached_data) < end and not self._all_data_loaded:
+                if self._get_next_batch():
+                    cached_data = self._cached_data
+                else:
+                    break
+
+            # Get the data for the current page
+            page_data = cached_data.iloc[start:end].copy()
+
+            # Handle case where user navigated beyond available data with unknown row count
+            is_unknown_count = self.row_count is None
+            is_beyond_data = (
+                self._all_data_loaded and len(page_data) == 0 and self.page > 0
+            )
+            if is_unknown_count and is_beyond_data:
+                # Calculate the last valid page (zero-indexed)
+                total_rows = len(cached_data)
+                last_valid_page = max(0, math.ceil(total_rows / self.page_size) - 1)
+                # Navigate back to the last valid page
+                self.page = last_valid_page
+                # Continue the loop to re-calculate page data
+                continue
+
+            # If page is valid, break out of the loop.
+            return page_data
+
+    def _prepare_dataframe_for_display(self, page_data: pd.DataFrame) -> pd.DataFrame:
+        """Prepare the DataFrame for display, handling index and row numbers."""
+        start = self.page * self.page_size
+        # Handle index display
+        if self._dataframe._block.has_index:
+            is_unnamed_single_index = page_data.index.name is None and not isinstance(
+                page_data.index, pd.MultiIndex
+            )
+            page_data = page_data.reset_index()
+            if is_unnamed_single_index and "index" in page_data.columns:
+                page_data.rename(columns={"index": ""}, inplace=True)
+
+        # Default index - include as "Row" column if no index was present originally
+        if not self._dataframe._block.has_index:
+            page_data.insert(0, "Row", range(start + 1, start + len(page_data) + 1))
+
+        return page_data
+
     @traitlets.observe("sort_column", "sort_ascending")
     def _sort_changed(self, _change: dict[str, Any]):
         """Handler for when sorting parameters change from the frontend."""

bigframes/display/html.py

Lines changed: 84 additions & 52 deletions
@@ -56,18 +56,53 @@ def _flatten_nested_data(
         return dataframe.copy(), {}, [], set()

     result_df = _try_parse_json_strings(dataframe)
-    initial_columns = list(result_df.columns)

-    array_row_groups: dict[str, list[int]] = {}
-    nested_originated_columns: set[str] = set()
+    (
+        struct_columns,
+        array_columns,
+        array_of_struct_columns,
+        clear_on_continuation_cols,
+        nested_originated_columns,
+    ) = _classify_columns(result_df)
+
+    result_df, array_columns = _flatten_array_of_struct_columns(
+        result_df, array_of_struct_columns, array_columns, nested_originated_columns
+    )

-    # First, identify all STRUCT and ARRAY columns
+    result_df, clear_on_continuation_cols = _flatten_struct_columns(
+        result_df, struct_columns, clear_on_continuation_cols, nested_originated_columns
+    )
+
+    # Now handle ARRAY columns (including the newly created ones from ARRAY of STRUCT)
+    if not array_columns:
+        return (
+            result_df,
+            {},
+            clear_on_continuation_cols,
+            nested_originated_columns,
+        )
+
+    result_df, array_row_groups = _explode_array_columns(result_df, array_columns)
+    return (
+        result_df,
+        array_row_groups,
+        clear_on_continuation_cols,
+        nested_originated_columns,
+    )
+
+
+def _classify_columns(
+    dataframe: pd.DataFrame,
+) -> tuple[list[str], list[str], list[str], list[str], set[str]]:
+    """Identify all STRUCT and ARRAY columns."""
+    initial_columns = list(dataframe.columns)
     struct_columns: list[str] = []
     array_columns: list[str] = []
     array_of_struct_columns: list[str] = []
     clear_on_continuation_cols: list[str] = []
+    nested_originated_columns: set[str] = set()

-    for col_name_raw, col_data in result_df.items():
+    for col_name_raw, col_data in dataframe.items():
         col_name = str(col_name_raw)
         dtype = col_data.dtype
         if isinstance(dtype, pd.ArrowDtype):
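
Note: `_classify_columns` keys off `pd.ArrowDtype` to tell STRUCT and ARRAY columns apart. A self-contained sketch of that kind of check using pyarrow's type predicates (toy data; not the library's exact branching):

    # Toy classification of Arrow-backed columns; not BigFrames' exact logic.
    import pandas as pd
    import pyarrow as pa

    df = pd.DataFrame(
        {
            "person": pd.Series(
                [{"name": "a", "age": 1}],
                dtype=pd.ArrowDtype(pa.struct([("name", pa.string()), ("age", pa.int64())])),
            ),
            "tags": pd.Series([["x", "y"]], dtype=pd.ArrowDtype(pa.list_(pa.string()))),
            "score": pd.Series([1.5]),
        }
    )

    for name, col in df.items():
        dtype = col.dtype
        if isinstance(dtype, pd.ArrowDtype):
            pa_type = dtype.pyarrow_dtype
            if pa.types.is_struct(pa_type):
                print(name, "-> STRUCT")
            elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type):
                print(name, "-> ARRAY")
        else:
            print(name, "-> plain scalar column")
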
@@ -86,28 +121,10 @@ def _flatten_nested_data(
                 clear_on_continuation_cols.append(col_name)
             elif col_name in initial_columns:
                 clear_on_continuation_cols.append(col_name)
-
-    result_df, array_columns = _flatten_array_of_struct_columns(
-        result_df, array_of_struct_columns, array_columns, nested_originated_columns
-    )
-
-    result_df, clear_on_continuation_cols = _flatten_struct_columns(
-        result_df, struct_columns, clear_on_continuation_cols, nested_originated_columns
-    )
-
-    # Now handle ARRAY columns (including the newly created ones from ARRAY of STRUCT)
-    if not array_columns:
-        return (
-            result_df,
-            array_row_groups,
-            clear_on_continuation_cols,
-            nested_originated_columns,
-        )
-
-    result_df, array_row_groups = _explode_array_columns(result_df, array_columns)
     return (
-        result_df,
-        array_row_groups,
+        struct_columns,
+        array_columns,
+        array_of_struct_columns,
         clear_on_continuation_cols,
         nested_originated_columns,
     )

@@ -292,25 +309,43 @@ def render_html(
     ) = _flatten_nested_data(dataframe)

     classes = "dataframe table table-striped table-hover"
-    table_html_parts = []
-    precision = options.display.precision
+    table_html_parts = [f'<table border="1" class="{classes}" id="{table_id}">\n']
+    table_html_parts.append(_render_table_header(flattened_df))
+    table_html_parts.append(
+        _render_table_body(
+            flattened_df,
+            array_row_groups,
+            clear_on_continuation,
+            nested_originated_columns,
+        )
+    )
+    table_html_parts.append("</table>")
+    return "".join(table_html_parts)

-    table_html_parts.append(f'<table border="1" class="{classes}" id="{table_id}">\n')

-    # Render table head
-    table_html_parts.append(" <thead>\n")
-    table_html_parts.append(" <tr>\n")
-    for col in flattened_df.columns:
-        table_html_parts.append(
+def _render_table_header(dataframe: pd.DataFrame) -> str:
+    """Render the header of the HTML table."""
+    header_parts = [" <thead>\n", " <tr>\n"]
+    for col in dataframe.columns:
+        header_parts.append(
             f' <th><div class="bf-header-content">'
             f"{html.escape(str(col))}</div></th>\n"
         )
-    table_html_parts.append(" </tr>\n")
-    table_html_parts.append(" </thead>\n")
+    header_parts.extend([" </tr>\n", " </thead>\n"])
+    return "".join(header_parts)

-    # Render table body
-    table_html_parts.append(" <tbody>\n")
-    for i in range(len(flattened_df)):
+
+def _render_table_body(
+    dataframe: pd.DataFrame,
+    array_row_groups: dict[str, list[int]],
+    clear_on_continuation: list[str],
+    nested_originated_columns: set[str],
+) -> str:
+    """Render the body of the HTML table."""
+    body_parts = [" <tbody>\n"]
+    precision = options.display.precision
+
+    for i in range(len(dataframe)):
         row_class = ""
         orig_row_idx = None
         is_continuation = False

@@ -322,21 +357,20 @@
                 break

         if row_class:
-            table_html_parts.append(
+            body_parts.append(
                 f' <tr class="{row_class}" data-orig-row="{orig_row_idx}">\n'
             )
         else:
-            table_html_parts.append(" <tr>\n")
+            body_parts.append(" <tr>\n")

-        row = flattened_df.iloc[i]
+        row = dataframe.iloc[i]
         for col_name, value in row.items():
             col_name_str = str(col_name)
             if is_continuation and col_name_str in clear_on_continuation:
-                table_html_parts.append(" <td></td>\n")
+                body_parts.append(" <td></td>\n")
                 continue
-            dtype = flattened_df.dtypes.loc[col_name]  # type: ignore
+            dtype = dataframe.dtypes.loc[col_name]  # type: ignore

-            # Check if column originated from an array
             if col_name_str in nested_originated_columns:
                 align = "left"
             else:

@@ -345,19 +379,17 @@
             cell_content = ""
             if pandas.api.types.is_scalar(value) and pd.isna(value):
                 cell_content = ""
-                align = "left"  # Force left alignment for empty cells (NA)
+                align = "left"
             elif isinstance(value, float):
                 cell_content = f"{value:.{precision}f}"
             else:
                 cell_content = str(value)

             align_class = f"cell-align-{align}"
-            table_html_parts.append(
+            body_parts.append(
                 f' <td class="{align_class}">'
                 f"{html.escape(cell_content)}</td>\n"
             )
-        table_html_parts.append(" </tr>\n")
-    table_html_parts.append(" </tbody>\n")
-    table_html_parts.append("</table>")
-
-    return "".join(table_html_parts)
+        body_parts.append(" </tr>\n")
+    body_parts.append(" </tbody>\n")
+    return "".join(body_parts)
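
Note: the header/body split keeps `render_html` as a thin assembler. A simplified, self-contained imitation of the same list-of-parts approach (no array row groups or continuation handling; the markup differs from BigFrames' exact output):

    # Simplified imitation of the header/body split; not the library's exact markup.
    import html as html_mod
    import pandas as pd

    def render_header(df: pd.DataFrame) -> str:
        parts = ["  <thead>\n    <tr>\n"]
        parts += [f"      <th>{html_mod.escape(str(c))}</th>\n" for c in df.columns]
        parts.append("    </tr>\n  </thead>\n")
        return "".join(parts)

    def render_body(df: pd.DataFrame, precision: int = 2) -> str:
        parts = ["  <tbody>\n"]
        for _, row in df.iterrows():
            parts.append("    <tr>\n")
            for value in row:
                if pd.isna(value):
                    text = ""
                elif isinstance(value, float):
                    text = f"{value:.{precision}f}"
                else:
                    text = str(value)
                parts.append(f"      <td>{html_mod.escape(text)}</td>\n")
            parts.append("    </tr>\n")
        parts.append("  </tbody>\n")
        return "".join(parts)

    df = pd.DataFrame({"name": ["a", "b"], "score": [1.5, None]})
    print(f'<table class="dataframe">\n{render_header(df)}{render_body(df)}</table>')
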
