|
| 1 | +# Copyright 2025 Google LLC |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +from __future__ import annotations |
| 16 | + |
| 17 | +from importlib import resources |
| 18 | +import functools |
| 19 | +import math |
| 20 | +from typing import Any, Dict, Iterator, List, Optional, Type |
| 21 | +import uuid |
| 22 | + |
| 23 | +import pandas as pd |
| 24 | + |
| 25 | +import bigframes |
| 26 | + |
# anywidget and traitlets are optional extras. The intent is that importing this
# module does not fail when they are missing, so the import attempt is isolated
# here and its outcome is recorded in a flag. Keeping the third-party surface
# this small also simplifies unit testing and guards against these packages
# quietly turning into hard requirements.
try:
    import anywidget
    import traitlets
except Exception:
    ANYWIDGET_INSTALLED = False
else:
    ANYWIDGET_INSTALLED = True

# Base class for the widget: the real AnyWidget when it could be imported,
# otherwise plain `object` as a placeholder.
WIDGET_BASE: Type[Any] = anywidget.AnyWidget if ANYWIDGET_INSTALLED else object
| 44 | + |
| 45 | + |
class TableWidget(WIDGET_BASE):
    """An interactive, paginated table widget for BigFrames DataFrames.

    Rows are pulled lazily from ``dataframe.to_pandas_batches()`` and kept in
    an in-memory list of pandas DataFrames. Whenever the synced ``page`` trait
    changes, the rows of that page are rendered to HTML and published through
    the synced ``table_html`` trait.
    """

    def __init__(self, dataframe: bigframes.dataframe.DataFrame):
        """Initialize the TableWidget.

        Args:
            dataframe: The Bigframes Dataframe to display in the widget.

        Raises:
            ImportError: If the optional anywidget / traitlets packages could
                not be imported.
        """
        if not ANYWIDGET_INSTALLED:
            raise ImportError(
                "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
            )

        super().__init__()
        self._dataframe = dataframe

        # Respect the user's display options.
        self.page_size = bigframes.options.display.max_rows

        # Source of row batches; it is only iterated on demand (see
        # `_batch_iterator`).
        self._batches = dataframe.to_pandas_batches(page_size=self.page_size)

        # Batches fetched so far, kept as separate DataFrames so that fetching
        # another batch does not copy the rows that are already cached.
        self._cached_batches: List[pd.DataFrame] = []

        # Unique identifier for the HTML table element.
        self._table_id = str(uuid.uuid4())
        self._all_data_loaded = False
        # Backing field for the lazily created `_batch_iterator` property.
        self._batch_iter: Optional[Iterator[pd.DataFrame]] = None

        # len(dataframe) is expensive, since it will trigger a
        # SELECT COUNT(*) query. It is a must have however.
        # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
        # before we get here so that the count might already be cached.
        self.row_count = len(dataframe)

        # Render the initial page.
        self._set_table_html()

    @functools.cached_property
    def _esm(self) -> str:
        """Load the widget's JavaScript code from the packaged file."""
        return resources.read_text(bigframes.display, "table_widget.js")

    # Traits synchronized with the front end.
    # NOTE(review): these declarations (and the decorators below) reference
    # `traitlets` when the class body executes, so this module cannot be
    # imported if the optional import of traitlets failed -- confirm that
    # importers of this module guard against that.
    page = traitlets.Int(0).tag(sync=True)
    page_size = traitlets.Int(25).tag(sync=True)
    row_count = traitlets.Int(0).tag(sync=True)
    table_html = traitlets.Unicode().tag(sync=True)

    @traitlets.validate("page")
    def _validate_page(self, proposal: Dict[str, Any]) -> int:
        """Validate and clamp the page number to a valid range.

        Args:
            proposal: A dictionary from the traitlets library containing the
                proposed change. The new value is in proposal["value"].

        Returns:
            The proposed page clamped to ``[0, max_page]``; always 0 when
            there are no rows or the page size is 0.
        """
        value = proposal["value"]
        if self.row_count == 0 or self.page_size == 0:
            return 0

        # Calculate the zero-indexed maximum page number.
        max_page = max(0, math.ceil(self.row_count / self.page_size) - 1)

        # Clamp the proposed value to the valid range [0, max_page].
        return max(0, min(value, max_page))

    def _get_next_batch(self) -> bool:
        """Fetch the next batch from the iterator and append it to the cache.

        Returns:
            True if a batch was successfully loaded, False if the data was
            already exhausted or became exhausted during this call.
        """
        if self._all_data_loaded:
            return False

        try:
            batch = next(self._batch_iterator)
        except StopIteration:
            # Remember exhaustion so later calls return immediately.
            self._all_data_loaded = True
            return False

        self._cached_batches.append(batch)
        return True

    @property
    def _batch_iterator(self) -> Iterator[pd.DataFrame]:
        """Lazily initialize and return the batch iterator."""
        if self._batch_iter is None:
            self._batch_iter = iter(self._batches)
        return self._batch_iter

    @property
    def _cached_data(self) -> pd.DataFrame:
        """Combine all cached batches into a single DataFrame.

        Every access concatenates the whole cache, so callers should read this
        once rather than inside a loop.
        """
        if not self._cached_batches:
            return pd.DataFrame(columns=self._dataframe.columns)
        return pd.concat(self._cached_batches, ignore_index=True)

    def _set_table_html(self) -> None:
        """Render the current page to HTML and store it in ``table_html``.

        Fetches further batches until the cache covers the requested page or
        the data is exhausted.
        """
        start = self.page * self.page_size
        end = start + self.page_size

        # Count cached rows instead of re-concatenating the whole cache after
        # every fetched batch; the cache is combined once, after the loop.
        cached_rows = sum(len(batch) for batch in self._cached_batches)
        while cached_rows < end and self._get_next_batch():
            cached_rows += len(self._cached_batches[-1])

        # Rows of the current page (may be short or empty on the last page).
        page_data = self._cached_data.iloc[start:end]

        # Cell values come from user data and are sent to the browser, so they
        # are HTML-escaped to prevent markup/script injection.
        self.table_html = page_data.to_html(
            index=False,
            max_rows=None,
            table_id=f"table-{self._table_id}",
            classes="table table-striped table-hover",
            escape=True,
        )

    @traitlets.observe("page")
    def _page_changed(self, change: Dict[str, Any]) -> None:
        """Re-render the table when the page number is changed from the frontend."""
        self._set_table_html()
0 commit comments