Skip to content

Commit 8eca767

Browse files
feat: Add pagination buttons (prev/next) to anywidget mode for DataFrames (#1841)
* code update * update init file * add more testcases and demo notebook * add testcase for anywidget buttons * move js code to a separate file * fix owlbot.py * remove extra line * modify testcase * add cleanup session * use traceback * no need of fall back method * use test-specific small data instead * testcase update * handle optional import * polish python function * remove duplicate warning * finish touch up * use WIDGET_BASE * code change, add more tests * change function name * remove widget reuse * remove weakref import * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 05cb7d0 commit 8eca767

File tree

13 files changed

+875
-21
lines changed

13 files changed

+875
-21
lines changed

.pre-commit-config.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,8 @@ repos:
4242
additional_dependencies: [types-requests, types-tabulate, types-PyYAML, pandas-stubs<=2.2.3.241126]
4343
exclude: "^third_party"
4444
args: ["--check-untyped-defs", "--explicit-package-bases", "--ignore-missing-imports"]
45+
- repo: https://github.com/biomejs/pre-commit
46+
rev: v2.0.2
47+
hooks:
48+
- id: biome-check
49+
files: '\.js$'

MANIFEST.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
# Generated by synthtool. DO NOT EDIT!
1818
include README.rst LICENSE
1919
recursive-include third_party/bigframes_vendored *
20-
recursive-include bigframes *.json *.proto py.typed
20+
recursive-include bigframes *.json *.proto *.js py.typed
2121
recursive-include tests *
2222
global-exclude *.py[co]
2323
global-exclude __pycache__

bigframes/dataframe.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -779,22 +779,7 @@ def _repr_html_(self) -> str:
779779
if opts.repr_mode == "deferred":
780780
return formatter.repr_query_job(self._compute_dry_run())
781781

782-
if opts.repr_mode == "anywidget":
783-
import anywidget # type: ignore
784-
785-
# create an iterator for the data batches
786-
batches = self.to_pandas_batches()
787-
788-
# get the first page result
789-
try:
790-
first_page = next(iter(batches))
791-
except StopIteration:
792-
first_page = pandas.DataFrame(columns=self.columns)
793-
794-
# Instantiate and return the widget. The widget's frontend will
795-
# handle the display of the table and pagination
796-
return anywidget.AnyWidget(dataframe=first_page)
797-
782+
# Process blob columns first, regardless of display mode
798783
self._cached()
799784
df = self.copy()
800785
if bigframes.options.display.blob_display:
@@ -806,7 +791,31 @@ def _repr_html_(self) -> str:
806791
for col in blob_cols:
807792
# TODO(garrettwu): Not necessary to get access URLs for all the rows. Update when we have a way to get URLs from local data.
808793
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
794+
else:
795+
blob_cols = []
796+
797+
if opts.repr_mode == "anywidget":
798+
try:
799+
from IPython.display import display as ipython_display
800+
801+
from bigframes import display
802+
803+
# Always create a new widget instance for each display call
804+
# This ensures that each cell gets its own widget and prevents
805+
# unintended sharing between cells
806+
widget = display.TableWidget(df.copy())
809807

808+
ipython_display(widget)
809+
return "" # Return empty string since we used display()
810+
811+
except (AttributeError, ValueError, ImportError):
812+
# Fallback if anywidget is not available
813+
warnings.warn(
814+
"Anywidget mode is not available. Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. Falling back to deferred mode."
815+
)
816+
return formatter.repr_query_job(self._compute_dry_run())
817+
818+
# Continue with regular HTML rendering for non-anywidget modes
810819
# TODO(swast): pass max_columns and get the true column count back. Maybe
811820
# get 1 more column than we have requested so that pandas can add the
812821
# ... for us?
@@ -815,7 +824,6 @@ def _repr_html_(self) -> str:
815824
)
816825

817826
self._set_internal_query_job(query_job)
818-
819827
column_count = len(pandas_df.columns)
820828

821829
with display_options.pandas_repr(opts):

bigframes/display/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
try:
18+
import anywidget # noqa
19+
20+
from bigframes.display.anywidget import TableWidget
21+
22+
__all__ = ["TableWidget"]
23+
except Exception:
24+
pass

bigframes/display/anywidget.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from importlib import resources
18+
import functools
19+
import math
20+
from typing import Any, Dict, Iterator, List, Optional, Type
21+
import uuid
22+
23+
import pandas as pd
24+
25+
import bigframes
26+
27+
# anywidget and traitlets are optional dependencies. We don't want the import of this
28+
# module to fail if they aren't installed, though. Instead, we try to limit the surface that
29+
# these packages could affect. This makes unit testing easier and ensures we don't
30+
# accidentally make these required packages.
31+
try:
    import anywidget
    import traitlets
except Exception:
    # Any failure while importing either package disables the widget.
    ANYWIDGET_INSTALLED = False
else:
    ANYWIDGET_INSTALLED = True

# Base class used by TableWidget: the real AnyWidget when the optional
# packages imported cleanly, otherwise a plain ``object`` placeholder.
WIDGET_BASE: Type[Any] = anywidget.AnyWidget if ANYWIDGET_INSTALLED else object
44+
45+
46+
class TableWidget(WIDGET_BASE):
    """An interactive, paginated table widget for BigFrames DataFrames.

    The widget exposes four synced traits: ``page``, ``page_size``,
    ``row_count`` and ``table_html``. Whenever ``page`` changes, the rows of
    that page are rendered to an HTML table and published via ``table_html``.
    Result batches are pulled lazily from ``to_pandas_batches()`` and cached,
    so a batch is only fetched the first time a page that needs it is shown.
    """

    # NOTE(review): the trait definitions and ``@traitlets`` decorators below
    # are evaluated while the class body executes. If the guarded import at
    # the top of this module failed, the name ``traitlets`` is never bound, so
    # defining this class would raise NameError before the ImportError in
    # ``__init__`` can be reached -- confirm whether importers rely on that.

    def __init__(self, dataframe: bigframes.dataframe.DataFrame):
        """Initialize the TableWidget.

        Args:
            dataframe: The BigFrames DataFrame to display in the widget.

        Raises:
            ImportError: If anywidget or traitlets could not be imported.
        """
        if not ANYWIDGET_INSTALLED:
            raise ImportError(
                "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
            )

        super().__init__()
        self._dataframe = dataframe

        # Respect display options.
        self.page_size = bigframes.options.display.max_rows

        # Initialize data fetching attributes.
        self._batches = dataframe.to_pandas_batches(page_size=self.page_size)

        # Fetched batches are stored individually and only concatenated when
        # a page is actually rendered.
        self._cached_batches: List[pd.DataFrame] = []

        # Unique identifier for the HTML table element.
        self._table_id = str(uuid.uuid4())
        self._all_data_loaded = False
        # Backing field for the lazily created ``_batch_iterator`` property.
        self._batch_iter: Optional[Iterator[pd.DataFrame]] = None

        # len(dataframe) is expensive, since it will trigger a
        # SELECT COUNT(*) query. It is a must have however.
        # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
        # before we get here so that the count might already be cached.
        self.row_count = len(dataframe)

        # Render the initial page.
        self._set_table_html()

    @functools.cached_property
    def _esm(self):
        """Load JavaScript code from external file."""
        return resources.read_text(bigframes.display, "table_widget.js")

    # Traits synced with the frontend (``sync=True``).
    page = traitlets.Int(0).tag(sync=True)
    page_size = traitlets.Int(25).tag(sync=True)
    row_count = traitlets.Int(0).tag(sync=True)
    table_html = traitlets.Unicode().tag(sync=True)

    @traitlets.validate("page")
    def _validate_page(self, proposal: Dict[str, Any]) -> int:
        """Validate and clamp the page number to a valid range.

        Args:
            proposal: A dictionary from the traitlets library containing the
                proposed change. The new value is in proposal["value"].

        Returns:
            The proposed page clamped to ``[0, max_page]``; ``0`` when there
            are no rows or the page size is zero.
        """
        value = proposal["value"]
        if self.row_count == 0 or self.page_size == 0:
            return 0

        # Calculate the zero-indexed maximum page number.
        max_page = max(0, math.ceil(self.row_count / self.page_size) - 1)

        # Clamp the proposed value to the valid range [0, max_page].
        return max(0, min(value, max_page))

    def _get_next_batch(self) -> bool:
        """Fetch the next batch from the iterator and append it to the cache.

        Returns:
            True if a batch was successfully loaded, False if the iterator is
            (or already was) exhausted.
        """
        if self._all_data_loaded:
            return False

        try:
            batch = next(self._batch_iterator)
        except StopIteration:
            self._all_data_loaded = True
            return False

        self._cached_batches.append(batch)
        return True

    @property
    def _batch_iterator(self) -> Iterator[pd.DataFrame]:
        """Lazily initializes and returns the batch iterator."""
        if self._batch_iter is None:
            self._batch_iter = iter(self._batches)
        return self._batch_iter

    @property
    def _cached_data(self) -> pd.DataFrame:
        """Combine all cached batches into a single DataFrame.

        Every access performs a fresh ``pd.concat`` over the whole cache, so
        callers should avoid reading this property inside a loop.
        """
        if not self._cached_batches:
            return pd.DataFrame(columns=self._dataframe.columns)
        return pd.concat(self._cached_batches, ignore_index=True)

    def _set_table_html(self) -> None:
        """Sets the current html data based on the current page and page size."""
        start = self.page * self.page_size
        end = start + self.page_size

        # Fetch more data if the requested page is outside our cache. Track the
        # cached row count incrementally instead of re-concatenating the whole
        # cache after every fetched batch.
        cached_rows = sum(len(batch) for batch in self._cached_batches)
        while cached_rows < end and self._get_next_batch():
            cached_rows += len(self._cached_batches[-1])

        # Get the data for the current page (single concat per render).
        page_data = self._cached_data.iloc[start:end]

        # Generate HTML table.
        # SECURITY NOTE: ``escape=False`` emits cell values as raw HTML and the
        # frontend assigns this string to ``innerHTML``. Values containing
        # markup will be interpreted by the browser; review before displaying
        # untrusted data.
        self.table_html = page_data.to_html(
            index=False,
            max_rows=None,
            table_id=f"table-{self._table_id}",
            classes="table table-striped table-hover",
            escape=False,
        )

    @traitlets.observe("page")
    def _page_changed(self, change: Dict[str, Any]) -> None:
        """Handler for when the page number is changed from the frontend."""
        self._set_table_html()

bigframes/display/table_widget.js

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/**
2+
* Copyright 2025 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
/**
 * Names of the model properties read and written by this widget.
 * @enum {string}
 */
const ModelProperty = {
  PAGE: "page",
  PAGE_SIZE: "page_size",
  ROW_COUNT: "row_count",
  TABLE_HTML: "table_html",
};

/**
 * Event names used by this widget, for both model and DOM listeners.
 * @enum {string}
 */
const Event = {
  CLICK: "click",
  CHANGE_TABLE_HTML: `change:${ModelProperty.TABLE_HTML}`,
};
28+
29+
/**
 * Renders a paginated table and its controls into a given element.
 *
 * The table markup is taken from the model's `table_html` property and is
 * replaced whenever that property changes. The Prev/Next buttons only write
 * a new `page` value to the model and call `save_changes()`; the label and
 * button states are refreshed when `table_html` changes.
 * @param {{
 *   model: !Backbone.Model,
 *   el: !HTMLElement
 * }} options
 */
function render({ model, el }) {
  const container = document.createElement("div");
  container.innerHTML = model.get(ModelProperty.TABLE_HTML);

  const buttonContainer = document.createElement("div");
  const prevPage = document.createElement("button");
  const label = document.createElement("span");
  const nextPage = document.createElement("button");

  prevPage.type = "button";
  nextPage.type = "button";
  prevPage.textContent = "Prev";
  nextPage.textContent = "Next";

  /**
   * Computes the page count from the model's row count and page size.
   * Never returns less than 1, so an empty table reads "Page 1 of 1" and a
   * zero or missing page size cannot produce NaN or Infinity.
   * @return {number} Total number of pages (>= 1).
   */
  function getTotalPages() {
    const rowCount = model.get(ModelProperty.ROW_COUNT);
    const pageSize = model.get(ModelProperty.PAGE_SIZE);
    if (!(rowCount > 0) || !(pageSize > 0)) {
      return 1;
    }
    return Math.ceil(rowCount / pageSize);
  }

  /** Updates the button states and page label based on the model. */
  function updateButtonStates() {
    const totalPages = getTotalPages();
    const currentPage = model.get(ModelProperty.PAGE);

    label.textContent = `Page ${currentPage + 1} of ${totalPages}`;
    prevPage.disabled = currentPage <= 0;
    nextPage.disabled = currentPage >= totalPages - 1;
  }

  /**
   * Updates the page in the model, clamped to the valid page range.
   * @param {number} direction -1 for previous, 1 for next.
   */
  function handlePageChange(direction) {
    const currentPage = model.get(ModelProperty.PAGE);
    const lastPage = getTotalPages() - 1;
    const newPage = Math.min(lastPage, Math.max(0, currentPage + direction));
    if (newPage !== currentPage) {
      model.set(ModelProperty.PAGE, newPage);
      model.save_changes();
    }
  }

  prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1));
  nextPage.addEventListener(Event.CLICK, () => handlePageChange(1));

  model.on(Event.CHANGE_TABLE_HTML, () => {
    // Note: Using innerHTML can be a security risk if the content is
    // user-generated. Ensure 'table_html' is properly sanitized.
    container.innerHTML = model.get(ModelProperty.TABLE_HTML);
    updateButtonStates();
  });

  // Initial setup
  updateButtonStates();

  buttonContainer.appendChild(prevPage);
  buttonContainer.appendChild(label);
  buttonContainer.appendChild(nextPage);
  el.appendChild(container);
  el.appendChild(buttonContainer);
}
94+
95+
export default { render };

0 commit comments

Comments
 (0)