Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
218b7c2
feat: Enhance Series and DataFrame display with anywidget
shuoweil Dec 19, 2025
9e3163f
test: add more npm tests
shuoweil Dec 19, 2025
3227b23
test: add this file for faster and reliable npm tests
shuoweil Dec 19, 2025
c1a8f83
docs: notebook update
shuoweil Dec 19, 2025
c39293a
test: update old testcase due to new feature implementation
shuoweil Dec 19, 2025
9dff0f4
Revert "test: update old testcase due to new feature implementation"
shuoweil Dec 19, 2025
f67e30f
feat: only display row count when series is larger than the number can…
shuoweil Dec 19, 2025
81d1dbe
refactor: Handle special float values and None consistently in sqlglo…
chelsea-lin Dec 19, 2025
057f54d
Merge branch 'main' into shuowei-anywidget-series-display
shuoweil Dec 19, 2025
f70d5c1
Merge branch 'main' into shuowei-anywidget-series-display
shuoweil Dec 19, 2025
58e357a
Merge branch 'main' into shuowei-anywidget-series-display
shuoweil Dec 19, 2025
fd04e6a
refactor: code refactor
shuoweil Dec 19, 2025
4825aeb
fix: fix mypy
shuoweil Dec 19, 2025
8845464
Merge branch 'main' into shuowei-anywidget-series-display
shuoweil Dec 22, 2025
d36fc0f
refactor: move code to plaintext file and add checks
shuoweil Dec 22, 2025
593f9ae
refactor: move code to plaintext file and add checks
shuoweil Dec 22, 2025
400ea07
Revert "refactor: move code to plaintext file and add checks"
shuoweil Dec 22, 2025
945616c
Merge branch 'main' into shuowei-anywidget-series-display
shuoweil Dec 23, 2025
a474606
refactor: move create_text_representation to plaintext.py
shuoweil Dec 23, 2025
bd56992
refactor: move display logic to display/plaintext.py and display/html.py
shuoweil Dec 23, 2025
971ee33
refactor: restore original order of max_results in __repr__
shuoweil Dec 23, 2025
1a73628
docs: add todo back
shuoweil Dec 23, 2025
1b7952b
refactor: split repr_mimebundle logic, handle deferred mode in html, …
shuoweil Dec 23, 2025
15b2ac6
refactor: rename repr_mimebundle helpers and improve fallback comments
shuoweil Dec 23, 2025
f8914c8
style: fix repr_mimebundle docstring formatting
shuoweil Dec 23, 2025
9fea10e
docs: update anywidget demo notebook with series display showcase
shuoweil Dec 23, 2025
a20a5ee
docs: update notebook
shuoweil Dec 23, 2025
9e92c2a
Merge branch 'main' into shuowei-anywidget-series-display
shuoweil Dec 23, 2025
c0f4b4e
refactor: decouple plaintext representation from core objects
shuoweil Dec 23, 2025
38899b7
refactor: consolidate object metadata extraction for display
shuoweil Dec 23, 2025
64230d1
fix: refactor html display to address review comments
shuoweil Dec 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 5 additions & 136 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import re
import sys
import textwrap
import traceback
import typing
from typing import (
Any,
Expand Down Expand Up @@ -788,44 +787,18 @@ def __repr__(self) -> str:
return object.__repr__(self)

opts = bigframes.options.display
max_results = opts.max_rows
if opts.repr_mode == "deferred":
return formatter.repr_query_job(self._compute_dry_run())

# TODO(swast): pass max_columns and get the true column count back. Maybe
# get 1 more column than we have requested so that pandas can add the
# ... for us?
max_results = opts.max_rows
pandas_df, row_count, query_job = self._block.retrieve_repr_request_results(
max_results
)

self._set_internal_query_job(query_job)
from bigframes.display import html

column_count = len(pandas_df.columns)

with display_options.pandas_repr(opts):
import pandas.io.formats

# safe to mutate this, this dict is owned by this code, and does not affect global config
to_string_kwargs = (
pandas.io.formats.format.get_dataframe_repr_params() # type: ignore
)
if not self._has_index:
to_string_kwargs.update({"index": False})
repr_string = pandas_df.to_string(**to_string_kwargs)

# Modify the end of the string to reflect count.
lines = repr_string.split("\n")
pattern = re.compile("\\[[0-9]+ rows x [0-9]+ columns\\]")
if pattern.match(lines[-1]):
lines = lines[:-2]

if row_count > len(lines) - 1:
lines.append("...")

lines.append("")
lines.append(f"[{row_count} rows x {column_count} columns]")
return "\n".join(lines)
return html.create_text_representation(self, pandas_df, row_count)

def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]:
"""Process blob columns for display."""
Expand All @@ -844,118 +817,14 @@ def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]:
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
return df, blob_cols

def _get_anywidget_bundle(
    self, include=None, exclude=None
) -> tuple[dict[str, Any], dict[str, Any]]:
    """
    Build the anywidget mimebundle for this DataFrame.

    A ``TableWidget`` is created from the display-ready frame and asked for
    its own mimebundle; static ``text/html`` and ``text/plain`` entries are
    then added to that bundle.

    Args:
        include: Forwarded unchanged to the widget's ``_repr_mimebundle_``.
        exclude: Forwarded unchanged to the widget's ``_repr_mimebundle_``.

    Returns:
        A ``(data, metadata)`` pair. ``metadata`` is an empty dict when the
        widget returned only a data mapping.
    """
    from bigframes import display

    display_df, blob_columns = self._get_display_df_and_blob_cols()

    table_widget = display.TableWidget(display_df)
    raw_bundle = table_widget._repr_mimebundle_(include=include, exclude=exclude)

    # The widget may answer with (data, metadata) or with data alone.
    if isinstance(raw_bundle, tuple):
        bundle_data, bundle_metadata = raw_bundle
    else:
        bundle_data, bundle_metadata = raw_bundle, {}

    # Work on a copy so the extra entries are added to our own dict.
    bundle = dict(bundle_data)

    # Reuse the data the widget already holds for the static HTML and
    # plain-text renderings instead of fetching it again.
    bundle["text/html"] = self._create_html_representation(
        table_widget._cached_data,
        table_widget.row_count,
        len(self.columns),
        blob_columns,
    )
    bundle["text/plain"] = self._create_text_representation(
        table_widget._cached_data, table_widget.row_count
    )

    return bundle, bundle_metadata

def _create_text_representation(
    self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int]
) -> str:
    """Create a text representation of the DataFrame.

    Args:
        pandas_df: The locally available rows to render with pandas.
        total_rows: Row count of the full result, or ``None`` when it is
            not known.

    Returns:
        The pandas ``to_string`` output, followed by a ``...`` line when
        ``total_rows`` exceeds ``len(pandas_df)``, a blank line, and a
        ``[R rows x C columns]`` footer. ``R`` is ``?`` only when
        ``total_rows`` is ``None``.
    """
    opts = bigframes.options.display
    with display_options.pandas_repr(opts):
        import pandas.io.formats

        # safe to mutate this, this dict is owned by this code, and does not affect global config
        to_string_kwargs = (
            pandas.io.formats.format.get_dataframe_repr_params()  # type: ignore
        )
        if not self._has_index:
            to_string_kwargs.update({"index": False})

        # We add our own dimensions string, so don't want pandas to.
        to_string_kwargs.update({"show_dimensions": False})
        repr_string = pandas_df.to_string(**to_string_kwargs)

    lines = repr_string.split("\n")

    if total_rows is not None and total_rows > len(pandas_df):
        lines.append("...")

    lines.append("")
    column_count = len(self.columns)
    # Test against None explicitly: `total_rows or '?'` would also turn a
    # genuine count of 0 into "?", misreporting an empty result as unknown.
    row_label = "?" if total_rows is None else total_rows
    lines.append(f"[{row_label} rows x {column_count} columns]")
    return "\n".join(lines)

def _repr_mimebundle_(self, include=None, exclude=None):
"""
Custom display method for IPython/Jupyter environments.
This is called by IPython's display system when the object is displayed.
"""
# TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and
# BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed.
opts = bigframes.options.display
# Only handle widget display in anywidget mode
if opts.repr_mode == "anywidget":
try:
return self._get_anywidget_bundle(include=include, exclude=exclude)

except ImportError:
# Anywidget is an optional dependency, so warn rather than fail.
# TODO(shuowei): When Anywidget becomes the default for all repr modes,
# remove this warning.
warnings.warn(
"Anywidget mode is not available. "
"Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. "
f"Falling back to static HTML. Error: {traceback.format_exc()}"
)

# In non-anywidget mode, fetch data once and use it for both HTML
# and plain text representations to avoid multiple queries.
opts = bigframes.options.display
max_results = opts.max_rows

df, blob_cols = self._get_display_df_and_blob_cols()

pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
max_results
)
self._set_internal_query_job(query_job)
column_count = len(pandas_df.columns)

html_string = self._create_html_representation(
pandas_df, row_count, column_count, blob_cols
)

text_representation = self._create_text_representation(pandas_df, row_count)
from bigframes.display import html

return {"text/html": html_string, "text/plain": text_representation}
return html.repr_mimebundle(self, include=include, exclude=exclude)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see any checks for deferred mode in html.repr_mimebundle.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. repr_mimebundle now checks for the display mode and will delegate to a new repr_mimebundle_deferred function when appropriate.


def _create_html_representation(
self,
Expand Down
165 changes: 163 additions & 2 deletions bigframes/display/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,18 @@
from __future__ import annotations

import html
from typing import Any
import traceback
import typing
from typing import Any, Union
import warnings

import pandas as pd
import pandas.api.types

from bigframes._config import options
import bigframes
from bigframes._config import display_options, options
import bigframes.dataframe
import bigframes.series


def _is_dtype_numeric(dtype: Any) -> bool:
Expand Down Expand Up @@ -91,3 +97,158 @@ def render_html(
table_html.append("</table>")

return "\n".join(table_html)


def create_text_representation(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: the html module seems an inappropriate place for this. bigframes.display.plaintext would make more sense to me.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, how does this differ from DataFrame._create_text_representation? Seems this is pretty redundant.

Just as I commented on create_html_representation below, it would be more object-oriented to avoid the isinstance checks and put _create_text_representation on Series.

Copy link
Contributor Author

@shuoweil shuoweil Dec 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the feedback! I have refactored the display logic significantly in the latest revision to address this:

  1. Moved to bigframes.display.plaintext: As suggested, I created bigframes.display.plaintext and moved the text formatting logic there.
  2. Removed Redundancy: I removed _create_text_representation from both DataFrame and Series (and _create_html_representation from DataFrame). Both classes now delegate to the centralized functions in bigframes.display.plaintext and bigframes.display.html.
  3. Design Choice: I opted to centralize the display logic within the bigframes.display package to separate formatting concerns from the core data structures. This necessitates the isinstance checks within the display modules, but it keeps the DataFrame and Series classes cleaner and avoids circular dependency issues with the display logic.

obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
pandas_df: pd.DataFrame,
total_rows: typing.Optional[int],
) -> str:
"""Create a text representation of the DataFrame or Series."""
opts = bigframes.options.display
with display_options.pandas_repr(opts):
if isinstance(obj, bigframes.series.Series):
pd_series = pandas_df.iloc[:, 0]
if len(obj._block.index_columns) == 0:
repr_string = pd_series.to_string(
length=False, index=False, name=True, dtype=True
)
else:
repr_string = pd_series.to_string(length=False, name=True, dtype=True)
else:
import pandas.io.formats

to_string_kwargs = (
pandas.io.formats.format.get_dataframe_repr_params() # type: ignore
)
if not obj._has_index:
to_string_kwargs.update({"index": False})
to_string_kwargs.update({"show_dimensions": False})
repr_string = pandas_df.to_string(**to_string_kwargs)

lines = repr_string.split("\n")
is_truncated = total_rows is not None and total_rows > len(pandas_df)

if is_truncated:
lines.append("...")
lines.append("") # Add empty line for spacing only if truncated
if isinstance(obj, bigframes.series.Series):
lines.append(f"[{total_rows} rows]")
else:
column_count = len(obj.columns)
lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
elif isinstance(obj, bigframes.dataframe.DataFrame):
# For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False
column_count = len(obj.columns)
lines.append("")
lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")

return "\n".join(lines)


def create_html_representation(
obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
pandas_df: pd.DataFrame,
total_rows: int,
total_columns: int,
blob_cols: list[str],
) -> str:
"""Create an HTML representation of the DataFrame or Series."""
if isinstance(obj, bigframes.series.Series):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we had obj._create_html_representation on Series, we could avoid this isinstance check.

We could even add the same to Index at some point.

Copy link
Contributor Author

@shuoweil shuoweil Dec 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I decided to keep the logic centralized in bigframes.display.html rather than adding a new method to Series. This follows the direction of the other refactors in this branch, centralizing all formatting concerns within the bigframes.display package to keep the core data classes cleaner and simplify dependency management.

pd_series = pandas_df.iloc[:, 0]
try:
html_string = pd_series._repr_html_()
except AttributeError:
html_string = f"<pre>{pd_series.to_string()}</pre>"
else:
html_string = obj._create_html_representation(
pandas_df, total_rows, total_columns, blob_cols
)
return html_string


def get_anywidget_bundle(
    obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
    include=None,
    exclude=None,
) -> tuple[dict[str, Any], dict[str, Any]]:
    """
    Helper method to create and return the anywidget mimebundle.
    This function encapsulates the logic for anywidget display.

    Args:
        obj: The DataFrame or Series to display. A Series is converted to a
            one-column frame with ``to_frame()`` before the widget is built.
        include: Forwarded unchanged to the widget's ``_repr_mimebundle_``.
        exclude: Forwarded unchanged to the widget's ``_repr_mimebundle_``.

    Returns:
        A ``(data, metadata)`` pair. ``data`` is the widget's bundle plus
        ``text/html`` and ``text/plain`` entries; ``metadata`` is an empty
        dict when the widget returned only a data mapping.
    """
    from bigframes import display

    # Start with no blob columns so the Series branch needs no special
    # handling later; only the DataFrame branch supplies a real list.
    blob_cols: list[str] = []
    if isinstance(obj, bigframes.series.Series):
        df = obj.to_frame()
    else:
        df, blob_cols = obj._get_display_df_and_blob_cols()

    widget = display.TableWidget(df)
    widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude)

    # Handle both tuple (data, metadata) and plain-dict returns.
    if isinstance(widget_repr_result, tuple):
        widget_repr, widget_metadata = widget_repr_result
    else:
        widget_repr = widget_repr_result
        widget_metadata = {}

    # Copy so the extra entries are added to our own dict.
    widget_repr = dict(widget_repr)

    # Use cached data from widget to render HTML and plain text versions.
    cached_pd = widget._cached_data
    total_rows = widget.row_count
    total_columns = len(df.columns)

    widget_repr["text/html"] = create_html_representation(
        obj,
        cached_pd,
        total_rows,
        total_columns,
        blob_cols,
    )
    widget_repr["text/plain"] = create_text_representation(obj, cached_pd, total_rows)

    return widget_repr, widget_metadata


def repr_mimebundle(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's create separate methods for head and deferred, like repr_mimebundle_head and repr_mimebundle_deferred.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. I've split the logic into repr_mimebundle_head for standard display and repr_mimebundle_deferred for deferred execution mode.

obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
include=None,
exclude=None,
):
"""
Custom display method for IPython/Jupyter environments.
"""
opts = bigframes.options.display
if opts.repr_mode == "anywidget":
try:
return get_anywidget_bundle(obj, include=include, exclude=exclude)
except ImportError:
warnings.warn(
"Anywidget mode is not available. "
"Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. "
f"Falling back to static HTML. Error: {traceback.format_exc()}"
)

blob_cols: list[str]
if isinstance(obj, bigframes.series.Series):
pandas_df, row_count, query_job = obj._block.retrieve_repr_request_results(
opts.max_rows
)
blob_cols = []
else:
df, blob_cols = obj._get_display_df_and_blob_cols()
pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
opts.max_rows
)

obj._set_internal_query_job(query_job)
column_count = len(pandas_df.columns)

html_string = create_html_representation(
obj, pandas_df, row_count, column_count, blob_cols
)

text_representation = create_text_representation(obj, pandas_df, row_count)

return {"text/html": html_string, "text/plain": text_representation}
Loading