feat: Add bpd.options.display.repr_mode = "anywidget" to create an interactive display of the results (#1820)

shuoweil · web-flow · commit be0a3cf7711d · 2025-06-18T16:59:00.000-05:00
* add anywidget as extra python package

* Add anywidget to bigframes

* add the first testcase

* fix mypy error

* Show first page of results (to_pandas_batches()) is done

* add the testcase

* Add more testcases

* change an import

* change anywidget mode for plain text

* fix noxfile

* add anywidget for docfx

* ignore missing import

* fix doctest

* add unittest

* add notebook test

* fix unit-10.12

* change testcase

* make anywidget additional

* remove anywidget_mode.ipynb in notebook session due to deferred mode

* fix typo

* fix failed testcase
diff --git a/bigframes/_config/display_options.py b/bigframes/_config/display_options.py
@@ -29,7 +29,7 @@ class DisplayOptions:
     max_columns: int = 20
     max_rows: int = 25
     progress_bar: Optional[str] = "auto"
-    repr_mode: Literal["head", "deferred"] = "head"
+    repr_mode: Literal["head", "deferred", "anywidget"] = "head"
 
     max_info_columns: int = 100
     max_info_rows: Optional[int] = 200000
diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py
@@ -251,7 +251,9 @@ def __repr__(self) -> str:
         # metadata, like we do with DataFrame.
         opts = bigframes.options.display
         max_results = opts.max_rows
-        if opts.repr_mode == "deferred":
+        # anywidget mode uses the same display logic as the "deferred" mode
+        # for faster execution
+        if opts.repr_mode in ("deferred", "anywidget"):
             _, dry_run_query_job = self._block._compute_dry_run()
             return formatter.repr_query_job(dry_run_query_job)
 
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
@@ -725,7 +725,9 @@ def __repr__(self) -> str:
 
         opts = bigframes.options.display
         max_results = opts.max_rows
-        if opts.repr_mode == "deferred":
+        # anywidget mode uses the same display logic as the "deferred" mode
+        # for faster execution
+        if opts.repr_mode in ("deferred", "anywidget"):
             return formatter.repr_query_job(self._compute_dry_run())
 
         # TODO(swast): pass max_columns and get the true column count back. Maybe
@@ -774,6 +776,23 @@ def _repr_html_(self) -> str:
         if opts.repr_mode == "deferred":
             return formatter.repr_query_job(self._compute_dry_run())
 
+        if opts.repr_mode == "anywidget":
+            import anywidget  # type: ignore
+
+            # create an iterator for the data batches
+            batches = self.to_pandas_batches()
+
+            # get the first page result
+            try:
+                first_page = next(iter(batches))
+            except StopIteration:
+                first_page = pandas.DataFrame(columns=self.columns)
+
+            # Instantiate and return the widget. The widget's frontend will
+            # handle the display of the table and pagination
+            return anywidget.AnyWidget(dataframe=first_page)
+
+        self._cached()
         df = self.copy()
         if bigframes.options.display.blob_display:
             blob_cols = [
diff --git a/bigframes/series.py b/bigframes/series.py
@@ -430,7 +430,9 @@ def __repr__(self) -> str:
         # metadata, like we do with DataFrame.
         opts = bigframes.options.display
         max_results = opts.max_rows
-        if opts.repr_mode == "deferred":
+        # anywidget mode uses the same display logic as the "deferred" mode
+        # for faster execution
+        if opts.repr_mode in ("deferred", "anywidget"):
             return formatter.repr_query_job(self._compute_dry_run())
 
         self._cached()
diff --git a/mypy.ini b/mypy.ini
@@ -41,3 +41,6 @@ ignore_missing_imports = True
 
 [mypy-google.cloud.bigtable]
 ignore_missing_imports = True
+
+[mypy-anywidget]
+ignore_missing_imports = True
diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
@@ -0,0 +1,149 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "d10bfca4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright 2025 Google LLC\n",
+    "#\n",
+    "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+    "# you may not use this file except in compliance with the License.\n",
+    "# You may obtain a copy of the License at\n",
+    "#\n",
+    "#     https://www.apache.org/licenses/LICENSE-2.0\n",
+    "#\n",
+    "# Unless required by applicable law or agreed to in writing, software\n",
+    "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+    "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+    "# See the License for the specific language governing permissions and\n",
+    "# limitations under the License."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "acca43ae",
+   "metadata": {},
+   "source": [
+    "# Demo to Show Anywidget mode"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ca22f059",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import bigframes.pandas as bpd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "04406a4d",
+   "metadata": {},
+   "source": [
+    "Set the display option to use anywidget"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "1bc5aaf3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bpd.options.display.repr_mode = \"anywidget\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0a354c69",
+   "metadata": {},
+   "source": [
+    "Display the dataframe in anywidget mode"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "f289d250",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "Query job 91997f19-1768-4360-afa7-4a431b3e2d22 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:91997f19-1768-4360-afa7-4a431b3e2d22&page=queryresults\">Open Job</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Computation deferred. Computation will process 171.4 MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n",
+    "print(df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3a73e472",
+   "metadata": {},
+   "source": [
+    "Display Series in anywidget mode"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "42bb02ab",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Computation deferred. Computation will process 171.4 MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "test_series = df[\"year\"]\n",
+    "print(test_series)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/noxfile.py b/noxfile.py
@@ -77,9 +77,9 @@
 ]
 UNIT_TEST_LOCAL_DEPENDENCIES: List[str] = []
 UNIT_TEST_DEPENDENCIES: List[str] = []
-UNIT_TEST_EXTRAS: List[str] = ["tests"]
+UNIT_TEST_EXTRAS: List[str] = ["tests", "anywidget"]
 UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {
-    "3.12": ["tests", "polars", "scikit-learn"],
+    "3.12": ["tests", "polars", "scikit-learn", "anywidget"],
 }
 
 # 3.10 is needed for Windows tests as it is the only version installed in the
@@ -106,9 +106,9 @@
 SYSTEM_TEST_DEPENDENCIES: List[str] = []
 SYSTEM_TEST_EXTRAS: List[str] = []
 SYSTEM_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {
-    "3.9": ["tests"],
+    "3.9": ["tests", "anywidget"],
     "3.10": ["tests"],
-    "3.12": ["tests", "scikit-learn", "polars"],
+    "3.12": ["tests", "scikit-learn", "polars", "anywidget"],
     "3.13": ["tests", "polars"],
 }
 
@@ -276,6 +276,7 @@ def mypy(session):
                 "types-setuptools",
                 "types-tabulate",
                 "polars",
+                "anywidget",
             ]
         )
         | set(SYSTEM_TEST_STANDARD_DEPENDENCIES)
@@ -518,6 +519,7 @@ def docs(session):
         SPHINX_VERSION,
         "alabaster",
         "recommonmark",
+        "anywidget",
     )
 
     shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
@@ -560,6 +562,7 @@ def docfx(session):
         "alabaster",
         "recommonmark",
         "gcp-sphinx-docfx-yaml==3.0.1",
+        "anywidget",
     )
 
     shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
@@ -763,6 +766,7 @@ def notebook(session: nox.Session):
         "google-cloud-aiplatform",
         "matplotlib",
         "seaborn",
+        "anywidget",
     )
 
     notebooks_list = list(pathlib.Path("notebooks/").glob("*/*.ipynb"))
@@ -805,6 +809,9 @@ def notebook(session: nox.Session):
         # continuously tested.
         "notebooks/apps/synthetic_data_generation.ipynb",
         "notebooks/multimodal/multimodal_dataframe.ipynb",  # too slow
+        # This anywidget notebook uses deferred execution, so it won't
+        # produce metrics for the performance benchmark script.
+        "notebooks/dataframes/anywidget_mode.ipynb",
     ]
 
     # TODO: remove exception for Python 3.13 cloud run adds a runtime for it (internal issue 333742751)
diff --git a/setup.py b/setup.py
@@ -86,6 +86,10 @@
         "nox",
         "google-cloud-testutils",
     ],
+    # install anywidget for the interactive display (display.repr_mode = "anywidget")
+    "anywidget": [
+        "anywidget>=0.9.18",
+    ],
 }
 extras["all"] = list(sorted(frozenset(itertools.chain.from_iterable(extras.values()))))
 
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
@@ -810,7 +810,7 @@ def test_repr_html_w_all_rows(scalars_dfs, session):
         + f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]"
     )
     assert actual == expected
-    assert (executions_post - executions_pre) <= 2
+    assert (executions_post - executions_pre) <= 3
 
 
 def test_df_column_name_with_space(scalars_dfs):
diff --git a/tests/system/small/test_progress_bar.py b/tests/system/small/test_progress_bar.py
@@ -17,6 +17,7 @@
 
 import numpy as np
 import pandas as pd
+import pytest
 
 import bigframes as bf
 import bigframes.formatting_helpers as formatting_helpers
@@ -164,3 +165,18 @@ def test_query_job_dry_run_series(penguins_df_default_index: bf.dataframe.DataFr
     with bf.option_context("display.repr_mode", "deferred"):
         series_result = repr(penguins_df_default_index["body_mass_g"])
         assert EXPECTED_DRY_RUN_MESSAGE in series_result
+
+
+def test_repr_anywidget_dataframe(penguins_df_default_index: bf.dataframe.DataFrame):
+    pytest.importorskip("anywidget")
+    with bf.option_context("display.repr_mode", "anywidget"):
+        actual_repr = repr(penguins_df_default_index)
+        assert EXPECTED_DRY_RUN_MESSAGE in actual_repr
+
+
+def test_repr_anywidget_idex(penguins_df_default_index: bf.dataframe.DataFrame):
+    pytest.importorskip("anywidget")
+    with bf.option_context("display.repr_mode", "anywidget"):
+        index = penguins_df_default_index.index
+        actual_repr = repr(index)
+        assert EXPECTED_DRY_RUN_MESSAGE in actual_repr

Original file line number	Diff line number	Diff line change
`@@ -810,7 +810,7 @@ def test_repr_html_w_all_rows(scalars_dfs, session):`
`810`	`810`	`+ f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]"`
`811`	`811`	`)`
`812`	`812`	`assert actual == expected`
`813`		`- assert (executions_post - executions_pre) <= 2`
	`813`	`+ assert (executions_post - executions_pre) <= 3`
`814`	`814`
`815`	`815`
`816`	`816`	`def test_df_column_name_with_space(scalars_dfs):`