-
Notifications
You must be signed in to change notification settings - Fork 63
fix: Correct DataFrame widget rendering in Colab #2319
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
be1dc28
751c96d
9e61250
fb9c94a
a4b5d69
b516978
63c15dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -789,9 +789,7 @@ def __repr__(self) -> str: | |
|
|
||
| opts = bigframes.options.display | ||
| max_results = opts.max_rows | ||
| # anywdiget mode uses the same display logic as the "deferred" mode | ||
| # for faster execution | ||
| if opts.repr_mode in ("deferred", "anywidget"): | ||
| if opts.repr_mode == "deferred": | ||
| return formatter.repr_query_job(self._compute_dry_run()) | ||
|
|
||
| # TODO(swast): pass max_columns and get the true column count back. Maybe | ||
|
|
@@ -829,68 +827,143 @@ def __repr__(self) -> str: | |
| lines.append(f"[{row_count} rows x {column_count} columns]") | ||
| return "\n".join(lines) | ||
|
|
||
| def _repr_html_(self) -> str: | ||
| """ | ||
| Returns an html string primarily for use by notebooks for displaying | ||
| a representation of the DataFrame. Displays 20 rows by default since | ||
| many notebooks are not configured for large tables. | ||
| """ | ||
| opts = bigframes.options.display | ||
| max_results = opts.max_rows | ||
| if opts.repr_mode == "deferred": | ||
| return formatter.repr_query_job(self._compute_dry_run()) | ||
|
|
||
| # Process blob columns first, regardless of display mode | ||
| self._cached() | ||
| df = self.copy() | ||
| def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: | ||
| """Process blob columns for display.""" | ||
| df = self | ||
| blob_cols = [] | ||
| if bigframes.options.display.blob_display: | ||
| blob_cols = [ | ||
| series_name | ||
| for series_name, series in df.items() | ||
| for series_name, series in self.items() | ||
| if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE | ||
| ] | ||
| for col in blob_cols: | ||
| # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. | ||
| df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) | ||
| if blob_cols: | ||
| df = self.copy() | ||
| for col in blob_cols: | ||
| # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. | ||
| df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) | ||
| return df, blob_cols | ||
|
|
||
| def _get_anywidget_bundle(self, include=None, exclude=None): | ||
| """ | ||
| Helper method to create and return the anywidget mimebundle. | ||
| This function encapsulates the logic for anywidget display. | ||
| """ | ||
| from bigframes import display | ||
|
|
||
| # TODO(shuowei): Keep blob_cols and pass them to TableWidget so that they can render properly. | ||
| df, _ = self._get_display_df_and_blob_cols() | ||
|
|
||
| # Create and display the widget | ||
| widget = display.TableWidget(df) | ||
| widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) | ||
|
|
||
| # Handle both tuple (data, metadata) and dict returns | ||
| if isinstance(widget_repr_result, tuple): | ||
| widget_repr, widget_metadata = widget_repr_result | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Even though this PR is a large one, the only difference from #2271 is this added metadata part. PR #2271 discards the metadata, which causes the widget not to be displayed in Colab notebooks. I modified the method to ensure the metadata is preserved and returned. Verified at: screen/AjTEQC8SrSfMqhN |
||
| else: | ||
| blob_cols = [] | ||
| widget_repr = widget_repr_result | ||
| widget_metadata = None | ||
|
||
|
|
||
| if opts.repr_mode == "anywidget": | ||
| try: | ||
| from IPython.display import display as ipython_display | ||
| widget_repr = dict(widget_repr) | ||
|
|
||
| from bigframes import display | ||
| # At this point, we have already executed the query as part of the | ||
| # widget construction. Let's use the information available to render | ||
| # the HTML and plain text versions. | ||
| widget_repr["text/html"] = widget.table_html | ||
|
||
|
|
||
| # Always create a new widget instance for each display call | ||
| # This ensures that each cell gets its own widget and prevents | ||
| # unintended sharing between cells | ||
| widget = display.TableWidget(df.copy()) | ||
| widget_repr["text/plain"] = self._create_text_representation( | ||
| widget._cached_data, widget.row_count | ||
| ) | ||
|
|
||
| if widget_metadata is not None: | ||
| return widget_repr, widget_metadata | ||
| return widget_repr | ||
|
||
|
|
||
| def _create_text_representation( | ||
| self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int] | ||
| ) -> str: | ||
| """Create a text representation of the DataFrame.""" | ||
| opts = bigframes.options.display | ||
| with display_options.pandas_repr(opts): | ||
| import pandas.io.formats | ||
|
|
||
| ipython_display(widget) | ||
| return "" # Return empty string since we used display() | ||
| # safe to mutate this, this dict is owned by this code, and does not affect global config | ||
| to_string_kwargs = ( | ||
| pandas.io.formats.format.get_dataframe_repr_params() # type: ignore | ||
| ) | ||
| if not self._has_index: | ||
| to_string_kwargs.update({"index": False}) | ||
|
|
||
| # We add our own dimensions string, so don't want pandas to. | ||
| to_string_kwargs.update({"show_dimensions": False}) | ||
| repr_string = pandas_df.to_string(**to_string_kwargs) | ||
|
|
||
| except (AttributeError, ValueError, ImportError): | ||
| # Fallback if anywidget is not available | ||
| lines = repr_string.split("\n") | ||
|
|
||
| if total_rows is not None and total_rows > len(pandas_df): | ||
| lines.append("...") | ||
|
|
||
| lines.append("") | ||
| column_count = len(self.columns) | ||
| lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") | ||
| return "\n".join(lines) | ||
|
|
||
| def _repr_mimebundle_(self, include=None, exclude=None): | ||
| """ | ||
| Custom display method for IPython/Jupyter environments. | ||
| This is called by IPython's display system when the object is displayed. | ||
| """ | ||
| opts = bigframes.options.display | ||
| # Only handle widget display in anywidget mode | ||
| if opts.repr_mode == "anywidget": | ||
| try: | ||
| return self._get_anywidget_bundle(include=include, exclude=exclude) | ||
|
|
||
| except ImportError: | ||
| # Anywidget is an optional dependency, so warn rather than fail. | ||
| # TODO(shuowei): When Anywidget becomes the default for all repr modes, | ||
| # remove this warning. | ||
| warnings.warn( | ||
| "Anywidget mode is not available. " | ||
| "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " | ||
| f"Falling back to deferred mode. Error: {traceback.format_exc()}" | ||
| f"Falling back to static HTML. Error: {traceback.format_exc()}" | ||
| ) | ||
| return formatter.repr_query_job(self._compute_dry_run()) | ||
|
|
||
| # Continue with regular HTML rendering for non-anywidget modes | ||
| # TODO(swast): pass max_columns and get the true column count back. Maybe | ||
| # get 1 more column than we have requested so that pandas can add the | ||
| # ... for us? | ||
| # In non-anywidget mode, fetch data once and use it for both HTML | ||
| # and plain text representations to avoid multiple queries. | ||
| opts = bigframes.options.display | ||
| max_results = opts.max_rows | ||
|
|
||
| df, blob_cols = self._get_display_df_and_blob_cols() | ||
|
|
||
| pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( | ||
| max_results | ||
| ) | ||
|
|
||
| self._set_internal_query_job(query_job) | ||
| column_count = len(pandas_df.columns) | ||
|
|
||
| html_string = self._create_html_representation( | ||
| pandas_df, row_count, column_count, blob_cols | ||
| ) | ||
|
|
||
| text_representation = self._create_text_representation(pandas_df, row_count) | ||
|
|
||
| return {"text/html": html_string, "text/plain": text_representation} | ||
|
|
||
| def _create_html_representation( | ||
| self, | ||
| pandas_df: pandas.DataFrame, | ||
| row_count: int, | ||
| column_count: int, | ||
| blob_cols: list[str], | ||
| ) -> str: | ||
| """Create an HTML representation of the DataFrame.""" | ||
| opts = bigframes.options.display | ||
| with display_options.pandas_repr(opts): | ||
| # Allows to preview images in the DataFrame. The implementation changes the string repr as well, that it doesn't truncate strings or escape html charaters such as "<" and ">". We may need to implement a full-fledged repr module to better support types not in pandas. | ||
| # TODO(shuowei, b/464053870): Escaping HTML would be useful, but | ||
| # `escape=False` is needed to show images. We may need to implement | ||
| # a full-fledged repr module to better support types not in pandas. | ||
| if bigframes.options.display.blob_display and blob_cols: | ||
|
|
||
| def obj_ref_rt_to_html(obj_ref_rt) -> str: | ||
|
|
@@ -919,15 +992,12 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: | |
|
|
||
| # set max_colwidth so not to truncate the image url | ||
| with pandas.option_context("display.max_colwidth", None): | ||
| max_rows = pandas.get_option("display.max_rows") | ||
| max_cols = pandas.get_option("display.max_columns") | ||
| show_dimensions = pandas.get_option("display.show_dimensions") | ||
| html_string = pandas_df.to_html( | ||
| escape=False, | ||
| notebook=True, | ||
| max_rows=max_rows, | ||
| max_cols=max_cols, | ||
| show_dimensions=show_dimensions, | ||
| max_rows=pandas.get_option("display.max_rows"), | ||
| max_cols=pandas.get_option("display.max_columns"), | ||
| show_dimensions=pandas.get_option("display.show_dimensions"), | ||
| formatters=formatters, # type: ignore | ||
| ) | ||
| else: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's add a return type here, which is a tuple with two dictionaries.