Skip to content

Commit 088b183

Browse files
authored
chore: add experimental image blob preview in DataFrame._repr_html_ (#1276)
1 parent 2c771aa commit 088b183

File tree

1 file changed

+39
-3
lines changed

1 file changed

+39
-3
lines changed

bigframes/dataframe.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -739,10 +739,23 @@ def _repr_html_(self) -> str:
739739
if opts.repr_mode == "deferred":
740740
return formatter.repr_query_job(self._compute_dry_run())
741741

742+
df = self.copy()
743+
if bigframes.options.experiments.blob:
744+
import bigframes.bigquery as bbq
745+
746+
blob_cols = [
747+
col
748+
for col in df.columns
749+
if df[col].dtype == bigframes.dtypes.OBJ_REF_DTYPE
750+
]
751+
for col in blob_cols:
752+
df[col] = df[col]._apply_unary_op(ops.ObjGetAccessUrl(mode="R"))
753+
df[col] = bbq.json_extract(df[col], "$.access_urls.read_url")
754+
742755
# TODO(swast): pass max_columns and get the true column count back. Maybe
743756
# get 1 more column than we have requested so that pandas can add the
744757
# ... for us?
745-
pandas_df, row_count, query_job = self._block.retrieve_repr_request_results(
758+
pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
746759
max_results
747760
)
748761

@@ -751,8 +764,31 @@ def _repr_html_(self) -> str:
751764
column_count = len(pandas_df.columns)
752765

753766
with display_options.pandas_repr(opts):
754-
# _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
755-
html_string = pandas_df._repr_html_() # type:ignore
767+
# Allows previewing images in the DataFrame. This implementation also changes the string repr: it no longer truncates strings or escapes HTML characters such as "<" and ">". We may need to implement a full-fledged repr module to better support types not in pandas.
768+
if bigframes.options.experiments.blob:
769+
770+
def url_to_image_html(url: str) -> str:
771+
# url is a json string, which already contains double-quotes ""
772+
return f"<img src={url}>"
773+
774+
formatters = {blob_col: url_to_image_html for blob_col in blob_cols}
775+
776+
# Set max_colwidth to None so that the image URL is not truncated.
777+
with pandas.option_context("display.max_colwidth", None):
778+
max_rows = pandas.get_option("display.max_rows")
779+
max_cols = pandas.get_option("display.max_columns")
780+
show_dimensions = pandas.get_option("display.show_dimensions")
781+
html_string = pandas_df.to_html(
782+
escape=False,
783+
notebook=True,
784+
max_rows=max_rows,
785+
max_cols=max_cols,
786+
show_dimensions=show_dimensions,
787+
formatters=formatters, # type: ignore
788+
)
789+
else:
790+
# _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy.
791+
html_string = pandas_df._repr_html_() # type:ignore
756792

757793
html_string += f"[{row_count} rows x {column_count} columns in total]"
758794
return html_string

0 commit comments

Comments
 (0)