googleapis
diff --git a/.librarian/state.yaml
Lines changed: 1 addition & 1 deletion b/.librarian/state.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/bigframes/core/block_transforms.py
Lines changed: 22 additions & 23 deletions b/bigframes/core/block_transforms.py
Lines changed: 22 additions & 23 deletions
diff --git a/bigframes/core/compile/polars/compiler.py
Lines changed: 3 additions & 0 deletions b/bigframes/core/compile/polars/compiler.py
Lines changed: 3 additions & 0 deletions
diff --git a/bigframes/core/indexes/base.py
Lines changed: 1 addition & 5 deletions b/bigframes/core/indexes/base.py
Lines changed: 1 addition & 5 deletions
diff --git a/bigframes/dataframe.py
Lines changed: 113 additions & 52 deletions b/bigframes/dataframe.py
Lines changed: 113 additions & 52 deletions
diff --git a/bigframes/series.py
Lines changed: 0 additions & 4 deletions b/bigframes/series.py
Lines changed: 0 additions & 4 deletions
diff --git a/bigframes/streaming/dataframe.py
Lines changed: 4 additions & 4 deletions b/bigframes/streaming/dataframe.py
Lines changed: 4 additions & 4 deletions
diff --git a/docs/conf.py
Lines changed: 3 additions & 0 deletions b/docs/conf.py
Lines changed: 3 additions & 0 deletions
@@ -1,7 +1,7 @@
 image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620
 libraries:
   - id: bigframes
-    version: 2.28.0
+    version: 2.29.1
     apis: []
     source_roots:
       - .
 
@@ -67,40 +67,39 @@ def indicate_duplicates(
     if keep not in ["first", "last", False]:
         raise ValueError("keep must be one of 'first', 'last', or False'")
 
+    rownums = agg_expressions.WindowExpression(
+        agg_expressions.NullaryAggregation(
+            agg_ops.RowNumberOp(),
+        ),
+        window=windows.unbound(grouping_keys=tuple(columns)),
+    )
+    count = agg_expressions.WindowExpression(
+        agg_expressions.NullaryAggregation(
+            agg_ops.SizeOp(),
+        ),
+        window=windows.unbound(grouping_keys=tuple(columns)),
+    )
+
     if keep == "first":
         # Count how many copies occur up to current copy of value
         # Discard this value if there are copies BEFORE
-        window_spec = windows.cumulative_rows(
-            grouping_keys=tuple(columns),
-        )
+        predicate = ops.gt_op.as_expr(rownums, ex.const(0))
     elif keep == "last":
         # Count how many copies occur up to current copy of values
         # Discard this value if there are copies AFTER
-        window_spec = windows.inverse_cumulative_rows(
-            grouping_keys=tuple(columns),
-        )
+        predicate = ops.lt_op.as_expr(rownums, ops.sub_op.as_expr(count, ex.const(1)))
     else:  # keep == False
         # Count how many copies of the value occur in entire series.
         # Discard this value if there are copies ANYWHERE
-        window_spec = windows.unbound(grouping_keys=tuple(columns))
-    block, dummy = block.create_constant(1)
-    # use row number as will work even with partial ordering
-    block, val_count_col_id = block.apply_window_op(
-        dummy,
-        agg_ops.sum_op,
-        window_spec=window_spec,
-    )
-    block, duplicate_indicator = block.project_expr(
-        ops.gt_op.as_expr(val_count_col_id, ex.const(1))
+        predicate = ops.gt_op.as_expr(count, ex.const(1))
+
+    block = block.project_block_exprs(
+        [predicate],
+        labels=[None],
     )
     return (
-        block.drop_columns(
-            (
-                dummy,
-                val_count_col_id,
-            )
-        ),
-        duplicate_indicator,
+        block,
+        block.value_columns[-1],
     )
 
 
 
@@ -547,6 +547,9 @@ def compile_agg_op(
                 return pl.col(*inputs).first()
             if isinstance(op, agg_ops.LastOp):
                 return pl.col(*inputs).last()
+            if isinstance(op, agg_ops.RowNumberOp):
+                # pl.row_index is not yet stable enough to use here, and only supports polars>=1.32
+                return pl.int_range(pl.len(), dtype=pl.Int64)
             if isinstance(op, agg_ops.ShiftOp):
                 return pl.col(*inputs).shift(op.periods)
             if isinstance(op, agg_ops.DiffOp):
 
@@ -376,9 +376,7 @@ def __repr__(self) -> __builtins__.str:
         # metadata, like we do with DataFrame.
         opts = bigframes.options.display
         max_results = opts.max_rows
-        # anywdiget mode uses the same display logic as the "deferred" mode
-        # for faster execution
-        if opts.repr_mode in ("deferred", "anywidget"):
+        if opts.repr_mode == "deferred":
             _, dry_run_query_job = self._block._compute_dry_run()
             return formatter.repr_query_job(dry_run_query_job)
 
@@ -626,8 +624,6 @@ def dropna(self, how: typing.Literal["all", "any"] = "any") -> Index:
         return Index(result)
 
     def drop_duplicates(self, *, keep: __builtins__.str = "first") -> Index:
-        if keep is not False:
-            validations.enforce_ordered(self, "drop_duplicates")
+        # Deduplicate on the index columns via block_ops.drop_duplicates and
+        # wrap the resulting block in a new Index.
         block = block_ops.drop_duplicates(self._block, self._block.index_columns, keep)
         return Index(block)
 
 
@@ -789,9 +789,7 @@ def __repr__(self) -> str:
 
         opts = bigframes.options.display
         max_results = opts.max_rows
-        # anywdiget mode uses the same display logic as the "deferred" mode
-        # for faster execution
-        if opts.repr_mode in ("deferred", "anywidget"):
+        if opts.repr_mode == "deferred":
             return formatter.repr_query_job(self._compute_dry_run())
 
         # TODO(swast): pass max_columns and get the true column count back. Maybe
@@ -829,68 +827,138 @@ def __repr__(self) -> str:
         lines.append(f"[{row_count} rows x {column_count} columns]")
         return "\n".join(lines)
 
-    def _repr_html_(self) -> str:
-        """
-        Returns an html string primarily for use by notebooks for displaying
-        a representation of the DataFrame. Displays 20 rows by default since
-        many notebooks are not configured for large tables.
-        """
-        opts = bigframes.options.display
-        max_results = opts.max_rows
-        if opts.repr_mode == "deferred":
-            return formatter.repr_query_job(self._compute_dry_run())
-
-        # Process blob columns first, regardless of display mode
-        self._cached()
-        df = self.copy()
+    def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]:
+        """Process blob columns for display.
+
+        Returns:
+            A ``(df, blob_cols)`` pair. ``blob_cols`` names the columns whose
+            dtype is ``OBJ_REF_DTYPE``; it is empty when the ``blob_display``
+            option is off or no such column exists. ``df`` is ``self`` when
+            ``blob_cols`` is empty; otherwise it is a copy of ``self`` in
+            which each blob column is replaced by the result of
+            ``.blob._get_runtime(mode="R", with_metadata=True)``.
+        """
+        df = self
+        blob_cols = []
         if bigframes.options.display.blob_display:
             blob_cols = [
                 series_name
-                for series_name, series in df.items()
+                for series_name, series in self.items()
                 if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE
             ]
-            for col in blob_cols:
-                # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data.
-                df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
+            if blob_cols:
+                # Copy only when there is a column to rewrite; the assignments
+                # below then go to the copy rather than to self.
+                df = self.copy()
+                for col in blob_cols:
+                    # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a way to get URLs from local data.
+                    df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
+        return df, blob_cols
+
+    def _get_anywidget_bundle(self, include=None, exclude=None):
+        """
+        Helper method to create and return the anywidget mimebundle.
+        This function encapsulates the logic for anywidget display.
+
+        Args:
+            include: Passed through to the widget's ``_repr_mimebundle_``.
+            exclude: Passed through to the widget's ``_repr_mimebundle_``.
+
+        Returns:
+            A dict copied from the widget's mimebundle data, with the
+            ``"text/html"`` and ``"text/plain"`` entries set by this method.
+            If the widget returns a ``(data, metadata)`` tuple, only the data
+            part is kept.
+        """
+        from bigframes import display
+
+        # TODO(shuowei): Keep blob_cols and pass them to TableWidget so that they can render properly.
+        df, _ = self._get_display_df_and_blob_cols()
+
+        # Create the widget and ask it for its mimebundle
+        widget = display.TableWidget(df)
+        widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude)
+
+        # Handle both tuple (data, metadata) and dict returns
+        if isinstance(widget_repr_result, tuple):
+            widget_repr = dict(widget_repr_result[0])  # Extract data dict from tuple
         else:
-            blob_cols = []
+            widget_repr = dict(widget_repr_result)
 
-        if opts.repr_mode == "anywidget":
-            try:
-                from IPython.display import display as ipython_display
+        # At this point, we have already executed the query as part of the
+        # widget construction. Let's use the information available to render
+        # the HTML and plain text versions.
+        widget_repr["text/html"] = widget.table_html
+
+        widget_repr["text/plain"] = self._create_text_representation(
+            widget._cached_data, widget.row_count
+        )
+
+        return widget_repr
+
+    def _create_text_representation(
+        self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int]
+    ) -> str:
+        """Create a text representation of the DataFrame.
+
+        Args:
+            pandas_df: The already-fetched rows to render.
+            total_rows: Row count to report in the footer, or None when it is
+                not known.
+
+        Returns:
+            ``pandas_df`` rendered with ``to_string``, followed by a ``...``
+            line when ``total_rows`` exceeds the number of rendered rows, a
+            blank line, and a ``[<rows> rows x <columns> columns]`` footer.
+            The footer shows ``?`` for the row count only when ``total_rows``
+            is None.
+        """
+        opts = bigframes.options.display
+        with display_options.pandas_repr(opts):
+            import pandas.io.formats
+
+            # safe to mutate this, this dict is owned by this code, and does not affect global config
+            to_string_kwargs = (
+                pandas.io.formats.format.get_dataframe_repr_params()  # type: ignore
+            )
+            if not self._has_index:
+                to_string_kwargs.update({"index": False})
+
+            # We add our own dimensions string, so don't want pandas to.
+            to_string_kwargs.update({"show_dimensions": False})
+            repr_string = pandas_df.to_string(**to_string_kwargs)
 
-                from bigframes import display
+        lines = repr_string.split("\n")
 
-                # Always create a new widget instance for each display call
-                # This ensures that each cell gets its own widget and prevents
-                # unintended sharing between cells
-                widget = display.TableWidget(df.copy())
+        if total_rows is not None and total_rows > len(pandas_df):
+            lines.append("...")
 
-                ipython_display(widget)
-                return ""  # Return empty string since we used display()
+        lines.append("")
+        column_count = len(self.columns)
+        # Test against None explicitly: `total_rows or '?'` would also turn a
+        # genuine count of 0 (an empty result) into '?'.
+        row_count_text = "?" if total_rows is None else total_rows
+        lines.append(f"[{row_count_text} rows x {column_count} columns]")
+        return "\n".join(lines)
 
-            except (AttributeError, ValueError, ImportError):
-                # Fallback if anywidget is not available
+    def _repr_mimebundle_(self, include=None, exclude=None):
+        """
+        Custom display method for IPython/Jupyter environments.
+        This is called by IPython's display system when the object is displayed.
+
+        Args:
+            include: Forwarded to the widget's ``_repr_mimebundle_`` in
+                anywidget mode; unused otherwise.
+            exclude: Forwarded to the widget's ``_repr_mimebundle_`` in
+                anywidget mode; unused otherwise.
+
+        Returns:
+            A dict keyed by MIME type. Outside anywidget mode it holds
+            ``"text/html"`` and ``"text/plain"`` entries.
+        """
+        opts = bigframes.options.display
+        # Deferred mode must not run the query: return the dry-run summary,
+        # as __repr__ does for this mode.
+        if opts.repr_mode == "deferred":
+            deferred_repr = formatter.repr_query_job(self._compute_dry_run())
+            return {"text/html": deferred_repr, "text/plain": deferred_repr}
+
+        # Only handle widget display in anywidget mode
+        if opts.repr_mode == "anywidget":
+            try:
+                return self._get_anywidget_bundle(include=include, exclude=exclude)
+
+            except ImportError:
+                # Anywidget is an optional dependency, so warn rather than fail.
+                # TODO(shuowei): When Anywidget becomes the default for all repr modes,
+                # remove this warning.
                 warnings.warn(
                     "Anywidget mode is not available. "
                     "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. "
-                    f"Falling back to deferred mode. Error: {traceback.format_exc()}"
+                    f"Falling back to static HTML. Error: {traceback.format_exc()}"
                 )
-                return formatter.repr_query_job(self._compute_dry_run())
 
-        # Continue with regular HTML rendering for non-anywidget modes
-        # TODO(swast): pass max_columns and get the true column count back. Maybe
-        # get 1 more column than we have requested so that pandas can add the
-        # ... for us?
+        # Static rendering (also the fallback when anywidget cannot be
+        # imported): fetch data once and use it for both HTML and plain text
+        # representations to avoid multiple queries.
+        max_results = opts.max_rows
+
+        df, blob_cols = self._get_display_df_and_blob_cols()
+
         pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
             max_results
         )
-
         self._set_internal_query_job(query_job)
         column_count = len(pandas_df.columns)
 
+        html_string = self._create_html_representation(
+            pandas_df, row_count, column_count, blob_cols
+        )
+
+        text_representation = self._create_text_representation(pandas_df, row_count)
+
+        return {"text/html": html_string, "text/plain": text_representation}
+
+    def _create_html_representation(
+        self,
+        pandas_df: pandas.DataFrame,
+        row_count: int,
+        column_count: int,
+        blob_cols: list[str],
+    ) -> str:
+        """Create an HTML representation of the DataFrame."""
+        opts = bigframes.options.display
         with display_options.pandas_repr(opts):
-            # Allows to preview images in the DataFrame. The implementation changes the string repr as well, that it doesn't truncate strings or escape html charaters such as "<" and ">". We may need to implement a full-fledged repr module to better support types not in pandas.
+            # TODO(shuowei, b/464053870): Escaping HTML would be useful, but
+            # `escape=False` is needed to show images. We may need to implement
+            # a full-fledged repr module to better support types not in pandas.
             if bigframes.options.display.blob_display and blob_cols:
 
                 def obj_ref_rt_to_html(obj_ref_rt) -> str:
@@ -919,15 +987,12 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str:
 
                 # set max_colwidth so not to truncate the image url
                 with pandas.option_context("display.max_colwidth", None):
-                    max_rows = pandas.get_option("display.max_rows")
-                    max_cols = pandas.get_option("display.max_columns")
-                    show_dimensions = pandas.get_option("display.show_dimensions")
                     html_string = pandas_df.to_html(
                         escape=False,
                         notebook=True,
-                        max_rows=max_rows,
-                        max_cols=max_cols,
-                        show_dimensions=show_dimensions,
+                        max_rows=pandas.get_option("display.max_rows"),
+                        max_cols=pandas.get_option("display.max_columns"),
+                        show_dimensions=pandas.get_option("display.show_dimensions"),
                         formatters=formatters,  # type: ignore
                     )
             else:
@@ -4989,8 +5054,6 @@ def drop_duplicates(
         *,
         keep: str = "first",
     ) -> DataFrame:
-        if keep is not False:
-            validations.enforce_ordered(self, "drop_duplicates(keep != False)")
         if subset is None:
             column_ids = self._block.value_columns
         elif utils.is_list_like(subset):
@@ -5004,8 +5067,6 @@ def drop_duplicates(
         return DataFrame(block)
 
     def duplicated(self, subset=None, keep: str = "first") -> bigframes.series.Series:
-        if keep is not False:
-            validations.enforce_ordered(self, "duplicated(keep != False)")
         if subset is None:
             column_ids = self._block.value_columns
         else:
 
@@ -2227,8 +2227,6 @@ def reindex_like(self, other: Series, *, validate: typing.Optional[bool] = None)
         return self.reindex(other.index, validate=validate)
 
     def drop_duplicates(self, *, keep: str = "first") -> Series:
-        if keep is not False:
-            validations.enforce_ordered(self, "drop_duplicates(keep != False)")
+        # Deduplicate on the single value column via block_ops.drop_duplicates
+        # and wrap the resulting block in a new Series.
         block = block_ops.drop_duplicates(self._block, (self._value_column,), keep)
         return Series(block)
 
@@ -2249,8 +2247,6 @@ def unique(self, keep_order=True) -> Series:
         return Series(block.select_columns(result).reset_index())
 
     def duplicated(self, keep: str = "first") -> Series:
-        if keep is not False:
-            validations.enforce_ordered(self, "duplicated(keep != False)")
         block, indicator = block_ops.indicate_duplicates(
             self._block, (self._value_column,), keep
         )
 
@@ -291,13 +291,13 @@ def __repr__(self, *args, **kwargs):
 
     __repr__.__doc__ = _curate_df_doc(inspect.getdoc(dataframe.DataFrame.__repr__))
 
-    def _repr_html_(self, *args, **kwargs):
-        return _return_type_wrapper(self._df._repr_html_, StreamingDataFrame)(
+    # Forward to the wrapped DataFrame's _repr_mimebundle_ through
+    # _return_type_wrapper, passing all arguments along unchanged.
+    def _repr_mimebundle_(self, *args, **kwargs):
+        return _return_type_wrapper(self._df._repr_mimebundle_, StreamingDataFrame)(
             *args, **kwargs
         )
 
-    _repr_html_.__doc__ = _curate_df_doc(
-        inspect.getdoc(dataframe.DataFrame._repr_html_)
+    # Reuse DataFrame._repr_mimebundle_'s docstring, passed through
+    # _curate_df_doc, as this method's __doc__.
+    _repr_mimebundle_.__doc__ = _curate_df_doc(
+        inspect.getdoc(dataframe.DataFrame._repr_mimebundle_)
     )
 
     @property
 
@@ -172,6 +172,9 @@
             "url": "https://docs.cloud.google.com/bigquery/docs/bigquery-dataframes-introduction",
         },
     ],
+    "analytics": {
+        "google_analytics_id": "G-XVSRMCJ37X",
+    },
 }
 
 # Add any paths that contain custom themes here, relative to this directory.
Original file line number	Diff line number	Diff line change
`@@ -291,13 +291,13 @@ def __repr__(self, *args, **kwargs):`
`291`	`291`
`292`	`292`	`__repr__.__doc__ = _curate_df_doc(inspect.getdoc(dataframe.DataFrame.__repr__))`
`293`	`293`
`294`		`- def _repr_html_(self, *args, **kwargs):`
`295`		`- return _return_type_wrapper(self._df._repr_html_, StreamingDataFrame)(`
	`294`	`+ def _repr_mimebundle_(self, *args, **kwargs):`
	`295`	`+ return _return_type_wrapper(self._df._repr_mimebundle_, StreamingDataFrame)(`
`296`	`296`	`*args, **kwargs`
`297`	`297`	`)`
`298`	`298`
`299`		`- _repr_html_.__doc__ = _curate_df_doc(`
`300`		`- inspect.getdoc(dataframe.DataFrame._repr_html_)`
	`299`	`+ _repr_mimebundle_.__doc__ = _curate_df_doc(`
	`300`	`+ inspect.getdoc(dataframe.DataFrame._repr_mimebundle_)`
`301`	`301`	`)`
`302`	`302`
`303`	`303`	`@property`
Original file line number	Diff line number	Diff line change
`@@ -172,6 +172,9 @@`
`172`	`172`	`"url": "https://docs.cloud.google.com/bigquery/docs/bigquery-dataframes-introduction",`
`173`	`173`	`},`
`174`	`174`	`],`
	`175`	`+ "analytics": {`
	`176`	`+ "google_analytics_id": "G-XVSRMCJ37X",`
	`177`	`+ },`
`175`	`178`	`}`
`176`	`179`
`177`	`180`	`# Add any paths that contain custom themes here, relative to this directory.`