googleapis
diff --git a/‎bigframes/core/__init__.py
Lines changed: 44 additions & 15 deletions b/‎bigframes/core/__init__.py
Lines changed: 44 additions & 15 deletions
diff --git a/‎bigframes/core/blocks.py
Lines changed: 57 additions & 27 deletions b/‎bigframes/core/blocks.py
Lines changed: 57 additions & 27 deletions
diff --git a/‎bigframes/core/indexers.py
Lines changed: 16 additions & 28 deletions b/‎bigframes/core/indexers.py
Lines changed: 16 additions & 28 deletions
diff --git a/‎bigframes/core/joins/single_column.py
Lines changed: 2 additions & 2 deletions b/‎bigframes/core/joins/single_column.py
Lines changed: 2 additions & 2 deletions
@@ -269,7 +269,7 @@ def _get_hidden_ordering_column(self, key: str) -> ibis_types.Column:
         return typing.cast(ibis_types.Column, self._hidden_ordering_column_names[key])
 
     def apply_limit(self, max_results: int) -> ArrayValue:
-        table = self.to_ibis_expr(
+        table = self._to_ibis_expr(
             ordering_mode="order_by",
             expose_hidden_cols=True,
         ).limit(max_results)
@@ -285,11 +285,23 @@ def apply_limit(self, max_results: int) -> ArrayValue:
             ordering=self._ordering,
         )
 
-    def filter(self, predicate: ibis_types.BooleanValue) -> ArrayValue:
+    def filter(self, predicate_id: str, keep_null: bool = False) -> ArrayValue:
+        """Filter the table on the boolean column ``predicate_id``; if ``keep_null`` is True, rows where that column is null are kept as well."""
+        condition = typing.cast(ibis_types.BooleanValue, self.get_column(predicate_id))
+        if keep_null:
+            condition = typing.cast(
+                ibis_types.BooleanValue,
+                condition.fillna(
+                    typing.cast(ibis_types.BooleanScalar, ibis_types.literal(True))
+                ),
+            )
+        return self._filter(condition)
+
+    def _filter(self, predicate_value: ibis_types.BooleanValue) -> ArrayValue:
         """Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression."""
         expr = self.builder()
         expr.ordering = expr.ordering.with_non_sequential()
-        expr.predicates = [*self._predicates, predicate]
+        expr.predicates = [*self._predicates, predicate_value]
         return expr.build()
 
     def order_by(
@@ -310,7 +322,7 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue:
         .. warning::
             The row numbers of result is non-deterministic, avoid to use.
         """
-        table = self.to_ibis_expr(
+        table = self._to_ibis_expr(
             ordering_mode="order_by", expose_hidden_cols=True, fraction=fraction
         )
         columns = [table[column_name] for column_name in self._column_names]
@@ -342,7 +354,7 @@ def project_offsets(self) -> ArrayValue:
         if self._ordering.is_sequential:
             return self
         # TODO(tbergeron): Enforce total ordering
-        table = self.to_ibis_expr(
+        table = self._to_ibis_expr(
             ordering_mode="offset_col", order_col_name=ORDER_ID_COLUMN
         )
         columns = [table[column_name] for column_name in self._column_names]
@@ -412,7 +424,7 @@ def projection(self, columns: Iterable[ibis_types.Value]) -> ArrayValue:
     def shape(self) -> typing.Tuple[int, int]:
         """Returns dimensions as (length, width) tuple."""
         width = len(self.columns)
-        count_expr = self.to_ibis_expr(ordering_mode="unordered").count()
+        count_expr = self._to_ibis_expr(ordering_mode="unordered").count()
         sql = self._session.ibis_client.compile(count_expr)
         row_iterator, _ = self._session._start_query(
             sql=sql,
@@ -435,7 +447,7 @@ def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue:
         )
         for i, expr in enumerate([self, *other]):
             ordering_prefix = str(i).zfill(prefix_size)
-            table = expr.to_ibis_expr(
+            table = expr._to_ibis_expr(
                 ordering_mode="string_encoded", order_col_name=ORDER_ID_COLUMN
             )
             # Rename the value columns based on horizontal offset before applying union.
@@ -522,7 +534,7 @@ def aggregate(
             by_column_id: column id of the aggregation key, this is preserved through the transform
             dropna: whether null keys should be dropped
         """
-        table = self.to_ibis_expr(ordering_mode="unordered")
+        table = self._to_ibis_expr(ordering_mode="unordered")
         stats = {
             col_out: agg_op._as_ibis(table[col_in])
             for col_in, agg_op, col_out in aggregations
@@ -541,7 +553,7 @@ def aggregate(
             expr = ArrayValue(self._session, result, columns=columns, ordering=ordering)
             if dropna:
                 for column_id in by_column_ids:
-                    expr = expr.filter(
+                    expr = expr._filter(
                         ops.notnull_op._as_ibis(expr.get_column(column_id))
                     )
             # Can maybe remove this as Ordering id is redundant as by_column is unique after aggregation
@@ -572,7 +584,7 @@ def corr_aggregate(
         Arguments:
             corr_aggregations: left_column_id, right_column_id, output_column_id tuples
         """
-        table = self.to_ibis_expr(ordering_mode="unordered")
+        table = self._to_ibis_expr(ordering_mode="unordered")
         stats = {
             col_out: table[col_left].corr(table[col_right], how="pop")
             for col_left, col_right, col_out in corr_aggregations
@@ -646,7 +658,24 @@ def project_window_op(
         # TODO(tbergeron): Automatically track analytic expression usage and defer reprojection until required for valid query generation.
         return result._reproject_to_table() if not skip_reproject_unsafe else result
 
-    def to_ibis_expr(
+    def to_sql(
+        self,
+        ordering_mode: Literal[
+            "order_by", "string_encoded", "offset_col", "unordered"
+        ] = "order_by",
+        order_col_name: Optional[str] = ORDER_ID_COLUMN,
+        col_id_overrides: typing.Mapping[str, str] = {},
+    ) -> str:
+        sql = self._session.ibis_client.compile(
+            self._to_ibis_expr(
+                ordering_mode=ordering_mode,
+                order_col_name=order_col_name,
+                col_id_overrides=col_id_overrides,
+            )
+        )
+        return typing.cast(str, sql)
+
+    def _to_ibis_expr(
         self,
         ordering_mode: Literal[
             "order_by", "string_encoded", "offset_col", "unordered"
@@ -814,7 +843,7 @@ def start_query(
         # a LocalSession for unit testing.
         # TODO(swast): Add a timeout here? If the query is taking a long time,
         # maybe we just print the job metadata that we have so far?
-        table = self.to_ibis_expr(expose_hidden_cols=expose_extra_columns)
+        table = self._to_ibis_expr(expose_hidden_cols=expose_extra_columns)
         sql = self._session.ibis_client.compile(table)  # type:ignore
         return self._session._start_query(
             sql=sql,
@@ -833,7 +862,7 @@ def _reproject_to_table(self) -> ArrayValue:
         some operations such as window operations that cannot be used
         recursively in projections.
         """
-        table = self.to_ibis_expr(
+        table = self._to_ibis_expr(
             ordering_mode="unordered",
             expose_hidden_cols=True,
         )
@@ -912,7 +941,7 @@ def unpivot(
         Returns:
             ArrayValue: The unpivoted ArrayValue
         """
-        table = self.to_ibis_expr(ordering_mode="offset_col")
+        table = self._to_ibis_expr(ordering_mode="offset_col")
         sub_expressions = []
 
         # Use ibis memtable to infer type of rowlabels (if possible)
@@ -1054,7 +1083,7 @@ def slice(
             start = start if (start is not None) else last_offset
             cond_list.append((start - expr_with_offsets.offsets) % (-step) == 0)
 
-        sliced_expr = expr_with_offsets.filter(
+        sliced_expr = expr_with_offsets._filter(
             functools.reduce(lambda x, y: x & y, cond_list)
         )
         return sliced_expr if step > 0 else sliced_expr.reversed()
 
@@ -30,8 +30,6 @@
 
 import geopandas as gpd  # type: ignore
 import google.cloud.bigquery as bigquery
-import ibis.expr.schema as ibis_schema
-import ibis.expr.types as ibis_types
 import numpy
 import pandas as pd
 import pyarrow as pa  # type: ignore
@@ -42,6 +40,7 @@
 import bigframes.core.indexes as indexes
 import bigframes.core.ordering as ordering
 import bigframes.core.utils
+import bigframes.core.utils as utils
 import bigframes.dtypes
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
@@ -368,7 +367,10 @@ def reorder_levels(self, ids: typing.Sequence[str]):
         level_names = [self.col_id_to_index_name[index_id] for index_id in ids]
         return Block(self.expr, ids, self.column_labels, level_names)
 
-    def _to_dataframe(self, result, schema: ibis_schema.Schema) -> pd.DataFrame:
+    @classmethod
+    def _to_dataframe(
+        cls, result, schema: typing.Mapping[str, bigframes.dtypes.Dtype]
+    ) -> pd.DataFrame:
         """Convert BigQuery data to pandas DataFrame with specific dtypes."""
         df = result.to_dataframe(
             bool_dtype=pd.BooleanDtype(),
@@ -382,8 +384,8 @@ def _to_dataframe(self, result, schema: ibis_schema.Schema) -> pd.DataFrame:
         )
 
         # Convert Geography column from StringDType to GeometryDtype.
-        for column_name, ibis_dtype in schema.items():
-            if ibis_dtype.is_geospatial():
+        for column_name, dtype in schema.items():
+            if dtype == gpd.array.GeometryDtype():
                 df[column_name] = gpd.GeoSeries.from_wkt(
                     # https://github.com/geopandas/geopandas/issues/1879
                     df[column_name].replace({numpy.nan: None}),
@@ -473,7 +475,8 @@ def _compute_and_count(
             if sampling_method == _HEAD:
                 total_rows = int(results_iterator.total_rows * fraction)
                 results_iterator.max_results = total_rows
-                df = self._to_dataframe(results_iterator, expr.to_ibis_expr().schema())
+                schema = dict(zip(self.value_columns, self.dtypes))
+                df = self._to_dataframe(results_iterator, schema)
 
                 if self.index_columns:
                     df.set_index(list(self.index_columns), inplace=True)
@@ -508,7 +511,8 @@ def _compute_and_count(
                 )
         else:
             total_rows = results_iterator.total_rows
-            df = self._to_dataframe(results_iterator, expr.to_ibis_expr().schema())
+            schema = dict(zip(self.value_columns, self.dtypes))
+            df = self._to_dataframe(results_iterator, schema)
 
             if self.index_columns:
                 df.set_index(list(self.index_columns), inplace=True)
@@ -639,13 +643,6 @@ def with_index_labels(self, value: typing.Sequence[Label]) -> Block:
             index_labels=tuple(value),
         )
 
-    def get_value_col_exprs(
-        self, column_names: Optional[Sequence[str]] = None
-    ) -> List[ibis_types.Value]:
-        """Retrive value column expressions."""
-        column_names = self.value_columns if column_names is None else column_names
-        return [self._expr.get_column(column_name) for column_name in column_names]
-
     def apply_unary_op(
         self, column: str, op: ops.UnaryOp, result_label: Label = None
     ) -> typing.Tuple[Block, str]:
@@ -816,20 +813,9 @@ def assign_label(self, column_id: str, new_label: Label) -> Block:
         )
         return self.with_column_labels(new_labels)
 
-    def filter(self, column_name: str, keep_null: bool = False):
-        condition = typing.cast(
-            ibis_types.BooleanValue, self._expr.get_column(column_name)
-        )
-        if keep_null:
-            condition = typing.cast(
-                ibis_types.BooleanValue,
-                condition.fillna(
-                    typing.cast(ibis_types.BooleanScalar, ibis_types.literal(True))
-                ),
-            )
-        filtered_expr = self.expr.filter(condition)
+    def filter(self, column_id: str, keep_null: bool = False):
         return Block(
-            filtered_expr,
+            self._expr.filter(column_id, keep_null),
             index_columns=self.index_columns,
             column_labels=self.column_labels,
             index_labels=self.index.names,
@@ -1436,6 +1422,50 @@ def is_monotonic_decreasing(
     ) -> bool:
         return self._is_monotonic(column_id, increasing=False)
 
+    def to_sql_query(
+        self, include_index: bool
+    ) -> typing.Tuple[str, list[str], list[Label]]:
+        """
+        Compiles this DataFrame's expression tree to SQL, optionally
+        including index columns.
+
+        Args:
+            include_index (bool):
+                whether to include index columns.
+
+        Returns:
+            a tuple of (sql_string, index_column_id_list, index_column_label_list).
+                If include_index is set to False, index_column_id_list and index_column_label_list
+                return empty lists.
+        """
+        array_value = self._expr
+        col_labels, idx_labels = list(self.column_labels), list(self.index_labels)
+        old_col_ids, old_idx_ids = list(self.value_columns), list(self.index_columns)
+
+        if not include_index:
+            idx_labels, old_idx_ids = [], []
+            array_value = array_value.drop_columns(self.index_columns)
+
+        old_ids = old_idx_ids + old_col_ids
+
+        new_col_ids, new_idx_ids = utils.get_standardized_ids(col_labels, idx_labels)
+        new_ids = new_idx_ids + new_col_ids
+
+        substitutions = {}
+        for old_id, new_id in zip(old_ids, new_ids):
+            # TODO(swast): Do we need to further escape this, or can we rely on
+            # the BigQuery unicode column name feature?
+            substitutions[old_id] = new_id
+
+        sql = array_value.to_sql(
+            ordering_mode="unordered", col_id_overrides=substitutions
+        )
+        return (
+            sql,
+            new_ids[: len(idx_labels)],
+            idx_labels,
+        )
+
     def _is_monotonic(
         self, column_ids: typing.Union[str, Sequence[str]], increasing: bool
     ) -> bool:
 
@@ -21,11 +21,11 @@
 import pandas as pd
 
 import bigframes.constants as constants
-import bigframes.core as core
 import bigframes.core.guid as guid
 import bigframes.core.indexes as indexes
 import bigframes.core.scalar
 import bigframes.dataframe
+import bigframes.operations as ops
 import bigframes.series
 
 if typing.TYPE_CHECKING:
@@ -59,35 +59,23 @@ def __setitem__(self, key, value) -> None:
 
         # Assume the key is for the index label.
         block = self._series._block
-        value_column = self._series._value
-        index_column = block.expr.get_column(block.index_columns[0])
-        new_value = (
-            ibis.case()
-            .when(
-                index_column == ibis.literal(key, index_column.type()),
-                ibis.literal(value, value_column.type()),
-            )
-            .else_(value_column)
-            .end()
-            .name(value_column.get_name())
+        value_column = self._series._value_column
+        index_column = block.index_columns[0]
+
+        # if index == key return value else value_column
+        block, insert_cond = block.apply_unary_op(
+            index_column, ops.partial_right(ops.eq_op, key)
         )
-        all_columns = []
-        for column in block.expr.columns:
-            if column.get_name() != value_column.get_name():
-                all_columns.append(column)
-            else:
-                all_columns.append(new_value)
-        new_expr = block.expr.projection(all_columns)
-
-        # TODO(tbergeron): Use block operators rather than directly building desired ibis expressions.
-        self._series._set_block(
-            core.blocks.Block(
-                new_expr,
-                self._series._block.index_columns,
-                self._series._block.column_labels,
-                self._series._block.index.names,
-            )
+        block, result_id = block.apply_binary_op(
+            insert_cond,
+            self._series._value_column,
+            ops.partial_arg1(ops.where_op, value),
         )
+        block = block.copy_values(result_id, value_column).drop_columns(
+            [insert_cond, result_id]
+        )
+
+        self._series._set_block(block)
 
 
 class IlocSeriesIndexer:
 
@@ -122,11 +122,11 @@ def join_by_column(
             ),
         )
     else:
-        left_table = left.to_ibis_expr(
+        left_table = left._to_ibis_expr(
             ordering_mode="unordered",
             expose_hidden_cols=True,
         )
-        right_table = right.to_ibis_expr(
+        right_table = right._to_ibis_expr(
             ordering_mode="unordered",
             expose_hidden_cols=True,
         )
Original file line number	Diff line number	Diff line change
`@@ -122,11 +122,11 @@ def join_by_column(`
`122`	`122`	`),`
`123`	`123`	`)`
`124`	`124`	`else:`
`125`		`- left_table = left.to_ibis_expr(`
	`125`	`+ left_table = left._to_ibis_expr(`
`126`	`126`	`ordering_mode="unordered",`
`127`	`127`	`expose_hidden_cols=True,`
`128`	`128`	`)`
`129`		`- right_table = right.to_ibis_expr(`
	`129`	`+ right_table = right._to_ibis_expr(`
`130`	`130`	`ordering_mode="unordered",`
`131`	`131`	`expose_hidden_cols=True,`
`132`	`132`	`)`