Commit 92fdb93

refactor: Extract DataFrame execution to class (#899)
1 parent 1053d56 commit 92fdb93

16 files changed: 769 additions, 480 deletions

bigframes/core/blocks.py

Lines changed: 11 additions & 22 deletions
@@ -488,12 +488,7 @@ def to_arrow(
             list(self.value_columns) + list(self.index_columns)
         )

-        _, query_job = self.session._query_to_destination(
-            self.session._to_sql(expr, ordered=ordered),
-            list(self.index_columns),
-            api_name="cached",
-            do_clustering=False,
-        )
+        _, query_job = self.session._execute(expr, ordered=ordered)
         results_iterator = query_job.result()
         pa_table = results_iterator.to_arrow()

@@ -582,12 +577,7 @@ def to_pandas_batches(
         see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result"""
         dtypes = dict(zip(self.index_columns, self.index.dtypes))
         dtypes.update(zip(self.value_columns, self.dtypes))
-        _, query_job = self.session._query_to_destination(
-            self.session._to_sql(self.expr, ordered=True),
-            list(self.index_columns),
-            api_name="cached",
-            do_clustering=False,
-        )
+        _, query_job = self.session._execute(self.expr, ordered=True)
         results_iterator = query_job.result(
             page_size=page_size, max_results=max_results
         )
@@ -617,11 +607,8 @@ def _materialize_local(
     ) -> Tuple[pd.DataFrame, bigquery.QueryJob]:
         """Run query and download results as a pandas DataFrame. Return the total number of results as well."""
         # TODO(swast): Allow for dry run and timeout.
-        _, query_job = self.session._query_to_destination(
-            self.session._to_sql(self.expr, ordered=materialize_options.ordered),
-            list(self.index_columns),
-            api_name="cached",
-            do_clustering=False,
+        _, query_job = self.session._execute(
+            self.expr, ordered=materialize_options.ordered
         )
         results_iterator = query_job.result()

@@ -797,8 +784,7 @@ def _compute_dry_run(
         self, value_keys: Optional[Iterable[str]] = None
     ) -> bigquery.QueryJob:
         expr = self._apply_value_keys_to_expr(value_keys=value_keys)
-        job_config = bigquery.QueryJobConfig(dry_run=True)
-        _, query_job = self.session._execute(expr, job_config=job_config, dry_run=True)
+        _, query_job = self.session._dry_run(expr)
         return query_job

     def _apply_value_keys_to_expr(self, value_keys: Optional[Iterable[str]] = None):
@@ -2404,12 +2390,15 @@ def to_sql_query(
     def cached(self, *, force: bool = False, session_aware: bool = False) -> None:
         """Write the block to a session table."""
         # use a heuristic for whether something needs to be cached
-        if (not force) and self.session._is_trivially_executable(self.expr):
+        if (not force) and self.session._executor._is_trivially_executable(self.expr):
             return
         elif session_aware:
-            self.session._cache_with_session_awareness(self.expr)
+            bfet_roots = [obj._block._expr.node for obj in self.session.objects]
+            self.session._executor._cache_with_session_awareness(
+                self.expr, session_forest=bfet_roots
+            )
         else:
-            self.session._cache_with_cluster_cols(
+            self.session._executor._cache_with_cluster_cols(
                 self.expr, cluster_cols=self.index_columns
             )
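
The pattern across these hunks is uniform: Block stops hand-assembling SQL for Session._query_to_destination and instead hands its expression to a single Session._execute (or Session._dry_run) entry point, while the caching helpers move behind self.session._executor. The executor itself lives in one of the changed files not shown in this excerpt, so the following is only a minimal sketch of that delegation shape; the class name, constructor, and signatures are assumptions, not the commit's actual code.

from typing import Tuple

import google.cloud.bigquery as bigquery


class BigQueryExecutor:
    """Hypothetical executor facade that owns query submission."""

    def __init__(self, bqclient: bigquery.Client):
        self._bqclient = bqclient

    def execute(self, sql: str, *, dry_run: bool = False) -> bigquery.QueryJob:
        # A dry run validates the query and estimates bytes scanned
        # without executing it.
        config = bigquery.QueryJobConfig(dry_run=dry_run)
        return self._bqclient.query(sql, job_config=config)


class Session:
    """Session keeps thin wrappers so call sites like Block.to_arrow work.

    The real methods take an expression tree plus an `ordered` flag and
    compile it first; raw SQL and a None placeholder are used here only
    to keep the sketch short.
    """

    def __init__(self, bqclient: bigquery.Client):
        self._executor = BigQueryExecutor(bqclient)

    def _execute(self, sql: str) -> Tuple[None, bigquery.QueryJob]:
        return None, self._executor.execute(sql)

    def _dry_run(self, sql: str) -> Tuple[None, bigquery.QueryJob]:
        return None, self._executor.execute(sql, dry_run=True)
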
bigframes/core/compile/api.py

Lines changed: 8 additions & 4 deletions
@@ -13,7 +13,9 @@
 # limitations under the License.
 from __future__ import annotations

-from typing import Mapping, Tuple, TYPE_CHECKING
+from typing import Mapping, Sequence, Tuple, TYPE_CHECKING
+
+import google.cloud.bigquery as bigquery

 import bigframes.core.compile.compiler as compiler

@@ -58,11 +60,13 @@ def compile_ordered(
     def compile_raw(
         self,
         node: bigframes.core.nodes.BigFrameNode,
-    ) -> Tuple[str, bigframes.core.ordering.RowOrdering]:
+    ) -> Tuple[
+        str, Sequence[bigquery.SchemaField], bigframes.core.ordering.RowOrdering
+    ]:
         """Compile node into sql that exposes all columns, including hidden ordering-only columns."""
         ir = self._compiler.compile_ordered_ir(node)
-        sql = ir.raw_sql()
-        return sql, ir._ordering
+        sql, schema = ir.raw_sql_and_schema()
+        return sql, schema, ir._ordering


 def test_only_try_evaluate(node: bigframes.core.nodes.BigFrameNode):
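
Callers of compile_raw now unpack a three-tuple: the BigQuery schema can be returned at compile time because the compiler decides which hidden ordering columns exist. A caller-side sketch, where compiler and node are placeholders for a real compiler instance and a BigFrameNode from this codebase:

# sql is a string, ordering a RowOrdering, and schema a
# Sequence[bigquery.SchemaField], ready for creating the cache table.
sql, schema, ordering = compiler.compile_raw(node)
for field in schema:
    print(field.name, field.field_type, field.is_nullable)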

bigframes/core/compile/compiled.py

Lines changed: 26 additions & 8 deletions
@@ -20,11 +20,14 @@
 from typing import Collection, Literal, Optional, Sequence

 import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops
+import google.cloud.bigquery
 import ibis
 import ibis.backends.bigquery as ibis_bigquery
+import ibis.backends.bigquery.datatypes
 import ibis.common.deferred  # type: ignore
 import ibis.expr.datatypes as ibis_dtypes
 import ibis.expr.operations as ibis_ops
+import ibis.expr.schema as ibis_schema
 import ibis.expr.types as ibis_types
 import pandas

@@ -531,7 +534,8 @@ def __init__(
             for column in self._columns
         }
         self._hidden_ordering_column_names = {
-            column.get_name(): column for column in self._hidden_ordering_columns
+            typing.cast(str, column.get_name()): column
+            for column in self._hidden_ordering_columns
         }
         ### Validation
         value_col_ids = self._column_names.keys()
@@ -947,14 +951,28 @@ def to_sql(
         )
         return typing.cast(str, sql)

-    def raw_sql(self) -> str:
-        """Return sql with all hidden columns. Used to cache with ordering information."""
-        return ibis_bigquery.Backend().compile(
-            self._to_ibis_expr(
-                ordering_mode="unordered",
-                expose_hidden_cols=True,
-            )
+    def raw_sql_and_schema(
+        self,
+    ) -> typing.Tuple[str, typing.Sequence[google.cloud.bigquery.SchemaField]]:
+        """Return sql with all hidden columns. Used to cache with ordering information.
+
+        Also returns schema, as the extra ordering columns are determined compile-time.
+        """
+        all_columns = (*self.column_ids, *self._hidden_ordering_column_names.keys())
+        as_ibis = self._to_ibis_expr(
+            ordering_mode="unordered",
+            expose_hidden_cols=True,
+        ).select(all_columns)
+
+        # Ibis will produce non-nullable schema types, but bigframes should always be nullable
+        fixed_ibis_schema = ibis_schema.Schema.from_tuples(
+            (name, dtype.copy(nullable=True))
+            for (name, dtype) in as_ibis.schema().items()
+        )
+        bq_schema = ibis.backends.bigquery.datatypes.BigQuerySchema.from_ibis(
+            fixed_ibis_schema
         )
+        return ibis_bigquery.Backend().compile(as_ibis), bq_schema

     def _to_ibis_expr(
         self,
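
The nullability fix is the subtle part: ibis can infer non-nullable column types, but BigQuery DataFrames models every column as nullable, so dtypes are rewritten before conversion. A standalone sketch of just that step, reusing the same ibis APIs the diff uses (column names are made up; exact module paths depend on the ibis version bigframes pins):

import ibis.expr.datatypes as dt
import ibis.expr.schema as ibis_schema
from ibis.backends.bigquery.datatypes import BigQuerySchema

# An ibis schema where one column was inferred as non-nullable.
original = ibis_schema.Schema.from_tuples(
    [("id", dt.Int64(nullable=False)), ("name", dt.String())]
)

# Copy every dtype with nullable=True, mirroring raw_sql_and_schema.
fixed = ibis_schema.Schema.from_tuples(
    (name, dtype.copy(nullable=True)) for name, dtype in original.items()
)

# Convert to google.cloud.bigquery SchemaFields; "id" becomes NULLABLE.
bq_schema = BigQuerySchema.from_ibis(fixed)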

bigframes/dataframe.py

Lines changed: 21 additions & 30 deletions
@@ -1214,7 +1214,6 @@ def to_arrow(
             category=bigframes.exceptions.PreviewWarning,
         )

-        self._optimize_query_complexity()
         pa_table, query_job = self._block.to_arrow(ordered=ordered)
         self._set_internal_query_job(query_job)
         return pa_table
@@ -1255,7 +1254,6 @@ def to_pandas(
             downsampled rows and all columns of this DataFrame.
         """
         # TODO(orrbradford): Optimize this in future. Potentially some cases where we can return the stored query job
-        self._optimize_query_complexity()
         df, query_job = self._block.to_pandas(
             max_download_size=max_download_size,
             sampling_method=sampling_method,
@@ -1285,7 +1283,6 @@ def to_pandas_batches(
             form the original dataframe. Results stream from bigquery,
             see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.table.RowIterator#google_cloud_bigquery_table_RowIterator_to_arrow_iterable
         """
-        self._optimize_query_complexity()
         return self._block.to_pandas_batches(
             page_size=page_size, max_results=max_results
         )
@@ -3046,12 +3043,6 @@ def to_gbq(
         ordering_id: Optional[str] = None,
         clustering_columns: Union[pandas.Index, Iterable[typing.Hashable]] = (),
     ) -> str:
-        dispositions = {
-            "fail": bigquery.WriteDisposition.WRITE_EMPTY,
-            "replace": bigquery.WriteDisposition.WRITE_TRUNCATE,
-            "append": bigquery.WriteDisposition.WRITE_APPEND,
-        }
-
         temp_table_ref = None

         if destination_table is None:
@@ -3063,7 +3054,7 @@ def to_gbq(
             )
             if_exists = "replace"

-            temp_table_ref = self._session._random_table(
+            temp_table_ref = self._session._temp_storage_manager._random_table(
                 # The client code owns this table reference now, so skip_cleanup=True
                 # to not clean it up when we close the session.
                 skip_cleanup=True,
@@ -3086,10 +3077,11 @@ def to_gbq(
         if if_exists is None:
             if_exists = "fail"

-        if if_exists not in dispositions:
+        valid_if_exists = ["fail", "replace", "append"]
+        if if_exists not in valid_if_exists:
             raise ValueError(
                 f"Got invalid value {repr(if_exists)} for if_exists. "
-                f"Valid options include None or one of {dispositions.keys()}."
+                f"Valid options include None or one of {valid_if_exists}."
             )

         try:
@@ -3101,16 +3093,25 @@ def to_gbq(
                 clustering_columns, index=index
             )

-        job_config = bigquery.QueryJobConfig(
-            write_disposition=dispositions[if_exists],
-            destination=bigquery.table.TableReference.from_string(
-                destination_table,
-                default_project=default_project,
-            ),
-            clustering_fields=clustering_fields if clustering_fields else None,
+        export_array, id_overrides = self._prepare_export(
+            index=index and self._has_index, ordering_id=ordering_id
+        )
+        destination = bigquery.table.TableReference.from_string(
+            destination_table,
+            default_project=default_project,
+        )
+        _, query_job = self._session._export(
+            export_array,
+            destination=destination,
+            col_id_overrides=id_overrides,
+            cluster_cols=clustering_fields,
+            if_exists=if_exists,
         )
+        self._set_internal_query_job(query_job)

-        self._run_io_query(index=index, ordering_id=ordering_id, job_config=job_config)
+        # The query job should have finished, so there should always be a result table.
+        result_table = query_job.destination
+        assert result_table is not None

         if temp_table_ref:
             bigframes.session._io.bigquery.set_table_expiration(
@@ -3402,19 +3403,16 @@ def _run_io_query(
         self,
         index: bool,
         ordering_id: Optional[str] = None,
-        job_config: Optional[bigquery.job.QueryJobConfig] = None,
     ) -> bigquery.TableReference:
         """Executes a query job presenting this dataframe and returns the destination
         table."""
         session = self._block.expr.session
-        self._optimize_query_complexity()
         export_array, id_overrides = self._prepare_export(
             index=index and self._has_index, ordering_id=ordering_id
         )

         _, query_job = session._execute(
             export_array,
-            job_config=job_config,
             ordered=False,
             col_id_overrides=id_overrides,
         )
@@ -3669,13 +3667,6 @@ def _cached(self, *, force: bool = False) -> DataFrame:
         self._block.cached(force=force)
         return self

-    def _optimize_query_complexity(self):
-        """Reduce query complexity by caching repeated subtrees and recursively materializing maximum-complexity subtrees.
-        May generate many queries and take substantial time to execute.
-        """
-        # TODO: Move all this to session
-        self._session._simplify_with_caching(self._block.expr)
-
 _DataFrameOrSeries = typing.TypeVar("_DataFrameOrSeries")

 @validations.requires_ordering()
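
From the caller's perspective to_gbq is unchanged; the write disposition, destination, and clustering are simply resolved inside Session._export instead of a hand-built QueryJobConfig. A minimal usage sketch (project, dataset, table, and column names are placeholders):

import bigframes.pandas as bpd

df = bpd.read_gbq("my-project.my_dataset.source_table")

# if_exists must be None or one of "fail", "replace", "append";
# anything else now trips the valid_if_exists check shown above.
df.to_gbq(
    "my-project.my_dataset.dest_table",
    if_exists="replace",
    clustering_columns=["col_a"],
)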

bigframes/series.py

Lines changed: 0 additions & 9 deletions
@@ -188,7 +188,6 @@ def __len__(self):
     __len__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__len__)

     def __iter__(self) -> typing.Iterator:
-        self._optimize_query_complexity()
         return itertools.chain.from_iterable(
             map(lambda x: x.squeeze(axis=1), self._block.to_pandas_batches())
         )
@@ -358,7 +357,6 @@ def to_pandas(
             pandas.Series: A pandas Series with all rows of this Series if the data_sampling_threshold_mb
                 is not exceeded; otherwise, a pandas Series with downsampled rows of the DataFrame.
         """
-        self._optimize_query_complexity()
         df, query_job = self._block.to_pandas(
             max_download_size=max_download_size,
             sampling_method=sampling_method,
@@ -1892,13 +1890,6 @@ def _cached(self, *, force: bool = True, session_aware: bool = True) -> Series:
         self._block.cached(force=force, session_aware=session_aware)
         return self

-    def _optimize_query_complexity(self):
-        """Reduce query complexity by caching repeated subtrees and recursively materializing maximum-complexity subtrees.
-        May generate many queries and take substantial time to execute.
-        """
-        # TODO: Move all this to session
-        self._block.session._simplify_with_caching(self._block.expr)
-

 def _is_list_like(obj: typing.Any) -> typing_extensions.TypeGuard[typing.Sequence]:
     return pandas.api.types.is_list_like(obj)
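
series.py only loses code here: the public entry points stop pre-optimizing the expression tree themselves, since caching-based optimization is now reachable through the session's executor (see the Block.cached changes above). Caller-visible behavior is unchanged, as in this small usage sketch (table and column names are placeholders):

import bigframes.pandas as bpd

s = bpd.read_gbq("my-project.my_dataset.my_table")["col_a"]

# to_pandas() still compiles and runs a single BigQuery job.
pandas_series = s.to_pandas()

# __iter__ streams pages via to_pandas_batches(), as in the diff above.
for value in s:
    print(value)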
