chore: implement compile_readtable (#1809)

chelsea-lin · web-flow · commit 582bbaf0bf27 · 2025-06-11T13:08:07.000-07:00
* use mocks.create_bigquery_session for the sqlglot `compiler_session` test fixture, replacing the removed SQLCompilerSession class

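* chore: implement compile_readtable via the new SQLGlotIR.from_table constructor; SQLGlotIR.project now selects from `_encapsulate_as_cte()` instead of appending to the existing select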
diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py
@@ -158,6 +158,17 @@ def compile_readlocal(self, node: nodes.ReadLocalNode, *args) -> ir.SQLGlotIR:
 
         return ir.SQLGlotIR.from_pyarrow(pa_table, node.schema, uid_gen=self.uid_gen)
 
+    @_compile_node.register
+    def compile_readtable(self, node: nodes.ReadTableNode, *args):
+        table = node.source.table
+        return ir.SQLGlotIR.from_table(
+            table.project_id,
+            table.dataset_id,
+            table.table_id,
+            col_names=[col.source_id for col in node.scan_list.items],
+            alias_names=[col.id.sql for col in node.scan_list.items],
+        )
+
     @_compile_node.register
     def compile_selection(
         self, node: nodes.SelectionNode, child: ir.SQLGlotIR
diff --git a/bigframes/core/compile/sqlglot/scalar_compiler.py b/bigframes/core/compile/sqlglot/scalar_compiler.py
@@ -79,6 +79,8 @@ def compile_op_expression(expr: expression.OpExpression):
 
 
 # TODO: add parenthesize for operators
-def compile_addop(op: ops.AddOp, left: sge.Expression, right: sge.Expression):
+def compile_addop(
+    op: ops.AddOp, left: sge.Expression, right: sge.Expression
+) -> sge.Expression:
     # TODO: support addop for string dtype.
     return sge.Add(this=left, expression=right)
diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py
@@ -106,6 +106,30 @@ def from_pyarrow(
         )
         return cls(expr=sg.select(sge.Star()).from_(expr), uid_gen=uid_gen)
 
+    @classmethod
+    def from_table(
+        cls,
+        project_id: str,
+        dataset_id: str,
+        table_id: str,
+        col_names: typing.Sequence[str],
+        alias_names: typing.Sequence[str],
+    ) -> SQLGlotIR:
+        selections = [
+            sge.Alias(
+                this=sge.to_identifier(col_name, quoted=cls.quoted),
+                alias=sge.to_identifier(alias_name, quoted=cls.quoted),
+            )
+            for col_name, alias_name in zip(col_names, alias_names)
+        ]
+        table_expr = sge.Table(
+            this=sg.to_identifier(table_id, quoted=cls.quoted),
+            db=sg.to_identifier(dataset_id, quoted=cls.quoted),
+            catalog=sg.to_identifier(project_id, quoted=cls.quoted),
+        )
+        select_expr = sge.Select().select(*selections).from_(table_expr)
+        return cls(expr=select_expr)
+
     @classmethod
     def from_query_string(
         cls,
@@ -156,9 +180,8 @@ def project(
             )
             for id, expr in projected_cols
         ]
-        # TODO: some columns are not able to be projected into the same select.
-        select_expr = self.expr.select(*projected_cols_expr, append=True)
-        return SQLGlotIR(expr=select_expr)
+        new_expr = self._encapsulate_as_cte().select(*projected_cols_expr, append=False)
+        return SQLGlotIR(expr=new_expr)
 
     def insert(
         self,
diff --git a/bigframes/testing/compiler_session.py b/bigframes/testing/compiler_session.py
@@ -14,13 +14,10 @@
 
 import dataclasses
 import typing
-import weakref
 
 import bigframes.core
 import bigframes.core.compile.sqlglot as sqlglot
-import bigframes.dataframe
 import bigframes.session.executor
-import bigframes.session.metrics
 
 
 @dataclasses.dataclass
@@ -44,35 +41,3 @@ def to_sql(
         return self.compiler.SQLGlotCompiler().compile(
             array_value.node, ordered=ordered
         )
-
-
-class SQLCompilerSession(bigframes.session.Session):
-    """Session for SQL compilation using sqlglot."""
-
-    def __init__(self):
-        # TODO: remove unused attributes.
-        self._location = None  # type: ignore
-        self._bq_kms_key_name = None  # type: ignore
-        self._clients_provider = None  # type: ignore
-        self.ibis_client = None  # type: ignore
-        self._bq_connection = None  # type: ignore
-        self._skip_bq_connection_check = True
-        self._objects: list[
-            weakref.ReferenceType[
-                typing.Union[
-                    bigframes.core.indexes.Index,
-                    bigframes.series.Series,
-                    bigframes.dataframe.DataFrame,
-                ]
-            ]
-        ] = []
-        self._strictly_ordered: bool = True
-        self._allow_ambiguity = False  # type: ignore
-        self._default_index_type = bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
-        self._metrics = bigframes.session.metrics.ExecutionMetrics()
-        self._remote_function_session = None  # type: ignore
-        self._temp_storage_manager = None  # type: ignore
-        self._loader = None  # type: ignore
-
-        self._session_id: str = "sqlglot_unit_tests_session"
-        self._executor = SQLCompilerExecutor()
diff --git a/bigframes/testing/mocks.py b/bigframes/testing/mocks.py
@@ -64,7 +64,7 @@ def create_bigquery_session(
 
     if bqclient is None:
         bqclient = mock.create_autospec(google.cloud.bigquery.Client, instance=True)
-        bqclient.project = "test-project"
+        bqclient.project = anonymous_dataset.project
         bqclient.location = location
 
         # Mock the location.
@@ -74,9 +74,9 @@ def create_bigquery_session(
         type(table).created = mock.PropertyMock(return_value=table_time)
         type(table).location = mock.PropertyMock(return_value=location)
         type(table).schema = mock.PropertyMock(return_value=table_schema)
-        type(table).reference = mock.PropertyMock(
-            return_value=anonymous_dataset.table("test_table")
-        )
+        type(table).project = anonymous_dataset.project
+        type(table).dataset_id = anonymous_dataset.dataset_id
+        type(table).table_id = "test_table"
         type(table).num_rows = mock.PropertyMock(return_value=1000000000)
         bqclient.get_table.return_value = table
 
diff --git a/tests/unit/core/compile/sqlglot/conftest.py b/tests/unit/core/compile/sqlglot/conftest.py
@@ -13,23 +13,40 @@
 # limitations under the License.
 
 import pathlib
+import typing
 
+from google.cloud import bigquery
 import pandas as pd
 import pyarrow as pa
 import pytest
 
 from bigframes import dtypes
+import bigframes.testing.mocks as mocks
 import bigframes.testing.utils
 
 CURRENT_DIR = pathlib.Path(__file__).parent
 DATA_DIR = CURRENT_DIR.parent.parent.parent.parent / "data"
 
 
 @pytest.fixture(scope="session")
-def compiler_session():
+def compiler_session(basic_types_table_schema):
     from bigframes.testing import compiler_session
 
-    return compiler_session.SQLCompilerSession()
+    # TODO: Check if ordering mode is needed for the tests.
+    session = mocks.create_bigquery_session(table_schema=basic_types_table_schema)
+    session._executor = compiler_session.SQLCompilerExecutor()
+    return session
+
+
+@pytest.fixture(scope="session")
+def basic_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
+    return [
+        bigquery.SchemaField("rowindex", "INTEGER"),
+        bigquery.SchemaField("int64_col", "INTEGER"),
+        bigquery.SchemaField("string_col", "STRING"),
+        bigquery.SchemaField("float64_col", "FLOAT"),
+        bigquery.SchemaField("bool_col", "BOOLEAN"),
+    ]
 
 
 @pytest.fixture(scope="session")
diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_projection/test_compile_projection/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_projection/test_compile_projection/out.sql
@@ -1,11 +1,24 @@
 WITH `bfcte_0` AS (
   SELECT
-    *,
-    `bfcol_0` AS `bfcol_3`,
-    `bfcol_1` + 1 AS `bfcol_4`
-  FROM UNNEST(ARRAY<STRUCT<`bfcol_0` INT64, `bfcol_1` INT64, `bfcol_2` INT64>>[STRUCT(0, 123456789, 0), STRUCT(1, -987654321, 1), STRUCT(2, 314159, 2), STRUCT(3, CAST(NULL AS INT64), 3), STRUCT(4, -234892, 4), STRUCT(5, 55555, 5), STRUCT(6, 101202303, 6), STRUCT(7, -214748367, 7), STRUCT(8, 2, 8)])
+    `rowindex` AS `bfcol_0`,
+    `int64_col` AS `bfcol_1`,
+    `string_col` AS `bfcol_2`,
+    `float64_col` AS `bfcol_3`,
+    `bool_col` AS `bfcol_4`
+  FROM `test-project`.`test_dataset`.`test_table`
+), `bfcte_1` AS (
+  SELECT
+    `bfcol_0` AS `bfcol_5`,
+    `bfcol_2` AS `bfcol_6`,
+    `bfcol_3` AS `bfcol_7`,
+    `bfcol_4` AS `bfcol_8`,
+    `bfcol_1` + 1 AS `bfcol_9`
+  FROM `bfcte_0`
 )
 SELECT
-  `bfcol_3` AS `rowindex`,
-  `bfcol_4` AS `int64_col`
-FROM `bfcte_0`
+  `bfcol_5` AS `rowindex`,
+  `bfcol_9` AS `int64_col`,
+  `bfcol_6` AS `string_col`,
+  `bfcol_7` AS `float64_col`,
+  `bfcol_8` AS `bool_col`
+FROM `bfcte_1`
diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable/out.sql
@@ -0,0 +1,16 @@
+WITH `bfcte_2` AS (
+  SELECT
+    `rowindex` AS `bfcol_0`,
+    `int64_col` AS `bfcol_1`,
+    `string_col` AS `bfcol_2`,
+    `float64_col` AS `bfcol_3`,
+    `bool_col` AS `bfcol_4`
+  FROM `test-project`.`test_dataset`.`test_table`
+)
+SELECT
+  `bfcol_0` AS `rowindex`,
+  `bfcol_1` AS `int64_col`,
+  `bfcol_2` AS `string_col`,
+  `bfcol_3` AS `float64_col`,
+  `bfcol_4` AS `bool_col`
+FROM `bfcte_2`
diff --git a/tests/unit/core/compile/sqlglot/test_compile_projection.py b/tests/unit/core/compile/sqlglot/test_compile_projection.py
@@ -12,20 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pandas as pd
 import pytest
 
 import bigframes
-import bigframes.pandas as bpd
 
 pytest.importorskip("pytest_snapshot")
 
 
-def test_compile_projection(
-    scalars_types_pandas_df: pd.DataFrame, compiler_session: bigframes.Session, snapshot
-):
-    bf_df = bpd.DataFrame(
-        scalars_types_pandas_df[["int64_col"]], session=compiler_session
-    )
+def test_compile_projection(compiler_session: bigframes.Session, snapshot):
+    bf_df = compiler_session.read_gbq_table("test-project.test_dataset.test_table")
     bf_df["int64_col"] = bf_df["int64_col"] + 1
     snapshot.assert_match(bf_df.sql, "out.sql")
diff --git a/tests/unit/core/compile/sqlglot/test_compile_readtable.py b/tests/unit/core/compile/sqlglot/test_compile_readtable.py
@@ -0,0 +1,24 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+import bigframes
+
+pytest.importorskip("pytest_snapshot")
+
+
+def test_compile_readtable(compiler_session: bigframes.Session, snapshot):
+    bf_df = compiler_session.read_gbq_table("test-project.test_dataset.test_table")
+    snapshot.assert_match(bf_df.sql, "out.sql")