Skip to content

Commit 582bbaf

Browse files
authored
chore: implement compile_readtable (#1809)
* use mocks.create_bigquery_session
* chore: implement compile_readtable
1 parent b3db519 commit 582bbaf

File tree

10 files changed: 125 additions and 60 deletions.

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,17 @@ def compile_readlocal(self, node: nodes.ReadLocalNode, *args) -> ir.SQLGlotIR:
158158

159159
return ir.SQLGlotIR.from_pyarrow(pa_table, node.schema, uid_gen=self.uid_gen)
160160

161+
@_compile_node.register
162+
def compile_readtable(self, node: nodes.ReadTableNode, *args):
163+
table = node.source.table
164+
return ir.SQLGlotIR.from_table(
165+
table.project_id,
166+
table.dataset_id,
167+
table.table_id,
168+
col_names=[col.source_id for col in node.scan_list.items],
169+
alias_names=[col.id.sql for col in node.scan_list.items],
170+
)
171+
161172
@_compile_node.register
162173
def compile_selection(
163174
self, node: nodes.SelectionNode, child: ir.SQLGlotIR

bigframes/core/compile/sqlglot/scalar_compiler.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ def compile_op_expression(expr: expression.OpExpression):
7979

8080

8181
# TODO: add parenthesize for operators
82-
def compile_addop(op: ops.AddOp, left: sge.Expression, right: sge.Expression):
82+
def compile_addop(
83+
op: ops.AddOp, left: sge.Expression, right: sge.Expression
84+
) -> sge.Expression:
8385
# TODO: support addop for string dtype.
8486
return sge.Add(this=left, expression=right)

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,30 @@ def from_pyarrow(
106106
)
107107
return cls(expr=sg.select(sge.Star()).from_(expr), uid_gen=uid_gen)
108108

109+
@classmethod
110+
def from_table(
111+
cls,
112+
project_id: str,
113+
dataset_id: str,
114+
table_id: str,
115+
col_names: typing.Sequence[str],
116+
alias_names: typing.Sequence[str],
117+
) -> SQLGlotIR:
118+
selections = [
119+
sge.Alias(
120+
this=sge.to_identifier(col_name, quoted=cls.quoted),
121+
alias=sge.to_identifier(alias_name, quoted=cls.quoted),
122+
)
123+
for col_name, alias_name in zip(col_names, alias_names)
124+
]
125+
table_expr = sge.Table(
126+
this=sg.to_identifier(table_id, quoted=cls.quoted),
127+
db=sg.to_identifier(dataset_id, quoted=cls.quoted),
128+
catalog=sg.to_identifier(project_id, quoted=cls.quoted),
129+
)
130+
select_expr = sge.Select().select(*selections).from_(table_expr)
131+
return cls(expr=select_expr)
132+
109133
@classmethod
110134
def from_query_string(
111135
cls,
@@ -156,9 +180,8 @@ def project(
156180
)
157181
for id, expr in projected_cols
158182
]
159-
# TODO: some columns are not able to be projected into the same select.
160-
select_expr = self.expr.select(*projected_cols_expr, append=True)
161-
return SQLGlotIR(expr=select_expr)
183+
new_expr = self._encapsulate_as_cte().select(*projected_cols_expr, append=False)
184+
return SQLGlotIR(expr=new_expr)
162185

163186
def insert(
164187
self,

bigframes/testing/compiler_session.py

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,10 @@
1414

1515
import dataclasses
1616
import typing
17-
import weakref
1817

1918
import bigframes.core
2019
import bigframes.core.compile.sqlglot as sqlglot
21-
import bigframes.dataframe
2220
import bigframes.session.executor
23-
import bigframes.session.metrics
2421

2522

2623
@dataclasses.dataclass
@@ -44,35 +41,3 @@ def to_sql(
4441
return self.compiler.SQLGlotCompiler().compile(
4542
array_value.node, ordered=ordered
4643
)
47-
48-
49-
class SQLCompilerSession(bigframes.session.Session):
50-
"""Session for SQL compilation using sqlglot."""
51-
52-
def __init__(self):
53-
# TODO: remove unused attributes.
54-
self._location = None # type: ignore
55-
self._bq_kms_key_name = None # type: ignore
56-
self._clients_provider = None # type: ignore
57-
self.ibis_client = None # type: ignore
58-
self._bq_connection = None # type: ignore
59-
self._skip_bq_connection_check = True
60-
self._objects: list[
61-
weakref.ReferenceType[
62-
typing.Union[
63-
bigframes.core.indexes.Index,
64-
bigframes.series.Series,
65-
bigframes.dataframe.DataFrame,
66-
]
67-
]
68-
] = []
69-
self._strictly_ordered: bool = True
70-
self._allow_ambiguity = False # type: ignore
71-
self._default_index_type = bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
72-
self._metrics = bigframes.session.metrics.ExecutionMetrics()
73-
self._remote_function_session = None # type: ignore
74-
self._temp_storage_manager = None # type: ignore
75-
self._loader = None # type: ignore
76-
77-
self._session_id: str = "sqlglot_unit_tests_session"
78-
self._executor = SQLCompilerExecutor()

bigframes/testing/mocks.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def create_bigquery_session(
6464

6565
if bqclient is None:
6666
bqclient = mock.create_autospec(google.cloud.bigquery.Client, instance=True)
67-
bqclient.project = "test-project"
67+
bqclient.project = anonymous_dataset.project
6868
bqclient.location = location
6969

7070
# Mock the location.
@@ -74,9 +74,9 @@ def create_bigquery_session(
7474
type(table).created = mock.PropertyMock(return_value=table_time)
7575
type(table).location = mock.PropertyMock(return_value=location)
7676
type(table).schema = mock.PropertyMock(return_value=table_schema)
77-
type(table).reference = mock.PropertyMock(
78-
return_value=anonymous_dataset.table("test_table")
79-
)
77+
type(table).project = anonymous_dataset.project
78+
type(table).dataset_id = anonymous_dataset.dataset_id
79+
type(table).table_id = "test_table"
8080
type(table).num_rows = mock.PropertyMock(return_value=1000000000)
8181
bqclient.get_table.return_value = table
8282

tests/unit/core/compile/sqlglot/conftest.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,23 +13,40 @@
1313
# limitations under the License.
1414

1515
import pathlib
16+
import typing
1617

18+
from google.cloud import bigquery
1719
import pandas as pd
1820
import pyarrow as pa
1921
import pytest
2022

2123
from bigframes import dtypes
24+
import bigframes.testing.mocks as mocks
2225
import bigframes.testing.utils
2326

2427
CURRENT_DIR = pathlib.Path(__file__).parent
2528
DATA_DIR = CURRENT_DIR.parent.parent.parent.parent / "data"
2629

2730

2831
@pytest.fixture(scope="session")
29-
def compiler_session():
32+
def compiler_session(basic_types_table_schema):
3033
from bigframes.testing import compiler_session
3134

32-
return compiler_session.SQLCompilerSession()
35+
# TODO: Check if ordering mode is needed for the tests.
36+
session = mocks.create_bigquery_session(table_schema=basic_types_table_schema)
37+
session._executor = compiler_session.SQLCompilerExecutor()
38+
return session
39+
40+
41+
@pytest.fixture(scope="session")
42+
def basic_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
43+
return [
44+
bigquery.SchemaField("rowindex", "INTEGER"),
45+
bigquery.SchemaField("int64_col", "INTEGER"),
46+
bigquery.SchemaField("string_col", "STRING"),
47+
bigquery.SchemaField("float64_col", "FLOAT"),
48+
bigquery.SchemaField("bool_col", "BOOLEAN"),
49+
]
3350

3451

3552
@pytest.fixture(scope="session")
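[File path missing from this capture — apparently the expected-SQL snapshot (`out.sql`) asserted by test_compile_projection; confirm against the commit.]

Lines changed: 20 additions & 7 deletions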
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,24 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
*,
4-
`bfcol_0` AS `bfcol_3`,
5-
`bfcol_1` + 1 AS `bfcol_4`
6-
FROM UNNEST(ARRAY<STRUCT<`bfcol_0` INT64, `bfcol_1` INT64, `bfcol_2` INT64>>[STRUCT(0, 123456789, 0), STRUCT(1, -987654321, 1), STRUCT(2, 314159, 2), STRUCT(3, CAST(NULL AS INT64), 3), STRUCT(4, -234892, 4), STRUCT(5, 55555, 5), STRUCT(6, 101202303, 6), STRUCT(7, -214748367, 7), STRUCT(8, 2, 8)])
3+
`rowindex` AS `bfcol_0`,
4+
`int64_col` AS `bfcol_1`,
5+
`string_col` AS `bfcol_2`,
6+
`float64_col` AS `bfcol_3`,
7+
`bool_col` AS `bfcol_4`
8+
FROM `test-project`.`test_dataset`.`test_table`
9+
), `bfcte_1` AS (
10+
SELECT
11+
`bfcol_0` AS `bfcol_5`,
12+
`bfcol_2` AS `bfcol_6`,
13+
`bfcol_3` AS `bfcol_7`,
14+
`bfcol_4` AS `bfcol_8`,
15+
`bfcol_1` + 1 AS `bfcol_9`
16+
FROM `bfcte_0`
717
)
818
SELECT
9-
`bfcol_3` AS `rowindex`,
10-
`bfcol_4` AS `int64_col`
11-
FROM `bfcte_0`
19+
`bfcol_5` AS `rowindex`,
20+
`bfcol_9` AS `int64_col`,
21+
`bfcol_6` AS `string_col`,
22+
`bfcol_7` AS `float64_col`,
23+
`bfcol_8` AS `bool_col`
24+
FROM `bfcte_1`
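[File path missing from this capture — apparently a new expected-SQL snapshot (`out.sql`) asserted by test_compile_readtable; confirm against the commit.]

Lines changed: 16 additions & 0 deletions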
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
WITH `bfcte_2` AS (
2+
SELECT
3+
`rowindex` AS `bfcol_0`,
4+
`int64_col` AS `bfcol_1`,
5+
`string_col` AS `bfcol_2`,
6+
`float64_col` AS `bfcol_3`,
7+
`bool_col` AS `bfcol_4`
8+
FROM `test-project`.`test_dataset`.`test_table`
9+
)
10+
SELECT
11+
`bfcol_0` AS `rowindex`,
12+
`bfcol_1` AS `int64_col`,
13+
`bfcol_2` AS `string_col`,
14+
`bfcol_3` AS `float64_col`,
15+
`bfcol_4` AS `bool_col`
16+
FROM `bfcte_2`

tests/unit/core/compile/sqlglot/test_compile_projection.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,14 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import pandas as pd
1615
import pytest
1716

1817
import bigframes
19-
import bigframes.pandas as bpd
2018

2119
pytest.importorskip("pytest_snapshot")
2220

2321

24-
def test_compile_projection(
25-
scalars_types_pandas_df: pd.DataFrame, compiler_session: bigframes.Session, snapshot
26-
):
27-
bf_df = bpd.DataFrame(
28-
scalars_types_pandas_df[["int64_col"]], session=compiler_session
29-
)
22+
def test_compile_projection(compiler_session: bigframes.Session, snapshot):
23+
bf_df = compiler_session.read_gbq_table("test-project.test_dataset.test_table")
3024
bf_df["int64_col"] = bf_df["int64_col"] + 1
3125
snapshot.assert_match(bf_df.sql, "out.sql")
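[File path missing from this capture — a new test module that defines test_compile_readtable; confirm the exact path against the commit.]

Lines changed: 24 additions & 0 deletions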
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
17+
import bigframes
18+
19+
pytest.importorskip("pytest_snapshot")
20+
21+
22+
def test_compile_readtable(compiler_session: bigframes.Session, snapshot):
23+
bf_df = compiler_session.read_gbq_table("test-project.test_dataset.test_table")
24+
snapshot.assert_match(bf_df.sql, "out.sql")

0 commit comments

Comments
 (0)