Skip to content

Commit 0895ef8

Browse files
authored
refactor: add empty SQLGlotCompiler classes (#1652)
* define configs module for CompileRequest and CompileResult * workaround AttributeError debugging issue * refactor: add empty SQLGlotCompiler
1 parent 53caa8d commit 0895ef8

File tree

10 files changed

+373
-26
lines changed

10 files changed

+373
-26
lines changed

bigframes/core/compile/api.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import google.cloud.bigquery as bigquery
1919

2020
from bigframes.core import rewrite
21-
from bigframes.core.compile import compiler
21+
from bigframes.core.compile import compiler, configs
2222

2323
if TYPE_CHECKING:
2424
import bigframes.core.nodes
@@ -34,7 +34,7 @@ def compile(
3434
limit: Optional[int] = None,
3535
) -> str:
3636
"""Compile node into sql where rows are sorted with ORDER BY."""
37-
request = compiler.CompileRequest(node, sort_rows=ordered, peek_count=limit)
37+
request = configs.CompileRequest(node, sort_rows=ordered, peek_count=limit)
3838
return compiler.compile_sql(request).sql
3939

4040
def compile_raw(
@@ -44,7 +44,7 @@ def compile_raw(
4444
str, Sequence[bigquery.SchemaField], bigframes.core.ordering.RowOrdering
4545
]:
4646
"""Compile node into sql that exposes all columns, including hidden ordering-only columns."""
47-
request = compiler.CompileRequest(
47+
request = configs.CompileRequest(
4848
node, sort_rows=False, materialize_all_order_keys=True
4949
)
5050
result = compiler.compile_sql(request)

bigframes/core/compile/compiler.py

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from bigframes.core import expression
3030
import bigframes.core.compile.compiled as compiled
3131
import bigframes.core.compile.concat as concat_impl
32+
import bigframes.core.compile.configs as configs
3233
import bigframes.core.compile.explode
3334
import bigframes.core.compile.scalar_op_compiler as compile_scalar
3435
import bigframes.core.nodes as nodes
@@ -39,22 +40,7 @@
3940
import bigframes.core
4041

4142

42-
@dataclasses.dataclass(frozen=True)
43-
class CompileRequest:
44-
node: nodes.BigFrameNode
45-
sort_rows: bool
46-
materialize_all_order_keys: bool = False
47-
peek_count: typing.Optional[int] = None
48-
49-
50-
@dataclasses.dataclass(frozen=True)
51-
class CompileResult:
52-
sql: str
53-
sql_schema: typing.Sequence[google.cloud.bigquery.SchemaField]
54-
row_order: Optional[bf_ordering.RowOrdering]
55-
56-
57-
def compile_sql(request: CompileRequest) -> CompileResult:
43+
def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
5844
output_names = tuple((expression.DerefOp(id), id.sql) for id in request.node.ids)
5945
result_node = nodes.ResultNode(
6046
request.node,
@@ -74,7 +60,7 @@ def compile_sql(request: CompileRequest) -> CompileResult:
7460
if request.sort_rows:
7561
result_node = cast(nodes.ResultNode, rewrites.column_pruning(result_node))
7662
sql = compile_result_node(result_node)
77-
return CompileResult(
63+
return configs.CompileResult(
7864
sql, result_node.schema.to_bigquery(), result_node.order_by
7965
)
8066

@@ -88,7 +74,7 @@ def compile_sql(request: CompileRequest) -> CompileResult:
8874
ordering if ordering.referenced_columns.issubset(result_node.ids) else None
8975
)
9076
assert (not request.materialize_all_order_keys) or (output_order is not None)
91-
return CompileResult(sql, result_node.schema.to_bigquery(), output_order)
77+
return configs.CompileResult(sql, result_node.schema.to_bigquery(), output_order)
9278

9379

9480
def _replace_unsupported_ops(node: nodes.BigFrameNode):

bigframes/core/compile/configs.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
from __future__ import annotations
15+
16+
import dataclasses
17+
import typing
18+
19+
import google.cloud.bigquery
20+
21+
from bigframes.core import nodes, ordering
22+
23+
24+
@dataclasses.dataclass(frozen=True)
class CompileRequest:
    """Immutable description of a single compilation job."""

    # Root of the expression tree that should be compiled.
    node: nodes.BigFrameNode
    # Whether the compiled SQL should sort its rows.
    sort_rows: bool
    # Whether hidden ordering-only key columns should be exposed in the output.
    # NOTE(review): inferred from how callers pass this flag -- confirm.
    materialize_all_order_keys: bool = False
    # Optional row limit for the result; ``None`` leaves it unset.
    peek_count: typing.Optional[int] = None
30+
31+
32+
@dataclasses.dataclass(frozen=True)
class CompileResult:
    """Immutable bundle of everything produced by one compilation."""

    # Generated SQL text.
    sql: str
    # BigQuery schema fields describing the columns of ``sql``.
    sql_schema: typing.Sequence[google.cloud.bigquery.SchemaField]
    # Row ordering associated with the result, or ``None`` when none is reported.
    row_order: typing.Optional[ordering.RowOrdering]

bigframes/core/compile/sqlglot/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,8 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
from __future__ import annotations
15+
16+
from bigframes.core.compile.sqlglot.compiler import SQLGlotCompiler
17+
18+
__all__ = ["SQLGlotCompiler"]
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
from __future__ import annotations
15+
16+
import dataclasses
17+
import functools
18+
import typing
19+
20+
import google.cloud.bigquery as bigquery
21+
import sqlglot.expressions as sge
22+
23+
from bigframes.core import expression, nodes, rewrite
24+
from bigframes.core.compile import configs
25+
from bigframes.core.compile.sqlglot import sql_gen
26+
import bigframes.core.ordering as bf_ordering
27+
28+
29+
@dataclasses.dataclass(frozen=True)
class SQLGlotCompiler:
    """Compiles BigFrame nodes into SQL using SQLGlot."""

    # Unannotated on purpose: this is a plain class attribute shared by every
    # instance, not a dataclass field.
    sql_gen = sql_gen.SQLGen()

    def compile(
        self,
        node: nodes.BigFrameNode,
        *,
        ordered: bool = True,
        limit: typing.Optional[int] = None,
    ) -> str:
        """Compile node into sql where rows are sorted with ORDER BY.

        Args:
            node: Root of the expression tree to compile.
            ordered: Forwarded as ``sort_rows`` on the compile request.
            limit: Forwarded as ``peek_count`` on the compile request.

        Returns:
            The generated SQL string.
        """
        request = configs.CompileRequest(node, sort_rows=ordered, peek_count=limit)
        return self._compile_sql(request).sql

    def compile_raw(
        self,
        node: nodes.BigFrameNode,
    ) -> typing.Tuple[
        str, typing.Sequence[bigquery.SchemaField], bf_ordering.RowOrdering
    ]:
        """Compile node into sql that exposes all columns, including hidden
        ordering-only columns.

        Args:
            node: Root of the expression tree to compile.

        Returns:
            A ``(sql, schema, row_order)`` tuple taken from the compile result.

        Raises:
            AssertionError: If the compile result carries no row ordering.
        """
        request = configs.CompileRequest(
            node, sort_rows=False, materialize_all_order_keys=True
        )
        result = self._compile_sql(request)
        assert result.row_order is not None
        return result.sql, result.sql_schema, result.row_order

    def _compile_sql(self, request: configs.CompileRequest) -> configs.CompileResult:
        """Rewrite the requested tree and compile it into a ``CompileResult``.

        Args:
            request: The node to compile plus the sorting, limit and
                order-key options.

        Returns:
            The SQL, its BigQuery schema, and the row ordering. In the unsorted
            path the ordering is ``None`` when it is absent or when it
            references columns that are not part of the output.

        Raises:
            AssertionError: If ``materialize_all_order_keys`` was requested but
                no output ordering could be determined.
        """
        output_names = tuple(
            (expression.DerefOp(col_id), col_id.sql) for col_id in request.node.ids
        )
        result_node = nodes.ResultNode(
            request.node,
            output_cols=output_names,
            limit=request.peek_count,
        )
        if request.sort_rows:
            # Can only pullup slice if we are doing ORDER BY in outermost SELECT
            # Need to do this before replacing unsupported ops, as that will
            # rewrite slice ops
            result_node = rewrite.pull_up_limits(result_node)
        result_node = _replace_unsupported_ops(result_node)
        # prune before pulling up order to avoid unnecessary row_number() ops
        result_node = typing.cast(
            nodes.ResultNode, rewrite.column_pruning(result_node)
        )
        result_node = rewrite.defer_order(
            result_node, output_hidden_row_keys=request.materialize_all_order_keys
        )
        if request.sort_rows:
            result_node = typing.cast(
                nodes.ResultNode, rewrite.column_pruning(result_node)
            )
            sql = self._compile_result_node(result_node)
            return configs.CompileResult(
                sql, result_node.schema.to_bigquery(), result_node.order_by
            )

        ordering: typing.Optional[bf_ordering.RowOrdering] = result_node.order_by
        result_node = dataclasses.replace(result_node, order_by=None)
        result_node = typing.cast(
            nodes.ResultNode, rewrite.column_pruning(result_node)
        )
        sql = self._compile_result_node(result_node)
        # Return the ordering iff no extra columns are needed to define the row
        # order. Start from None so the name is always bound, even when the
        # node carries no ordering at all.
        output_order: typing.Optional[bf_ordering.RowOrdering] = None
        if ordering is not None and ordering.referenced_columns.issubset(
            result_node.ids
        ):
            output_order = ordering
        assert (not request.materialize_all_order_keys) or (output_order is not None)
        return configs.CompileResult(
            sql, result_node.schema.to_bigquery(), output_order
        )

    def _compile_result_node(self, root: nodes.ResultNode) -> str:
        """Compile the child of ``root`` and render it as a SQL string.

        Args:
            root: Result node whose ``child`` subtree is compiled.

        Returns:
            The SQL string produced by ``self.sql_gen``.
        """
        sqlglot_expr = compile_node(root.child)
        # TODO: add order_by, limit, and selections to sqlglot_expr
        return self.sql_gen.sql(sqlglot_expr)
109+
110+
111+
def _replace_unsupported_ops(node: nodes.BigFrameNode):
    """Run the slice, timedelta and range-rolling rewrites over ``node``.

    Each rewrite is applied through ``nodes.bottom_up`` in a fixed sequence,
    with the output of one pass feeding the next.
    """
    rewrite_passes = (
        rewrite.rewrite_slice,
        rewrite.rewrite_timedelta_expressions,
        rewrite.rewrite_range_rolling,
    )
    rewritten = node
    for rewrite_pass in rewrite_passes:
        rewritten = nodes.bottom_up(rewritten, rewrite_pass)
    return rewritten
116+
117+
118+
@functools.lru_cache(maxsize=5000)
def compile_node(node: nodes.BigFrameNode) -> sge.Expression:
    """Compile node into a SQLGlot expression. Caches result."""

    def _compile_with_children(current, compiled_children):
        # reduce_up supplies the already-compiled children as one sequence;
        # the dispatcher expects them as separate positional arguments.
        return _compile_node(current, *compiled_children)

    return node.reduce_up(_compile_with_children)
122+
123+
124+
@functools.singledispatch
125+
def _compile_node(
126+
node: nodes.BigFrameNode, *compiled_children: sge.Expression
127+
) -> sge.Expression:
128+
"""Defines transformation but isn't cached, always use compile_node instead"""
129+
raise ValueError(f"Can't compile unrecognized node: {node}")
130+
131+
132+
@_compile_node.register
def compile_readlocal(node: nodes.ReadLocalNode, *args) -> sge.Expression:
    """Handler for ``ReadLocalNode``; currently returns a bare ``sge.select()``."""
    # TODO: add support for reading from local files
    placeholder_select = sge.select()
    return placeholder_select
136+
137+
138+
@_compile_node.register
def compile_selection(
    node: nodes.SelectionNode, child: sge.Expression
) -> sge.Expression:
    """Handler for ``SelectionNode``; currently a pass-through.

    Args:
        node: The selection node being compiled (not yet consulted).
        child: The already-compiled expression of the node's child.

    Returns:
        ``child`` unchanged.
    """
    # TODO: add support for selection
    return child
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import dataclasses
18+
19+
import sqlglot.dialects.bigquery
20+
import sqlglot.expressions as sge
21+
22+
23+
@dataclasses.dataclass(frozen=True)
class SQLGen:
    """Helper class to build SQLGlot Query and generate SQL string."""

    # The SQL dialect used for generation. Left unannotated, so it is a plain
    # class attribute rather than a dataclass field.
    dialect = sqlglot.dialects.bigquery.BigQuery

    # Whether to quote identifiers in the generated SQL.
    # NOTE(review): sql() below does not read this flag -- confirm where it is
    # meant to take effect.
    quoted: bool = True

    # Whether to pretty-print the generated SQL.
    pretty: bool = True

    def sql(self, expr: sge.Expression) -> str:
        """Generate SQL string from the given expression."""
        render_options = {"dialect": self.dialect, "pretty": self.pretty}
        return expr.sql(**render_options)

bigframes/dataframe.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -419,11 +419,23 @@ def sql(self) -> str:
419419
str:
420420
string representing the compiled SQL.
421421
"""
422-
include_index = self._has_index and (
423-
self.index.name is not None or len(self.index.names) > 1
424-
)
425-
sql, _, _ = self._to_sql_query(include_index=include_index)
426-
return sql
422+
try:
423+
include_index = self._has_index and (
424+
self.index.name is not None or len(self.index.names) > 1
425+
)
426+
sql, _, _ = self._to_sql_query(include_index=include_index)
427+
return sql
428+
except AttributeError as e:
429+
# Workaround for a development-mode debugging issue:
430+
# An `AttributeError` originating *inside* this @property getter (e.g., due to
431+
# a typo or referencing a non-existent attribute) can be mistakenly intercepted
432+
# by the class's __getattr__ method if one is defined.
433+
# We catch the AttributeError and raise SyntaxError instead to make it clear
434+
# the error originates *here* in the property implementation.
435+
# See: https://stackoverflow.com/questions/50542177/correct-handling-of-attributeerror-in-getattr-when-using-property
436+
raise SyntaxError(
437+
"AttributeError encountered. Please check the implementation for incorrect attribute access."
438+
) from e
427439

428440
@property
429441
def query_job(self) -> Optional[bigquery.QueryJob]:

0 commit comments

Comments
 (0)