Skip to content

Commit 91efe8e

Browse files
authored
refactor: create a basic set of SQL AST nodes (#757)
* feat: define googlesql AST nodes * updates docstring * address comments: quoting, adding as_alias, select_all to respect googlesql strictly
1 parent e7c6fc1 commit 91efe8e

File tree

6 files changed

+505
-0
lines changed

6 files changed

+505
-0
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Python classes representing GoogleSQL syntax nodes, adhering to the official syntax:
16+
https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax"""
17+
18+
from __future__ import annotations
19+
20+
from bigframes.core.compile.googlesql.expression import (
21+
AliasExpression,
22+
ColumnExpression,
23+
CTEExpression,
24+
StarExpression,
25+
TableExpression,
26+
)
27+
from bigframes.core.compile.googlesql.query import (
28+
AsAlias,
29+
FromClause,
30+
FromItem,
31+
NonRecursiveCTE,
32+
QueryExpr,
33+
Select,
34+
SelectAll,
35+
SelectExpression,
36+
)
37+
38+
__all__ = [
39+
"AliasExpression",
40+
"AsAlias",
41+
"ColumnExpression",
42+
"CTEExpression",
43+
"FromClause",
44+
"FromItem",
45+
"NonRecursiveCTE",
46+
"QueryExpr",
47+
"Select",
48+
"SelectAll",
49+
"SelectExpression",
50+
"StarExpression",
51+
"TableExpression",
52+
]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import abc
18+
19+
20+
class SQLSyntax(abc.ABC):
    """Abstract base class for nodes that can render themselves as GoogleSQL."""

    @abc.abstractmethod
    def sql(self) -> str:
        """Return the GoogleSQL text for this node.

        Concrete subclasses must override this method; the class cannot be
        instantiated until they do.
        """
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import dataclasses
18+
import typing
19+
20+
import bigframes.core.compile.googlesql.abc as abc
21+
22+
"""This module represents GoogleSQL `expression` and its extensions.
23+
Core class:
24+
25+
* `Expression`: Models basic SQL expressions.
26+
27+
Extended classes (not part of standard GoogleSQL syntax, but added for convenience):
28+
29+
* `ColumnExpression`: Represents column references.
30+
* `TableExpression`: Represents table references.
31+
* `AliasExpression`: Represents aliased expressions.
32+
* ...
33+
"""
34+
35+
36+
@dataclasses.dataclass
class Expression(abc.SQLSyntax):
    """Common base for the expression nodes in this module.

    Declares no fields of its own and does not implement `sql()`.
    """
39+
40+
41+
@dataclasses.dataclass
class ColumnExpression(Expression):
    """Column reference, optionally qualified by a table, alias, or CTE."""

    name: str
    parent: typing.Optional[TableExpression | AliasExpression | CTEExpression] = None

    def sql(self) -> str:
        """Return the backtick-quoted column, prefixed by the parent if set."""
        # NOTE: `name` is inserted verbatim between backticks; no escaping.
        quoted_name = f"`{self.name}`"
        if self.parent is None:
            return quoted_name
        return f"{self.parent.sql()}.{quoted_name}"
50+
51+
52+
@dataclasses.dataclass
class StarExpression(Expression):
    """The `*` wildcard, optionally qualified by a table, alias, or CTE."""

    parent: typing.Optional[TableExpression | AliasExpression | CTEExpression] = None

    def sql(self) -> str:
        """Return `*`, or `<parent>.*` when a parent is set."""
        if self.parent is None:
            return "*"
        return f"{self.parent.sql()}.*"
60+
61+
62+
@dataclasses.dataclass
class TableExpression(Expression):
    """Table reference with optional dataset and project qualifiers."""

    table_id: str
    dataset_id: typing.Optional[str] = None
    project_id: typing.Optional[str] = None

    def __post_init__(self):
        """Reject a project qualifier that has no dataset to attach to.

        Raises:
            ValueError: If `project_id` is given without `dataset_id`.
        """
        if self.dataset_id is None and self.project_id is not None:
            raise ValueError("The `dataset_id` is missing.")

    def sql(self) -> str:
        """Return the dot-joined path; each present part is backtick-quoted."""
        # Qualifiers are optional and rendered outermost first; the table id
        # is always rendered last.
        qualifiers = [
            part for part in (self.project_id, self.dataset_id) if part is not None
        ]
        return ".".join(f"`{part}`" for part in [*qualifiers, self.table_id])
80+
81+
82+
@dataclasses.dataclass
class AliasExpression(Expression):
    """An alias name used to refer to another expression or from-item."""

    alias: str

    def sql(self) -> str:
        """Return the alias wrapped in backticks."""
        return "`{}`".format(self.alias)
88+
89+
90+
@dataclasses.dataclass
class CTEExpression(Expression):
    """The name of a common table expression (CTE)."""

    name: str

    def sql(self) -> str:
        """Return the CTE name wrapped in backticks."""
        return "`{}`".format(self.name)
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import dataclasses
18+
import typing
19+
20+
import bigframes.core.compile.googlesql.abc as abc
21+
import bigframes.core.compile.googlesql.expression as expr
22+
23+
"""This module provides a structured representation of GoogleSQL syntax using nodes.
24+
Each node's name and child nodes are designed to strictly follow the official GoogleSQL
25+
syntax rules outlined in the documentation:
26+
https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax"""
27+
28+
29+
@dataclasses.dataclass
class QueryExpr(abc.SQLSyntax):
    """Node for the GoogleSQL `query_expr` rule: optional CTEs plus a select."""

    select: Select
    with_cte_list: typing.Sequence[NonRecursiveCTE] = ()

    def sql(self) -> str:
        """Return the select, preceded by a `WITH` clause when CTEs exist."""
        if len(self.with_cte_list) == 0:
            return self.select.sql()

        # CTEs are rendered before the select, separated by ",\n".
        cte_sql = ",\n".join(cte.sql() for cte in self.with_cte_list)
        return f"WITH {cte_sql}\n{self.select.sql()}"
46+
47+
48+
@dataclasses.dataclass
class Select(abc.SQLSyntax):
    """This class represents GoogleSQL `select` syntax.

    Attributes are rendered as `SELECT <select_list>` followed, when any
    from-clauses are present, by `FROM <from_clause_list>`.
    """

    select_list: typing.Sequence[typing.Union[SelectExpression, SelectAll]]
    from_clause_list: typing.Sequence[FromClause] = ()

    def sql(self) -> str:
        """Return the SELECT statement text.

        Select items and from-clauses are each joined with ",\\n". The FROM
        section is omitted entirely when `from_clause_list` is empty.
        """
        text = ["SELECT", ",\n".join(item.sql() for item in self.select_list)]

        # The field defaults to an empty tuple rather than None, so test for
        # emptiness: an `is not None` check would emit a dangling "FROM".
        if self.from_clause_list:
            from_clauses_sql = ",\n".join(
                clause.sql() for clause in self.from_clause_list
            )
            text.append(f"FROM\n{from_clauses_sql}")
        return "\n".join(text)
67+
68+
69+
@dataclasses.dataclass
class SelectExpression(abc.SQLSyntax):
    """Node for `select_expression`: a column with an optional alias."""

    expression: expr.ColumnExpression
    alias: typing.Optional[expr.AliasExpression] = None

    def sql(self) -> str:
        """Return the expression, with ` AS <alias>` appended if aliased."""
        rendered = self.expression.sql()
        if self.alias is not None:
            return f"{rendered} AS {self.alias.sql()}"
        return rendered
81+
82+
83+
@dataclasses.dataclass
class SelectAll(abc.SQLSyntax):
    """Node for `select_all`, i.e. the `SELECT *` form."""

    expression: expr.StarExpression

    def sql(self) -> str:
        """Return the SQL of the wrapped star expression unchanged."""
        star_sql = self.expression.sql()
        return star_sql
91+
92+
93+
@dataclasses.dataclass
class FromClause(abc.SQLSyntax):
    """Node for the GoogleSQL `from_clause` rule; wraps a single from-item."""

    from_item: FromItem

    def sql(self) -> str:
        """Return the SQL of the wrapped from-item unchanged."""
        item_sql = self.from_item.sql()
        return item_sql
101+
102+
103+
@dataclasses.dataclass
class FromItem(abc.SQLSyntax):
    """Node for the GoogleSQL `from_item` rule.

    Exactly one of `table_name`, `query_expr`, or `cte_name` must be set;
    `as_alias` may accompany any of them.
    """

    table_name: typing.Optional[expr.TableExpression] = None
    # `str` is accepted for now so that SQL text compiled elsewhere can be
    # used directly as a subquery.
    query_expr: typing.Optional[QueryExpr | str] = None
    cte_name: typing.Optional[expr.CTEExpression] = None
    as_alias: typing.Optional[AsAlias] = None

    def __post_init__(self):
        """Check that exactly one source was supplied.

        Raises:
            ValueError: If none, or more than one, of the sources is set.
        """
        sources = (self.table_name, self.query_expr, self.cte_name)
        provided = [source for source in sources if source is not None]
        if len(provided) != 1:
            raise ValueError("Exactly one of expressions must be provided.")

    def sql(self) -> str:
        """Return the source's SQL, followed by the alias clause if present.

        A subquery (node or raw string) is wrapped in parentheses.

        Raises:
            ValueError: If no source is set when rendering.
        """
        if self.table_name is not None:
            source_sql = self.table_name.sql()
        elif self.query_expr is not None:
            if isinstance(self.query_expr, str):
                subquery = self.query_expr
            else:
                subquery = self.query_expr.sql()
            source_sql = f"({subquery})"
        elif self.cte_name is not None:
            source_sql = self.cte_name.sql()
        else:
            # Fields are mutable, so this can be reached if they were cleared
            # after construction.
            raise ValueError("One of from items must be provided.")

        if self.as_alias is not None:
            return f"{source_sql} {self.as_alias.sql()}"
        return source_sql
145+
146+
147+
@dataclasses.dataclass
class NonRecursiveCTE(abc.SQLSyntax):
    """Node for the GoogleSQL `non_recursive_cte` rule: a named subquery."""

    cte_name: expr.CTEExpression
    query_expr: QueryExpr

    def sql(self) -> str:
        """Return `<name> AS (` + newline + query + newline + `)`."""
        name_sql = self.cte_name.sql()
        body_sql = self.query_expr.sql()
        return f"{name_sql} AS (\n{body_sql}\n)"
156+
157+
158+
@dataclasses.dataclass
class AsAlias(abc.SQLSyntax):
    """Node for the GoogleSQL `as_alias` rule."""

    alias: expr.AliasExpression

    def sql(self) -> str:
        """Return the alias SQL prefixed with the `AS` keyword."""
        alias_sql = self.alias.sql()
        return f"AS {alias_sql}"

tests/unit/core/compiler/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

0 commit comments

Comments
 (0)