Skip to content

Commit 8e8adf0

Browse files
refactor: move operator definitions from __init__.py to separate files (#1290)
* refactor: move operator definitions from __init__.py to separate files * import geo operators * use Union instead of | * update more Union types * expose CaseWhenOp * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix lint error --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent cf852a2 commit 8e8adf0

19 files changed

+1848
-1080
lines changed

bigframes/operations/__init__.py

Lines changed: 297 additions & 1080 deletions
Large diffs are not rendered by default.

bigframes/operations/array_ops.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import dataclasses
16+
import typing
17+
18+
from bigframes import dtypes
19+
from bigframes.operations import base_ops
20+
21+
22+
@dataclasses.dataclass(frozen=True)
class ArrayToStringOp(base_ops.UnaryOp):
    """Unary op registered as ``array_to_string``, parameterized by a delimiter.

    Only type resolution lives here; the actual implementation is supplied
    wherever the op is compiled.
    """

    name: typing.ClassVar[str] = "array_to_string"
    # Separator carried with the op -- presumably placed between the joined
    # array elements; confirm against the compiler implementation.
    delimiter: str

    def output_type(self, *input_types):
        """Return the string dtype for an array-of-strings operand.

        Args:
            *input_types: Operand dtypes; only the first one is inspected.

        Raises:
            TypeError: If the first operand is not an array of strings.
        """
        operand_type = input_types[0]
        if dtypes.is_array_string_like(operand_type):
            return dtypes.STRING_DTYPE
        raise TypeError("Input type must be an array of string type.")
32+
33+
34+
@dataclasses.dataclass(frozen=True)
class ArrayIndexOp(base_ops.UnaryOp):
    """Unary op registered as ``array_index``, parameterized by an integer index."""

    name: typing.ClassVar[str] = "array_index"
    # Position carried with the op (indexing semantics are defined by the compiler).
    index: int

    def output_type(self, *input_types):
        """Resolve the result dtype from the first operand's dtype.

        A string-like operand yields the string dtype; an array-like operand
        yields the bigframes dtype matching the array's Arrow value type.

        Raises:
            TypeError: If the operand is neither string-like nor array-like.
        """
        operand_type = input_types[0]
        # String-like is tested first, exactly as the branch order requires.
        if dtypes.is_string_like(operand_type):
            return dtypes.STRING_DTYPE
        if dtypes.is_array_like(operand_type):
            element_arrow_type = operand_type.pyarrow_dtype.value_type
            return dtypes.arrow_dtype_to_bigframes_dtype(element_arrow_type)
        raise TypeError("Input type must be an array or string-like type.")
49+
50+
51+
@dataclasses.dataclass(frozen=True)
class ArraySliceOp(base_ops.UnaryOp):
    """Unary op registered as ``array_slice``, parameterized by slice bounds."""

    name: typing.ClassVar[str] = "array_slice"
    # Slice parameters carried with the op; ``stop`` and ``step`` are optional.
    start: int
    stop: typing.Optional[int] = None
    step: typing.Optional[int] = None

    def output_type(self, *input_types):
        """Resolve the result dtype from the first operand's dtype.

        A string-like operand yields the string dtype; an array-like operand
        keeps its own dtype unchanged.

        Raises:
            TypeError: If the operand is neither string-like nor array-like.
        """
        operand_type = input_types[0]
        if dtypes.is_string_like(operand_type):
            return dtypes.STRING_DTYPE
        if dtypes.is_array_like(operand_type):
            return operand_type
        raise TypeError("Input type must be an array or string-like type.")

bigframes/operations/base_ops.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import dataclasses
18+
import typing
19+
20+
from bigframes import dtypes
21+
import bigframes.operations.type as op_typing
22+
23+
if typing.TYPE_CHECKING:
24+
# Avoids circular dependency
25+
import bigframes.core.expression
26+
27+
28+
class RowOp(typing.Protocol):
    """Structural interface describing a row operation.

    Any object exposing these members satisfies the protocol; no inheritance
    is required.
    """

    @property
    def name(self) -> str:
        """Name identifying the operation."""

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Result type of the operation for the given input types."""

    @property
    def is_monotonic(self) -> bool:
        """Whether the row operation preserves total ordering. Can be pruned from ordering expressions."""

    @property
    def is_bijective(self) -> bool:
        """Whether the operation has a 1:1 mapping between inputs and outputs."""

    @property
    def deterministic(self) -> bool:
        """Whether the operation is deterministic (given deterministic inputs)."""
50+
51+
52+
@dataclasses.dataclass(frozen=True)
class ScalarOp:
    """Base class for scalar operations, supplying conservative defaults.

    Subclasses are expected to provide ``name`` and ``output_type``; the
    property defaults below may be overridden where an op has stronger
    guarantees.
    """

    @property
    def name(self) -> str:
        """Name identifying the operation.

        Raises:
            NotImplementedError: Always, unless a subclass overrides ``name``.
        """
        # The message names this class so the failure points at the right base.
        raise NotImplementedError("ScalarOp abstract base class has no implementation")

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Result type of the operation for the given input types.

        Raises:
            NotImplementedError: Always, unless a subclass overrides this method.
        """
        raise NotImplementedError("Abstract operation has no output type")

    @property
    def is_monotonic(self) -> bool:
        """Whether the row operation preserves total ordering. Can be pruned from ordering expressions."""
        return False

    @property
    def is_bijective(self) -> bool:
        """Whether the operation has a 1:1 mapping between inputs and outputs."""
        return False

    @property
    def deterministic(self) -> bool:
        """Whether the operation is deterministic (given deterministic inputs)."""
        return True
75+
76+
77+
@dataclasses.dataclass(frozen=True)
class NaryOp(ScalarOp):
    """Scalar op whose ``as_expr`` accepts any number of inputs."""

    def as_expr(
        self,
        *exprs: typing.Union[str, bigframes.core.expression.Expression],
    ) -> bigframes.core.expression.Expression:
        """Wrap this op and its inputs in an ``OpExpression``.

        Args:
            *exprs: Inputs in order; each is either an expression or a string
                that is converted by ``_convert_expr_input``.

        Returns:
            An ``OpExpression`` pairing this op with the converted inputs.
        """
        # Imported locally; the module-level import is TYPE_CHECKING-only to
        # avoid a circular dependency.
        import bigframes.core.expression

        # Keep this in sync with output_type and compilers
        operands = tuple(_convert_expr_input(operand) for operand in exprs)
        return bigframes.core.expression.OpExpression(self, operands)
95+
96+
97+
# These classes can be used to create simple ops that don't take local parameters
98+
# All that is needed is a unique name, and to register an implementation in ibis_mappings.py
99+
@dataclasses.dataclass(frozen=True)
class UnaryOp(ScalarOp):
    """Scalar op taking exactly one input."""

    @property
    def arguments(self) -> int:
        """Number of inputs the op takes (always 1)."""
        return 1

    def as_expr(
        self, input_id: typing.Union[str, bigframes.core.expression.Expression] = "arg"
    ) -> bigframes.core.expression.Expression:
        """Wrap this op and its single input in an ``OpExpression``.

        Args:
            input_id: The input expression, or a string converted by
                ``_convert_expr_input``. Defaults to ``"arg"``.
        """
        # Imported locally; the module-level import is TYPE_CHECKING-only.
        import bigframes.core.expression

        operand = _convert_expr_input(input_id)
        return bigframes.core.expression.OpExpression(self, (operand,))
113+
114+
115+
@dataclasses.dataclass(frozen=True)
class BinaryOp(ScalarOp):
    """Scalar op taking exactly two inputs."""

    @property
    def arguments(self) -> int:
        """Number of inputs the op takes (always 2)."""
        return 2

    def as_expr(
        self,
        left_input: typing.Union[str, bigframes.core.expression.Expression] = "arg1",
        right_input: typing.Union[str, bigframes.core.expression.Expression] = "arg2",
    ) -> bigframes.core.expression.Expression:
        """Wrap this op and its two inputs in an ``OpExpression``.

        Args:
            left_input: First input; an expression or a string converted by
                ``_convert_expr_input``. Defaults to ``"arg1"``.
            right_input: Second input, handled the same way. Defaults to
                ``"arg2"``.
        """
        # Imported locally; the module-level import is TYPE_CHECKING-only.
        import bigframes.core.expression

        # Convert left before right, preserving the original evaluation order.
        left_operand = _convert_expr_input(left_input)
        right_operand = _convert_expr_input(right_input)
        return bigframes.core.expression.OpExpression(
            self, (left_operand, right_operand)
        )
135+
136+
137+
@dataclasses.dataclass(frozen=True)
class TernaryOp(ScalarOp):
    """Scalar op taking exactly three inputs."""

    @property
    def arguments(self) -> int:
        """Number of inputs the op takes (always 3)."""
        return 3

    def as_expr(
        self,
        input1: typing.Union[str, bigframes.core.expression.Expression] = "arg1",
        input2: typing.Union[str, bigframes.core.expression.Expression] = "arg2",
        input3: typing.Union[str, bigframes.core.expression.Expression] = "arg3",
    ) -> bigframes.core.expression.Expression:
        """Wrap this op and its three inputs in an ``OpExpression``.

        Each input is either an expression or a string converted by
        ``_convert_expr_input``; the defaults are ``"arg1"``, ``"arg2"`` and
        ``"arg3"``.
        """
        # Imported locally; the module-level import is TYPE_CHECKING-only.
        import bigframes.core.expression

        # Convert in positional order, preserving the original evaluation order.
        first_operand = _convert_expr_input(input1)
        second_operand = _convert_expr_input(input2)
        third_operand = _convert_expr_input(input3)
        return bigframes.core.expression.OpExpression(
            self, (first_operand, second_operand, third_operand)
        )
159+
160+
161+
def _convert_expr_input(
    input: typing.Union[str, bigframes.core.expression.Expression]
) -> bigframes.core.expression.Expression:
    """Allows creating column references with just a string.

    Args:
        input: Either an expression, returned unchanged, or a string, which
            is passed to ``bigframes.core.expression.deref``.

    Returns:
        The expression to use as an op input.
    """
    # Imported locally; the module-level import is TYPE_CHECKING-only to
    # avoid a circular dependency.
    import bigframes.core.expression

    if not isinstance(input, str):
        return input
    return bigframes.core.expression.deref(input)
171+
172+
173+
# Operation Factories
174+
def create_unary_op(name: str, type_signature: op_typing.UnaryTypeSignature) -> UnaryOp:
    """Build a frozen ``UnaryOp`` subclass called ``name`` and return an instance.

    Args:
        name: Used both as the generated class name and as the op's ``name``
            class attribute.
        type_signature: Its ``as_method`` attribute is installed as the
            generated class's ``output_type``.

    Returns:
        A new instance of the generated dataclass.
    """
    # Both entries are ClassVars, so the generated dataclass has no instance
    # fields and can be constructed without arguments.
    class_attributes = [
        ("name", typing.ClassVar[str], name),
        ("output_type", typing.ClassVar[typing.Callable], type_signature.as_method),
    ]
    op_class = dataclasses.make_dataclass(
        name,
        class_attributes,
        bases=(UnaryOp,),
        frozen=True,
    )
    return op_class()
184+
185+
186+
def create_binary_op(
    name: str, type_signature: op_typing.BinaryTypeSignature
) -> BinaryOp:
    """Build a frozen ``BinaryOp`` subclass called ``name`` and return an instance.

    Args:
        name: Used both as the generated class name and as the op's ``name``
            class attribute.
        type_signature: Its ``as_method`` attribute is installed as the
            generated class's ``output_type``.

    Returns:
        A new instance of the generated dataclass.
    """
    # Both entries are ClassVars, so the generated dataclass has no instance
    # fields and can be constructed without arguments.
    class_attributes = [
        ("name", typing.ClassVar[str], name),
        ("output_type", typing.ClassVar[typing.Callable], type_signature.as_method),
    ]
    op_class = dataclasses.make_dataclass(
        name,
        class_attributes,
        bases=(BinaryOp,),
        frozen=True,
    )
    return op_class()

bigframes/operations/blob_ops.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import dataclasses
16+
import typing
17+
18+
from bigframes import dtypes
19+
from bigframes.operations import base_ops
20+
import bigframes.operations.type as op_typing
21+
22+
# Parameterless unary op named "obj_fetch_metadata", typed by the
# BLOB_TRANSFORM signature.
obj_fetch_metadata_op = base_ops.create_unary_op(
    name="obj_fetch_metadata",
    type_signature=op_typing.BLOB_TRANSFORM,
)
25+
26+
27+
@dataclasses.dataclass(frozen=True)
class ObjGetAccessUrl(base_ops.UnaryOp):
    """Unary op registered as ``obj_get_access_url``, parameterized by access mode."""

    name: typing.ClassVar[str] = "obj_get_access_url"
    # access mode, e.g. R read, W write, RW read & write
    mode: str

    def output_type(self, *input_types):
        """Return the JSON dtype; the input types are not inspected."""
        return dtypes.JSON_DTYPE
34+
35+
36+
@dataclasses.dataclass(frozen=True)
class ObjMakeRef(base_ops.BinaryOp):
    """Binary op registered as ``obj.make_ref``."""

    name: typing.ClassVar[str] = "obj.make_ref"

    def output_type(self, *input_types):
        """Return the object-reference dtype for string-like operands.

        Raises:
            TypeError: If any input type is not string-like.
        """
        has_non_string = any(
            not dtypes.is_string_like(operand_type) for operand_type in input_types
        )
        if has_non_string:
            raise TypeError("obj.make_ref requires string-like arguments")

        return dtypes.OBJ_REF_DTYPE


# Shared instance; the op has no fields, so it is constructed without arguments.
obj_make_ref_op = ObjMakeRef()

bigframes/operations/bool_ops.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
from bigframes.operations import base_ops
17+
import bigframes.operations.type as op_typing
18+
19+
# Boolean binary ops; all three share the LOGICAL type signature.
and_op = base_ops.create_binary_op(
    name="and",
    type_signature=op_typing.LOGICAL,
)

or_op = base_ops.create_binary_op(
    name="or",
    type_signature=op_typing.LOGICAL,
)

xor_op = base_ops.create_binary_op(
    name="xor",
    type_signature=op_typing.LOGICAL,
)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
from bigframes.operations import base_ops
17+
import bigframes.operations.type as op_typing
18+
19+
# Comparison binary ops; every op below shares the COMPARISON type signature.
eq_op = base_ops.create_binary_op(
    name="eq",
    type_signature=op_typing.COMPARISON,
)

# NOTE: the registered name is "eq_nulls_match" (plural "nulls"), which
# differs from the variable name.
eq_null_match_op = base_ops.create_binary_op(
    name="eq_nulls_match",
    type_signature=op_typing.COMPARISON,
)

ne_op = base_ops.create_binary_op(
    name="ne",
    type_signature=op_typing.COMPARISON,
)

lt_op = base_ops.create_binary_op(
    name="lt",
    type_signature=op_typing.COMPARISON,
)

gt_op = base_ops.create_binary_op(
    name="gt",
    type_signature=op_typing.COMPARISON,
)

le_op = base_ops.create_binary_op(
    name="le",
    type_signature=op_typing.COMPARISON,
)

ge_op = base_ops.create_binary_op(
    name="ge",
    type_signature=op_typing.COMPARISON,
)

0 commit comments

Comments
 (0)