Skip to content

Commit 4ea0e90

Browse files
chore: refactor IsNullOp and NotNullOp logic to make scalar ops generation easier (#1822)
* Refactor IsNullOp and NotNullOp logic This change consolidates the definition and compilation logic for IsNullOp, isnull_op, NotNullOp, and notnull_op into a new, dedicated file: `bigframes/operations/isnull_op.py`. Key changes include: - Moved operator definitions from `generic_ops.py` to `isnull_op.py`. - Moved Ibis scalar compilation logic from `scalar_op_compiler.py` to `isnull_op.py`. - Moved Polars expression compilation logic from `polars/compiler.py` to `isnull_op.py`. - Updated main compilers (`ScalarOpCompiler` and `PolarsExpressionCompiler`) to directly import and register the compilation functions from `isnull_op.py`. - Ensured all internal references and naming conventions (`IsNullOp`, `isnull_op`, `NotNullOp`, `notnull_op`) are consistent with the refactored structure. NOTE: I was unable to perform test validation (unit and system) due to missing project-specific dependencies, primarily `bigframes_vendored` and `test_utils.prefixer`. The changes are provided based on the completion of the refactoring steps as you requested. * fix circular imports * bad merge * fix local pytest * dont construct polars compiler if no polars * limit scope to just splitting large files * Update bigframes/core/compile/compiled.py * revert unneeded circular import workaround * combine null ops into generic_ops files * revert expression change * Update bigframes/core/compile/polars/operations/__init__.py * skip polars test for old polars * Update bigframes/core/compile/ibis_compiler/operations/__init__.py * add minversion to skips * more skips * fix minimum polars version detection * update colab constraints * skip polars on 3.10 --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
1 parent d9bc4a5 commit 4ea0e90

22 files changed

+1030
-232
lines changed

bigframes/_importing.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import importlib
1515
from types import ModuleType
1616

17+
import numpy
1718
from packaging import version
1819

1920
# Keep this in sync with setup.py
@@ -22,9 +23,13 @@
2223

2324
def import_polars() -> ModuleType:
2425
polars_module = importlib.import_module("polars")
25-
imported_version = version.Version(polars_module.build_info()["version"])
26-
if imported_version < POLARS_MIN_VERSION:
26+
# Check for necessary methods instead of the version number because we
27+
# can't trust the polars version until
28+
# https://github.com/pola-rs/polars/issues/23940 is fixed.
29+
try:
30+
polars_module.lit(numpy.int64(100), dtype=polars_module.Int64())
31+
except TypeError:
2732
raise ImportError(
28-
f"Imported polars version: {imported_version} is below the minimum version: {POLARS_MIN_VERSION}"
33+
f"Imported polars version is likely below the minimum version: {POLARS_MIN_VERSION}"
2934
)
3035
return polars_module

bigframes/core/compile/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
from __future__ import annotations
1515

1616
from bigframes.core.compile.api import test_only_ibis_inferred_schema
17-
from bigframes.core.compile.compiler import compile_sql
1817
from bigframes.core.compile.configs import CompileRequest, CompileResult
18+
from bigframes.core.compile.ibis_compiler.ibis_compiler import compile_sql
1919

2020
__all__ = [
2121
"test_only_ibis_inferred_schema",

bigframes/core/compile/api.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from typing import TYPE_CHECKING
1717

1818
from bigframes.core import rewrite
19-
from bigframes.core.compile import compiler
19+
from bigframes.core.compile.ibis_compiler import ibis_compiler
2020

2121
if TYPE_CHECKING:
2222
import bigframes.core.nodes
@@ -26,9 +26,9 @@ def test_only_ibis_inferred_schema(node: bigframes.core.nodes.BigFrameNode):
2626
"""Use only for testing paths to ensure ibis inferred schema does not diverge from bigframes inferred schema."""
2727
import bigframes.core.schema
2828

29-
node = compiler._replace_unsupported_ops(node)
29+
node = ibis_compiler._replace_unsupported_ops(node)
3030
node = rewrite.bake_order(node)
31-
ir = compiler.compile_node(node)
31+
ir = ibis_compiler.compile_node(node)
3232
items = tuple(
3333
bigframes.core.schema.SchemaItem(name, ir.get_column_type(ibis_id))
3434
for name, ibis_id in zip(node.schema.names, ir.column_ids)

bigframes/core/compile/compiled.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,10 @@
3030
import pyarrow as pa
3131

3232
from bigframes.core import utils
33-
import bigframes.core.compile.aggregate_compiler as agg_compiler
3433
import bigframes.core.compile.googlesql
34+
import bigframes.core.compile.ibis_compiler.aggregate_compiler as agg_compiler
35+
import bigframes.core.compile.ibis_compiler.scalar_op_compiler as op_compilers
3536
import bigframes.core.compile.ibis_types
36-
import bigframes.core.compile.scalar_op_compiler as op_compilers
37-
import bigframes.core.compile.scalar_op_compiler as scalar_op_compiler
3837
import bigframes.core.expression as ex
3938
from bigframes.core.ordering import OrderingExpression
4039
import bigframes.core.sql
@@ -679,13 +678,15 @@ def _join_condition(
679678

680679

681680
def _as_groupable(value: ibis_types.Value):
681+
from bigframes.core.compile.ibis_compiler import scalar_op_registry
682+
682683
# Some types need to be converted to another type to enable groupby
683684
if value.type().is_float64():
684685
return value.cast(ibis_dtypes.str)
685686
elif value.type().is_geospatial():
686687
return typing.cast(ibis_types.GeoSpatialColumn, value).as_binary()
687688
elif value.type().is_json():
688-
return scalar_op_compiler.to_json_string(value)
689+
return scalar_op_registry.to_json_string(value)
689690
else:
690691
return value
691692

tests/system/small/pandas/io/__init__.py renamed to bigframes/core/compile/ibis_compiler/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,14 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
15+
"""Compiler for BigFrames expression to Ibis expression.
16+
17+
Make sure to import all ibis_compiler implementations here so that they get
18+
registered.
19+
"""
20+
21+
from __future__ import annotations
22+
23+
import bigframes.core.compile.ibis_compiler.operations.generic_ops # noqa: F401
24+
import bigframes.core.compile.ibis_compiler.scalar_op_registry # noqa: F401

bigframes/core/compile/aggregate_compiler.py renamed to bigframes/core/compile/ibis_compiler/aggregate_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727
import pandas as pd
2828

2929
from bigframes.core.compile import constants as compiler_constants
30+
import bigframes.core.compile.ibis_compiler.scalar_op_compiler as scalar_compilers
3031
import bigframes.core.compile.ibis_types as compile_ibis_types
31-
import bigframes.core.compile.scalar_op_compiler as scalar_compilers
3232
import bigframes.core.expression as ex
3333
import bigframes.core.window_spec as window_spec
3434
import bigframes.operations.aggregations as agg_ops

bigframes/core/compile/compiler.py renamed to bigframes/core/compile/ibis_compiler/ibis_compiler.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import bigframes.core.compile.concat as concat_impl
3030
import bigframes.core.compile.configs as configs
3131
import bigframes.core.compile.explode
32-
import bigframes.core.compile.scalar_op_compiler as compile_scalar
3332
import bigframes.core.nodes as nodes
3433
import bigframes.core.ordering as bf_ordering
3534
import bigframes.core.rewrite as rewrites
@@ -178,6 +177,8 @@ def compile_readlocal(node: nodes.ReadLocalNode, *args):
178177

179178
@_compile_node.register
180179
def compile_readtable(node: nodes.ReadTableNode, *args):
180+
from bigframes.core.compile.ibis_compiler import scalar_op_registry
181+
181182
ibis_table = _table_to_ibis(
182183
node.source, scan_cols=[col.source_id for col in node.scan_list.items]
183184
)
@@ -188,7 +189,7 @@ def compile_readtable(node: nodes.ReadTableNode, *args):
188189
scan_item.dtype == dtypes.JSON_DTYPE
189190
and ibis_table[scan_item.source_id].type() == ibis_dtypes.string
190191
):
191-
json_column = compile_scalar.parse_json(
192+
json_column = scalar_op_registry.parse_json(
192193
ibis_table[scan_item.source_id]
193194
).name(scan_item.source_id)
194195
ibis_table = ibis_table.mutate(json_column)

tests/system/small/pandas/io/api/__init__.py renamed to bigframes/core/compile/ibis_compiler/operations/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,11 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
15+
"""Operation implementations for the Ibis-based compiler.
16+
17+
This directory structure should reflect the same layout as the
18+
`bigframes/operations` directory where the operations are defined.
19+
20+
Prefer a few ops per file to keep file sizes manageable for text editors and LLMs.
21+
"""
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
BigFrames -> Ibis compilation for the operations in bigframes.operations.generic_ops.
17+
18+
Please keep implementations in sequential order by op name.
19+
"""
20+
21+
from __future__ import annotations
22+
23+
from bigframes_vendored.ibis.expr import types as ibis_types
24+
25+
from bigframes.core.compile.ibis_compiler import scalar_op_compiler
26+
from bigframes.operations import generic_ops
27+
28+
register_unary_op = scalar_op_compiler.scalar_op_compiler.register_unary_op
29+
30+
31+
@register_unary_op(generic_ops.notnull_op)
32+
def notnull_op_impl(x: ibis_types.Value):
33+
return x.notnull()
34+
35+
36+
@register_unary_op(generic_ops.isnull_op)
37+
def isnull_op_impl(x: ibis_types.Value):
38+
return x.isnull()
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""To avoid circular imports, this module should _not_ depend on any ops."""
16+
17+
from __future__ import annotations
18+
19+
import functools
20+
import typing
21+
from typing import TYPE_CHECKING
22+
23+
import bigframes_vendored.ibis.expr.types as ibis_types
24+
25+
import bigframes.core.compile.ibis_types
26+
import bigframes.core.expression as ex
27+
28+
if TYPE_CHECKING:
29+
import bigframes.operations as ops
30+
31+
32+
class ScalarOpCompiler:
33+
# Mapping of operation name to implemenations
34+
_registry: dict[
35+
str,
36+
typing.Callable[
37+
[typing.Sequence[ibis_types.Value], ops.RowOp], ibis_types.Value
38+
],
39+
] = {}
40+
41+
@functools.singledispatchmethod
42+
def compile_expression(
43+
self,
44+
expression: ex.Expression,
45+
bindings: typing.Dict[str, ibis_types.Value],
46+
) -> ibis_types.Value:
47+
raise NotImplementedError(f"Unrecognized expression: {expression}")
48+
49+
@compile_expression.register
50+
def _(
51+
self,
52+
expression: ex.ScalarConstantExpression,
53+
bindings: typing.Dict[str, ibis_types.Value],
54+
) -> ibis_types.Value:
55+
return bigframes.core.compile.ibis_types.literal_to_ibis_scalar(
56+
expression.value, expression.dtype
57+
)
58+
59+
@compile_expression.register
60+
def _(
61+
self,
62+
expression: ex.DerefOp,
63+
bindings: typing.Dict[str, ibis_types.Value],
64+
) -> ibis_types.Value:
65+
if expression.id.sql not in bindings:
66+
raise ValueError(f"Could not resolve unbound variable {expression.id}")
67+
else:
68+
return bindings[expression.id.sql]
69+
70+
@compile_expression.register
71+
def _(
72+
self,
73+
expression: ex.OpExpression,
74+
bindings: typing.Dict[str, ibis_types.Value],
75+
) -> ibis_types.Value:
76+
inputs = [
77+
self.compile_expression(sub_expr, bindings)
78+
for sub_expr in expression.inputs
79+
]
80+
return self.compile_row_op(expression.op, inputs)
81+
82+
def compile_row_op(
83+
self, op: ops.RowOp, inputs: typing.Sequence[ibis_types.Value]
84+
) -> ibis_types.Value:
85+
impl = self._registry[op.name]
86+
return impl(inputs, op)
87+
88+
def register_unary_op(
89+
self,
90+
op_ref: typing.Union[ops.UnaryOp, type[ops.UnaryOp]],
91+
pass_op: bool = False,
92+
):
93+
"""
94+
Decorator to register a unary op implementation.
95+
96+
Args:
97+
op_ref (UnaryOp or UnaryOp type):
98+
Class or instance of operator that is implemented by the decorated function.
99+
pass_op (bool):
100+
Set to true if implementation takes the operator object as the last argument.
101+
This is needed for parameterized ops where parameters are part of op object.
102+
"""
103+
key = typing.cast(str, op_ref.name)
104+
105+
def decorator(impl: typing.Callable[..., ibis_types.Value]):
106+
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
107+
if pass_op:
108+
return impl(args[0], op)
109+
else:
110+
return impl(args[0])
111+
112+
self._register(key, normalized_impl)
113+
return impl
114+
115+
return decorator
116+
117+
def register_binary_op(
118+
self,
119+
op_ref: typing.Union[ops.BinaryOp, type[ops.BinaryOp]],
120+
pass_op: bool = False,
121+
):
122+
"""
123+
Decorator to register a binary op implementation.
124+
125+
Args:
126+
op_ref (BinaryOp or BinaryOp type):
127+
Class or instance of operator that is implemented by the decorated function.
128+
pass_op (bool):
129+
Set to true if implementation takes the operator object as the last argument.
130+
This is needed for parameterized ops where parameters are part of op object.
131+
"""
132+
key = typing.cast(str, op_ref.name)
133+
134+
def decorator(impl: typing.Callable[..., ibis_types.Value]):
135+
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
136+
if pass_op:
137+
return impl(args[0], args[1], op)
138+
else:
139+
return impl(args[0], args[1])
140+
141+
self._register(key, normalized_impl)
142+
return impl
143+
144+
return decorator
145+
146+
def register_ternary_op(
147+
self, op_ref: typing.Union[ops.TernaryOp, type[ops.TernaryOp]]
148+
):
149+
"""
150+
Decorator to register a ternary op implementation.
151+
152+
Args:
153+
op_ref (TernaryOp or TernaryOp type):
154+
Class or instance of operator that is implemented by the decorated function.
155+
"""
156+
key = typing.cast(str, op_ref.name)
157+
158+
def decorator(impl: typing.Callable[..., ibis_types.Value]):
159+
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
160+
return impl(args[0], args[1], args[2])
161+
162+
self._register(key, normalized_impl)
163+
return impl
164+
165+
return decorator
166+
167+
def register_nary_op(
168+
self, op_ref: typing.Union[ops.NaryOp, type[ops.NaryOp]], pass_op: bool = False
169+
):
170+
"""
171+
Decorator to register a nary op implementation.
172+
173+
Args:
174+
op_ref (NaryOp or NaryOp type):
175+
Class or instance of operator that is implemented by the decorated function.
176+
pass_op (bool):
177+
Set to true if implementation takes the operator object as the last argument.
178+
This is needed for parameterized ops where parameters are part of op object.
179+
"""
180+
key = typing.cast(str, op_ref.name)
181+
182+
def decorator(impl: typing.Callable[..., ibis_types.Value]):
183+
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
184+
if pass_op:
185+
return impl(*args, op=op)
186+
else:
187+
return impl(*args)
188+
189+
self._register(key, normalized_impl)
190+
return impl
191+
192+
return decorator
193+
194+
def _register(
195+
self,
196+
op_name: str,
197+
impl: typing.Callable[
198+
[typing.Sequence[ibis_types.Value], ops.RowOp], ibis_types.Value
199+
],
200+
):
201+
if op_name in self._registry:
202+
raise ValueError(f"Operation name {op_name} already registered")
203+
self._registry[op_name] = impl
204+
205+
206+
# Singleton compiler
207+
scalar_op_compiler = ScalarOpCompiler()

0 commit comments

Comments
 (0)