Skip to content

Commit e0f065f

Browse files
test: Engine tests for selection ops (#1800)
1 parent 72a021f commit e0f065f

File tree

4 files changed

+107
-23
lines changed

4 files changed

+107
-23
lines changed

tests/system/small/engines/conftest.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import pytest
2020

2121
import bigframes
22-
from bigframes.core import local_data
22+
from bigframes.core import ArrayValue, local_data
2323
from bigframes.session import (
2424
direct_gbq_execution,
2525
local_scan_executor,
@@ -62,6 +62,13 @@ def managed_data_source(
6262
return local_data.ManagedArrowTable.from_pandas(scalars_pandas_df_index)
6363

6464

65+
@pytest.fixture(scope="module")
66+
def scalars_array_value(
67+
managed_data_source: local_data.ManagedArrowTable, fake_session: bigframes.Session
68+
):
69+
return ArrayValue.from_managed(managed_data_source, fake_session)
70+
71+
6572
@pytest.fixture(scope="module")
6673
def zero_row_source() -> local_data.ManagedArrowTable:
6774
return local_data.ManagedArrowTable.from_pandas(pd.DataFrame({"a": [], "b": []}))
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from bigframes.core import nodes
16+
from bigframes.session import semi_executor
17+
18+
19+
def assert_equivalence_execution(
20+
node: nodes.BigFrameNode,
21+
engine1: semi_executor.SemiExecutor,
22+
engine2: semi_executor.SemiExecutor,
23+
):
24+
e1_result = engine1.execute(node, ordered=True)
25+
e2_result = engine2.execute(node, ordered=True)
26+
assert e1_result is not None
27+
assert e2_result is not None
28+
# Schemas might have extra nullity markers, so normalize to the node's expected schema, which should be looser.
29+
e1_table = e1_result.to_arrow_table().cast(node.schema.to_pyarrow())
30+
e2_table = e2_result.to_arrow_table().cast(node.schema.to_pyarrow())
31+
assert e1_table.equals(e2_table), f"{e1_table} is not equal to {e2_table}"

tests/system/small/engines/test_read_local.py

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,29 +16,15 @@
1616

1717
import bigframes
1818
from bigframes.core import identifiers, local_data, nodes
19-
from bigframes.session import polars_executor, semi_executor
19+
from bigframes.session import polars_executor
20+
from tests.system.small.engines.engine_utils import assert_equivalence_execution
2021

2122
pytest.importorskip("polars")
2223

2324
# Polars is used as the reference because it's fast and local. Generally though, prefer the gbq engine where they disagree.
2425
REFERENCE_ENGINE = polars_executor.PolarsExecutor()
2526

2627

27-
def ensure_equivalence(
28-
node: nodes.BigFrameNode,
29-
engine1: semi_executor.SemiExecutor,
30-
engine2: semi_executor.SemiExecutor,
31-
):
32-
e1_result = engine1.execute(node, ordered=True)
33-
e2_result = engine2.execute(node, ordered=True)
34-
assert e1_result is not None
35-
assert e2_result is not None
36-
# Schemas might have extra nullity markers, normalize to node expected schema, which should be looser
37-
e1_table = e1_result.to_arrow_table().cast(node.schema.to_pyarrow())
38-
e2_table = e2_result.to_arrow_table().cast(node.schema.to_pyarrow())
39-
assert e1_table.equals(e2_table), f"{e1_table} is not equal to {e2_table}"
40-
41-
4228
def test_engines_read_local(
4329
fake_session: bigframes.Session,
4430
managed_data_source: local_data.ManagedArrowTable,
@@ -51,7 +37,7 @@ def test_engines_read_local(
5137
local_node = nodes.ReadLocalNode(
5238
managed_data_source, scan_list, fake_session, offsets_col=None
5339
)
54-
ensure_equivalence(local_node, REFERENCE_ENGINE, engine)
40+
assert_equivalence_execution(local_node, REFERENCE_ENGINE, engine)
5541

5642

5743
def test_engines_read_local_w_offsets(
@@ -69,7 +55,7 @@ def test_engines_read_local_w_offsets(
6955
fake_session,
7056
offsets_col=identifiers.ColumnId("offsets"),
7157
)
72-
ensure_equivalence(local_node, REFERENCE_ENGINE, engine)
58+
assert_equivalence_execution(local_node, REFERENCE_ENGINE, engine)
7359

7460

7561
def test_engines_read_local_w_col_subset(
@@ -84,7 +70,7 @@ def test_engines_read_local_w_col_subset(
8470
local_node = nodes.ReadLocalNode(
8571
managed_data_source, scan_list, fake_session, offsets_col=None
8672
)
87-
ensure_equivalence(local_node, REFERENCE_ENGINE, engine)
73+
assert_equivalence_execution(local_node, REFERENCE_ENGINE, engine)
8874

8975

9076
def test_engines_read_local_w_zero_row_source(
@@ -99,7 +85,7 @@ def test_engines_read_local_w_zero_row_source(
9985
local_node = nodes.ReadLocalNode(
10086
zero_row_source, scan_list, fake_session, offsets_col=None
10187
)
102-
ensure_equivalence(local_node, REFERENCE_ENGINE, engine)
88+
assert_equivalence_execution(local_node, REFERENCE_ENGINE, engine)
10389

10490

10591
def test_engines_read_local_w_nested_source(
@@ -114,7 +100,7 @@ def test_engines_read_local_w_nested_source(
114100
local_node = nodes.ReadLocalNode(
115101
nested_data_source, scan_list, fake_session, offsets_col=None
116102
)
117-
ensure_equivalence(local_node, REFERENCE_ENGINE, engine)
103+
assert_equivalence_execution(local_node, REFERENCE_ENGINE, engine)
118104

119105

120106
def test_engines_read_local_w_repeated_source(
@@ -129,4 +115,4 @@ def test_engines_read_local_w_repeated_source(
129115
local_node = nodes.ReadLocalNode(
130116
repeated_data_source, scan_list, fake_session, offsets_col=None
131117
)
132-
ensure_equivalence(local_node, REFERENCE_ENGINE, engine)
118+
assert_equivalence_execution(local_node, REFERENCE_ENGINE, engine)
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
17+
from bigframes.core import array_value, expression, identifiers, nodes
18+
from bigframes.session import polars_executor
19+
from tests.system.small.engines.engine_utils import assert_equivalence_execution
20+
21+
pytest.importorskip("polars")
22+
23+
# Polars used as reference as its fast and local. Generally though, prefer gbq engine where they disagree.
24+
REFERENCE_ENGINE = polars_executor.PolarsExecutor()
25+
26+
27+
def test_engines_select_identity(
28+
scalars_array_value: array_value.ArrayValue,
29+
engine,
30+
):
31+
selection = tuple(
32+
nodes.AliasedRef(expression.deref(col), identifiers.ColumnId(col))
33+
for col in scalars_array_value.column_ids
34+
)
35+
node = nodes.SelectionNode(scalars_array_value.node, selection)
36+
assert_equivalence_execution(node, REFERENCE_ENGINE, engine)
37+
38+
39+
def test_engines_select_rename(
40+
scalars_array_value: array_value.ArrayValue,
41+
engine,
42+
):
43+
selection = tuple(
44+
nodes.AliasedRef(expression.deref(col), identifiers.ColumnId(f"renamed_{col}"))
45+
for col in scalars_array_value.column_ids
46+
)
47+
node = nodes.SelectionNode(scalars_array_value.node, selection)
48+
assert_equivalence_execution(node, REFERENCE_ENGINE, engine)
49+
50+
51+
def test_engines_select_reorder_rename_drop(
52+
scalars_array_value: array_value.ArrayValue,
53+
engine,
54+
):
55+
selection = tuple(
56+
nodes.AliasedRef(expression.deref(col), identifiers.ColumnId(f"renamed_{col}"))
57+
for col in scalars_array_value.column_ids[::-2]
58+
)
59+
node = nodes.SelectionNode(scalars_array_value.node, selection)
60+
assert_equivalence_execution(node, REFERENCE_ENGINE, engine)

0 commit comments

Comments
 (0)