Skip to content

Commit 1d45646

Browse files
test: Add dataframe unit test suite (#1751)
1 parent c31f67b commit 1d45646

File tree

9 files changed

+4855
-110
lines changed

9 files changed

+4855
-110
lines changed

bigframes/core/compile/polars/compiler.py

Lines changed: 332 additions & 90 deletions
Large diffs are not rendered by default.

bigframes/core/global_session.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,23 @@ def get_global_session():
112112

113113
def with_default_session(func: Callable[..., _T], *args, **kwargs) -> _T:
114114
return func(get_global_session(), *args, **kwargs)
115+
116+
117+
class _GlobalSessionContext:
118+
"""
119+
Context manager for testing that sets global session.
120+
"""
121+
122+
def __init__(self, session: bigframes.session.Session):
123+
self._session = session
124+
125+
def __enter__(self):
126+
global _global_session, _global_session_lock
127+
with _global_session_lock:
128+
self._previous_session = _global_session
129+
_global_session = self._session
130+
131+
def __exit__(self, *exc_details):
132+
global _global_session, _global_session_lock
133+
with _global_session_lock:
134+
_global_session = self._previous_session

bigframes/core/rewrite/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
)
2525
from bigframes.core.rewrite.slices import pull_out_limit, pull_up_limits, rewrite_slice
2626
from bigframes.core.rewrite.timedeltas import rewrite_timedelta_expressions
27-
from bigframes.core.rewrite.windows import rewrite_range_rolling
27+
from bigframes.core.rewrite.windows import pull_out_window_order, rewrite_range_rolling
2828

2929
__all__ = [
3030
"legacy_join_as_projection",
@@ -41,4 +41,5 @@
4141
"bake_order",
4242
"try_reduce_to_local_scan",
4343
"fold_row_counts",
44+
"pull_out_window_order",
4445
]

bigframes/core/rewrite/windows.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import dataclasses
1818

1919
from bigframes import operations as ops
20-
from bigframes.core import nodes
20+
from bigframes.core import guid, identifiers, nodes, ordering
2121

2222

2323
def rewrite_range_rolling(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
@@ -43,3 +43,34 @@ def rewrite_range_rolling(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
4343
node,
4444
window_spec=dataclasses.replace(node.window_spec, ordering=(new_ordering,)),
4545
)
46+
47+
48+
def pull_out_window_order(root: nodes.BigFrameNode) -> nodes.BigFrameNode:
49+
return root.bottom_up(rewrite_window_node)
50+
51+
52+
def rewrite_window_node(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
53+
if not isinstance(node, nodes.WindowOpNode):
54+
return node
55+
if len(node.window_spec.ordering) == 0:
56+
return node
57+
else:
58+
offsets_id = guid.generate_guid()
59+
w_offsets = nodes.PromoteOffsetsNode(
60+
node.child, identifiers.ColumnId(offsets_id)
61+
)
62+
sorted_child = nodes.OrderByNode(w_offsets, node.window_spec.ordering)
63+
new_window_node = dataclasses.replace(
64+
node,
65+
child=sorted_child,
66+
window_spec=node.window_spec.without_order(force=True),
67+
)
68+
w_resetted_order = nodes.OrderByNode(
69+
new_window_node,
70+
by=(ordering.ascending_over(identifiers.ColumnId(offsets_id)),),
71+
is_total_order=True,
72+
)
73+
w_offsets_dropped = nodes.SelectionNode(
74+
w_resetted_order, tuple(nodes.AliasedRef.identity(id) for id in node.ids)
75+
)
76+
return w_offsets_dropped

bigframes/core/window_spec.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,9 @@ def is_row_bounded(self):
234234
This is relevant for determining whether the window requires a total order
235235
to calculate deterministically.
236236
"""
237-
return isinstance(self.bounds, RowsWindowBounds)
237+
return isinstance(self.bounds, RowsWindowBounds) and (
238+
(self.bounds.start is not None) or (self.bounds.end is not None)
239+
)
238240

239241
@property
240242
def is_range_bounded(self):
@@ -254,7 +256,9 @@ def is_unbounded(self):
254256
This is relevant for determining whether the window requires a total order
255257
to calculate deterministically.
256258
"""
257-
return self.bounds is None
259+
return self.bounds is None or (
260+
self.bounds.start is None and self.bounds.end is None
261+
)
258262

259263
@property
260264
def all_referenced_columns(self) -> Set[ids.ColumnId]:
@@ -266,9 +270,9 @@ def all_referenced_columns(self) -> Set[ids.ColumnId]:
266270
)
267271
return set(itertools.chain((i.id for i in self.grouping_keys), ordering_vars))
268272

269-
def without_order(self) -> WindowSpec:
273+
def without_order(self, force: bool = False) -> WindowSpec:
270274
"""Removes ordering clause if ordering isn't required to define bounds."""
271-
if self.is_row_bounded:
275+
if self.is_row_bounded and not force:
272276
raise ValueError("Cannot remove order from row-bounded window")
273277
return replace(self, ordering=())
274278

bigframes/operations/aggregations.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,6 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
439439
return dtypes.INT_DTYPE
440440

441441

442-
# TODO: Convert to NullaryWindowOp
443442
@dataclasses.dataclass(frozen=True)
444443
class RankOp(UnaryWindowOp):
445444
name: ClassVar[str] = "rank"
@@ -456,7 +455,6 @@ def implicitly_inherits_order(self):
456455
return False
457456

458457

459-
# TODO: Convert to NullaryWindowOp
460458
@dataclasses.dataclass(frozen=True)
461459
class DenseRankOp(UnaryWindowOp):
462460
@property

bigframes/testing/polars_session.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,9 @@
2020
import polars
2121

2222
import bigframes
23-
import bigframes.clients
2423
import bigframes.core.blocks
2524
import bigframes.core.compile.polars
26-
import bigframes.core.ordering
2725
import bigframes.dataframe
28-
import bigframes.session.clients
2926
import bigframes.session.executor
3027
import bigframes.session.metrics
3128

@@ -35,6 +32,26 @@
3532
class TestExecutor(bigframes.session.executor.Executor):
3633
compiler = bigframes.core.compile.polars.PolarsCompiler()
3734

35+
def peek(
36+
self,
37+
array_value: bigframes.core.ArrayValue,
38+
n_rows: int,
39+
use_explicit_destination: Optional[bool] = False,
40+
):
41+
"""
42+
A 'peek' efficiently accesses a small number of rows in the dataframe.
43+
"""
44+
lazy_frame: polars.LazyFrame = self.compiler.compile(array_value)
45+
pa_table = lazy_frame.collect().limit(n_rows).to_arrow()
46+
# Currently, pyarrow types might not quite be exactly the ones in the bigframes schema.
47+
# Nullability may be different, and might use large versions of list, string datatypes.
48+
return bigframes.session.executor.ExecuteResult(
49+
arrow_batches=pa_table.to_batches(),
50+
schema=array_value.schema,
51+
total_bytes=pa_table.nbytes,
52+
total_rows=pa_table.num_rows,
53+
)
54+
3855
def execute(
3956
self,
4057
array_value: bigframes.core.ArrayValue,
@@ -58,6 +75,14 @@ def execute(
5875
total_rows=pa_table.num_rows,
5976
)
6077

78+
def cached(
79+
self,
80+
array_value: bigframes.core.ArrayValue,
81+
*,
82+
config,
83+
) -> None:
84+
return
85+
6186

6287
class TestSession(bigframes.session.Session):
6388
def __init__(self):
@@ -92,3 +117,8 @@ def read_pandas(self, pandas_dataframe, write_engine="default"):
92117
pandas_dataframe = pandas_dataframe.to_frame()
93118
local_block = bigframes.core.blocks.Block.from_local(pandas_dataframe, self)
94119
return bigframes.dataframe.DataFrame(local_block)
120+
121+
@property
122+
def bqclient(self):
123+
# prevents logger from trying to call bq upon any errors
124+
return None

noxfile.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@
7979
UNIT_TEST_DEPENDENCIES: List[str] = []
8080
UNIT_TEST_EXTRAS: List[str] = ["tests"]
8181
UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {
82-
"3.12": ["polars", "scikit-learn"],
82+
"3.12": ["tests", "polars", "scikit-learn"],
8383
}
8484

8585
# 3.10 is needed for Windows tests as it is the only version installed in the
@@ -202,14 +202,11 @@ def install_unittest_dependencies(session, install_test_extra, *constraints):
202202
if UNIT_TEST_LOCAL_DEPENDENCIES:
203203
session.install(*UNIT_TEST_LOCAL_DEPENDENCIES, *constraints)
204204

205-
if install_test_extra and UNIT_TEST_EXTRAS_BY_PYTHON:
206-
extras = UNIT_TEST_EXTRAS_BY_PYTHON.get(session.python, [])
207-
if install_test_extra and UNIT_TEST_EXTRAS:
208-
extras = UNIT_TEST_EXTRAS
209-
else:
210-
extras = []
211-
212-
if extras:
205+
if install_test_extra:
206+
if session.python in UNIT_TEST_EXTRAS_BY_PYTHON:
207+
extras = UNIT_TEST_EXTRAS_BY_PYTHON[session.python]
208+
else:
209+
extras = UNIT_TEST_EXTRAS
213210
session.install("-e", f".[{','.join(extras)}]", *constraints)
214211
else:
215212
session.install("-e", ".", *constraints)

0 commit comments

Comments
 (0)