Skip to content

Commit 6278e0b

Browse files
test: Run several tests in unordered mode as well as ordered (#831)
1 parent ab0dabc commit 6278e0b

File tree

3 files changed

+76
-40
lines changed

3 files changed

+76
-40
lines changed

tests/system/conftest.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,16 @@ def session() -> Generator[bigframes.Session, None, None]:
139139
session.close() # close generated session at cleanup time
140140

141141

142+
@pytest.fixture(scope="session", params=["ordered", "unordered"])
143+
def maybe_ordered_session(request) -> Generator[bigframes.Session, None, None]:
144+
context = bigframes.BigQueryOptions(
145+
location="US", _strictly_ordered=request.param == "ordered"
146+
)
147+
session = bigframes.Session(context=context)
148+
yield session
149+
session.close() # close generated session at cleanup time
150+
151+
142152
@pytest.fixture(scope="session")
143153
def unordered_session() -> Generator[bigframes.Session, None, None]:
144154
context = bigframes.BigQueryOptions(location="US", _strictly_ordered=False)
@@ -467,6 +477,17 @@ def scalars_dfs(
467477
return scalars_df_index, scalars_pandas_df_index
468478

469479

480+
@pytest.fixture(scope="session")
481+
def scalars_dfs_maybe_ordered(
482+
maybe_ordered_session,
483+
scalars_pandas_df_index,
484+
):
485+
return (
486+
maybe_ordered_session.read_pandas(scalars_pandas_df_index),
487+
scalars_pandas_df_index,
488+
)
489+
490+
470491
@pytest.fixture(scope="session")
471492
def hockey_df(
472493
hockey_table_id: str, session: bigframes.Session

tests/system/small/test_dataframe.py

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,10 @@
3333
import bigframes.pandas as bpd
3434
import bigframes.series as series
3535
from tests.system.utils import (
36+
assert_dfs_equivalent,
3637
assert_pandas_df_equal,
3738
assert_series_equal,
39+
assert_series_equivalent,
3840
skip_legacy_pandas,
3941
)
4042

@@ -75,7 +77,7 @@ def test_df_construct_large_strings():
7577
pandas.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
7678

7779

78-
def test_df_construct_pandas_load_job(scalars_dfs):
80+
def test_df_construct_pandas_load_job(scalars_dfs_maybe_ordered):
7981
# This should trigger the inlined codepath
8082
columns = [
8183
"int64_too",
@@ -91,10 +93,10 @@ def test_df_construct_pandas_load_job(scalars_dfs):
9193
"timestamp_col",
9294
"geography_col",
9395
]
94-
_, scalars_pandas_df = scalars_dfs
95-
bf_result = dataframe.DataFrame(scalars_pandas_df, columns=columns).to_pandas()
96+
_, scalars_pandas_df = scalars_dfs_maybe_ordered
97+
bf_result = dataframe.DataFrame(scalars_pandas_df, columns=columns)
9698
pd_result = pd.DataFrame(scalars_pandas_df, columns=columns)
97-
pandas.testing.assert_frame_equal(bf_result, pd_result)
99+
assert_dfs_equivalent(pd_result, bf_result)
98100

99101

100102
def test_df_construct_pandas_set_dtype(scalars_dfs):
@@ -112,17 +114,17 @@ def test_df_construct_pandas_set_dtype(scalars_dfs):
112114
pandas.testing.assert_frame_equal(bf_result, pd_result)
113115

114116

115-
def test_df_construct_from_series(scalars_dfs):
116-
scalars_df, scalars_pandas_df = scalars_dfs
117+
def test_df_construct_from_series(scalars_dfs_maybe_ordered):
118+
scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
117119
bf_result = dataframe.DataFrame(
118120
{"a": scalars_df["int64_col"], "b": scalars_df["string_col"]},
119121
dtype="string[pyarrow]",
120-
).to_pandas()
122+
)
121123
pd_result = pd.DataFrame(
122124
{"a": scalars_pandas_df["int64_col"], "b": scalars_pandas_df["string_col"]},
123125
dtype="string[pyarrow]",
124126
)
125-
pandas.testing.assert_frame_equal(bf_result, pd_result)
127+
assert_dfs_equivalent(pd_result, bf_result)
126128

127129

128130
def test_df_construct_from_dict():
@@ -505,8 +507,8 @@ def test_rename(scalars_dfs):
505507
)
506508

507509

508-
def test_df_peek(scalars_dfs):
509-
scalars_df, scalars_pandas_df = scalars_dfs
510+
def test_df_peek(scalars_dfs_maybe_ordered):
511+
scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
510512
peek_result = scalars_df.peek(n=3, force=False)
511513
pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
512514
assert len(peek_result) == 3
@@ -1709,14 +1711,14 @@ def test_sort_index(scalars_dfs, ascending, na_position):
17091711
pandas.testing.assert_frame_equal(bf_result, pd_result)
17101712

17111713

1712-
def test_df_abs(scalars_dfs):
1713-
scalars_df, scalars_pandas_df = scalars_dfs
1714+
def test_df_abs(scalars_dfs_maybe_ordered):
1715+
scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
17141716
columns = ["int64_col", "int64_too", "float64_col"]
17151717

1716-
bf_result = scalars_df[columns].abs().to_pandas()
1718+
bf_result = scalars_df[columns].abs()
17171719
pd_result = scalars_pandas_df[columns].abs()
17181720

1719-
assert_pandas_df_equal(bf_result, pd_result)
1721+
assert_dfs_equivalent(pd_result, bf_result)
17201722

17211723

17221724
def test_df_pos(scalars_dfs):
@@ -2268,8 +2270,10 @@ def test_series_binop_add_different_table(
22682270

22692271

22702272
@all_joins
2271-
def test_join_same_table(scalars_dfs, how):
2272-
bf_df, pd_df = scalars_dfs
2273+
def test_join_same_table(scalars_dfs_maybe_ordered, how):
2274+
bf_df, pd_df = scalars_dfs_maybe_ordered
2275+
if not bf_df._session._strictly_ordered and how == "cross":
2276+
pytest.skip("Cross join not supported in unordered mode.")
22732277

22742278
bf_df_a = bf_df.set_index("int64_too")[["string_col", "int64_col"]]
22752279
bf_df_a = bf_df_a.sort_index()
@@ -2503,7 +2507,7 @@ def test_dataframe_agg_int_single_string(scalars_dfs, agg):
25032507
)
25042508

25052509

2506-
def test_dataframe_agg_multi_string(scalars_dfs):
2510+
def test_dataframe_agg_multi_string(scalars_dfs_maybe_ordered):
25072511
numeric_cols = ["int64_col", "int64_too", "float64_col"]
25082512
aggregations = [
25092513
"sum",
@@ -2516,8 +2520,8 @@ def test_dataframe_agg_multi_string(scalars_dfs):
25162520
"nunique",
25172521
"count",
25182522
]
2519-
scalars_df, scalars_pandas_df = scalars_dfs
2520-
bf_result = scalars_df[numeric_cols].agg(aggregations).to_pandas()
2523+
scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
2524+
bf_result = scalars_df[numeric_cols].agg(aggregations)
25212525
pd_result = scalars_pandas_df[numeric_cols].agg(aggregations)
25222526

25232527
# Pandas may produce narrower numeric types, but bigframes always produces Float64
@@ -2528,7 +2532,7 @@ def test_dataframe_agg_multi_string(scalars_dfs):
25282532
bf_result = bf_result.drop(labels=["median"])
25292533
pd_result = pd_result.drop(labels=["median"])
25302534

2531-
pd.testing.assert_frame_equal(pd_result, bf_result, check_index_type=False)
2535+
assert_dfs_equivalent(pd_result, bf_result, check_index_type=False)
25322536

25332537
# Double-check that median is at least plausible.
25342538
assert (
@@ -3205,13 +3209,6 @@ def test_dataframe_aggregate_bool(scalars_df_index, scalars_pandas_df_index, col
32053209
assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)
32063210

32073211

3208-
@pytest.mark.parametrize(
3209-
("ordered"),
3210-
[
3211-
(True),
3212-
(False),
3213-
],
3214-
)
32153212
@pytest.mark.parametrize(
32163213
("op", "bf_dtype"),
32173214
[
@@ -3226,12 +3223,11 @@ def test_dataframe_aggregate_bool(scalars_df_index, scalars_pandas_df_index, col
32263223
],
32273224
ids=["sum", "mean", "min", "max", "std", "var", "count", "nunique"],
32283225
)
3229-
def test_dataframe_aggregates(
3230-
scalars_df_index, scalars_pandas_df_index, op, bf_dtype, ordered
3231-
):
3226+
def test_dataframe_aggregates(scalars_dfs_maybe_ordered, op, bf_dtype):
3227+
scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
32323228
col_names = ["int64_too", "float64_col", "string_col", "int64_col", "bool_col"]
32333229
bf_series = op(scalars_df_index[col_names])
3234-
bf_result = bf_series.to_pandas(ordered=ordered)
3230+
bf_result = bf_series
32353231
pd_result = op(scalars_pandas_df_index[col_names])
32363232

32373233
# Check dtype separately
@@ -3240,12 +3236,11 @@ def test_dataframe_aggregates(
32403236
# Pandas may produce narrower numeric types, but bigframes always produces Float64
32413237
# Pandas has object index type
32423238
pd_result.index = pd_result.index.astype("string[pyarrow]")
3243-
assert_series_equal(
3239+
assert_series_equivalent(
32443240
pd_result,
32453241
bf_result,
32463242
check_dtype=False,
32473243
check_index_type=False,
3248-
ignore_order=not ordered,
32493244
)
32503245

32513246

@@ -3597,16 +3592,17 @@ def test_df_rows_filter_regex(scalars_df_index, scalars_pandas_df_index):
35973592
)
35983593

35993594

3600-
def test_df_reindex_rows_list(scalars_df_index, scalars_pandas_df_index):
3601-
bf_result = scalars_df_index.reindex(index=[5, 1, 3, 99, 1]).to_pandas()
3595+
def test_df_reindex_rows_list(scalars_dfs_maybe_ordered):
3596+
scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
3597+
bf_result = scalars_df_index.reindex(index=[5, 1, 3, 99, 1])
36023598

36033599
pd_result = scalars_pandas_df_index.reindex(index=[5, 1, 3, 99, 1])
36043600

36053601
# Pandas uses int64 instead of Int64 (nullable) dtype.
36063602
pd_result.index = pd_result.index.astype(pd.Int64Dtype())
3607-
pd.testing.assert_frame_equal(
3608-
bf_result,
3603+
assert_dfs_equivalent(
36093604
pd_result,
3605+
bf_result,
36103606
)
36113607

36123608

@@ -3861,7 +3857,8 @@ def test_loc_list_integer_index(scalars_df_index, scalars_pandas_df_index):
38613857
)
38623858

38633859

3864-
def test_loc_list_multiindex(scalars_df_index, scalars_pandas_df_index):
3860+
def test_loc_list_multiindex(scalars_dfs_maybe_ordered):
3861+
scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
38653862
scalars_df_multiindex = scalars_df_index.set_index(["string_col", "int64_col"])
38663863
scalars_pandas_df_multiindex = scalars_pandas_df_index.set_index(
38673864
["string_col", "int64_col"]
@@ -3871,9 +3868,9 @@ def test_loc_list_multiindex(scalars_df_index, scalars_pandas_df_index):
38713868
bf_result = scalars_df_multiindex.loc[index_list]
38723869
pd_result = scalars_pandas_df_multiindex.loc[index_list]
38733870

3874-
pd.testing.assert_frame_equal(
3875-
bf_result.to_pandas(),
3871+
assert_dfs_equivalent(
38763872
pd_result,
3873+
bf_result,
38773874
)
38783875

38793876

tests/system/utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import pytest
2828

2929
from bigframes.functions import remote_function
30+
import bigframes.pandas
3031

3132
ML_REGRESSION_METRICS = [
3233
"mean_absolute_error",
@@ -56,6 +57,23 @@ def wrapper(*args, **kwds):
5657
return wrapper
5758

5859

60+
# Prefer this function for tests that run in both ordered and unordered mode
61+
def assert_dfs_equivalent(
62+
pd_df: pd.DataFrame, bf_df: bigframes.pandas.DataFrame, **kwargs
63+
):
64+
bf_df_local = bf_df.to_pandas()
65+
ignore_order = not bf_df._session._strictly_ordered
66+
assert_pandas_df_equal(bf_df_local, pd_df, ignore_order=ignore_order, **kwargs)
67+
68+
69+
def assert_series_equivalent(
70+
pd_series: pd.Series, bf_series: bigframes.pandas.Series, **kwargs
71+
):
72+
bf_df_local = bf_series.to_pandas()
73+
ignore_order = not bf_series._session._strictly_ordered
74+
assert_series_equal(bf_df_local, pd_series, ignore_order=ignore_order, **kwargs)
75+
76+
5977
def assert_pandas_df_equal(df0, df1, ignore_order: bool = False, **kwargs):
6078
if ignore_order:
6179
# Sort by a column to get consistent results.

0 commit comments

Comments
 (0)