 import bigframes.pandas as bpd
 import bigframes.series as series
 from tests.system.utils import (
+    assert_dfs_equivalent,
     assert_pandas_df_equal,
     assert_series_equal,
+    assert_series_equivalent,
     skip_legacy_pandas,
 )
 
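Note: the new assert_dfs_equivalent and assert_series_equivalent helpers come from tests/system/utils and are not shown in this diff. Purely as a hypothetical sketch (not the project's actual implementation), an order-insensitive DataFrame check could look roughly like this:

# Hypothetical sketch only -- the real helper lives in tests/system/utils and may differ.
import pandas


def assert_dfs_equivalent_sketch(pd_df, bf_df, **kwargs):
    # Materialize the bigframes DataFrame, then ignore row order when the
    # session does not guarantee a total ordering.
    bf_materialized = bf_df.to_pandas()
    if not bf_df._session._strictly_ordered:
        bf_materialized = bf_materialized.sort_index()
        pd_df = pd_df.sort_index()
    pandas.testing.assert_frame_equal(pd_df, bf_materialized, **kwargs)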
@@ -75,7 +77,7 @@ def test_df_construct_large_strings():
     pandas.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
 
 
-def test_df_construct_pandas_load_job(scalars_dfs):
+def test_df_construct_pandas_load_job(scalars_dfs_maybe_ordered):
     # This should trigger the inlined codepath
     columns = [
         "int64_too",
@@ -91,10 +93,10 @@ def test_df_construct_pandas_load_job(scalars_dfs):
         "timestamp_col",
         "geography_col",
     ]
-    _, scalars_pandas_df = scalars_dfs
-    bf_result = dataframe.DataFrame(scalars_pandas_df, columns=columns).to_pandas()
+    _, scalars_pandas_df = scalars_dfs_maybe_ordered
+    bf_result = dataframe.DataFrame(scalars_pandas_df, columns=columns)
     pd_result = pd.DataFrame(scalars_pandas_df, columns=columns)
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
+    assert_dfs_equivalent(pd_result, bf_result)
 
 
 def test_df_construct_pandas_set_dtype(scalars_dfs):
@@ -112,17 +114,17 @@ def test_df_construct_pandas_set_dtype(scalars_dfs):
     pandas.testing.assert_frame_equal(bf_result, pd_result)
 
 
-def test_df_construct_from_series(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
+def test_df_construct_from_series(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
     bf_result = dataframe.DataFrame(
         {"a": scalars_df["int64_col"], "b": scalars_df["string_col"]},
         dtype="string[pyarrow]",
-    ).to_pandas()
+    )
     pd_result = pd.DataFrame(
         {"a": scalars_pandas_df["int64_col"], "b": scalars_pandas_df["string_col"]},
         dtype="string[pyarrow]",
     )
-    pandas.testing.assert_frame_equal(bf_result, pd_result)
+    assert_dfs_equivalent(pd_result, bf_result)
 
 
 def test_df_construct_from_dict():
@@ -505,8 +507,8 @@ def test_rename(scalars_dfs):
     )
 
 
-def test_df_peek(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
+def test_df_peek(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
     peek_result = scalars_df.peek(n=3, force=False)
     pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns)
     assert len(peek_result) == 3
@@ -1709,14 +1711,14 @@ def test_sort_index(scalars_dfs, ascending, na_position):
     pandas.testing.assert_frame_equal(bf_result, pd_result)
 
 
-def test_df_abs(scalars_dfs):
-    scalars_df, scalars_pandas_df = scalars_dfs
+def test_df_abs(scalars_dfs_maybe_ordered):
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
     columns = ["int64_col", "int64_too", "float64_col"]
 
-    bf_result = scalars_df[columns].abs().to_pandas()
+    bf_result = scalars_df[columns].abs()
     pd_result = scalars_pandas_df[columns].abs()
 
-    assert_pandas_df_equal(bf_result, pd_result)
+    assert_dfs_equivalent(pd_result, bf_result)
 
 
 def test_df_pos(scalars_dfs):
@@ -2268,8 +2270,10 @@ def test_series_binop_add_different_table(
 
 
 @all_joins
-def test_join_same_table(scalars_dfs, how):
-    bf_df, pd_df = scalars_dfs
+def test_join_same_table(scalars_dfs_maybe_ordered, how):
+    bf_df, pd_df = scalars_dfs_maybe_ordered
+    if not bf_df._session._strictly_ordered and how == "cross":
+        pytest.skip("Cross join not supported in unordered mode.")
 
     bf_df_a = bf_df.set_index("int64_too")[["string_col", "int64_col"]]
     bf_df_a = bf_df_a.sort_index()
@@ -2503,7 +2507,7 @@ def test_dataframe_agg_int_single_string(scalars_dfs, agg):
     )
 
 
-def test_dataframe_agg_multi_string(scalars_dfs):
+def test_dataframe_agg_multi_string(scalars_dfs_maybe_ordered):
     numeric_cols = ["int64_col", "int64_too", "float64_col"]
     aggregations = [
         "sum",
@@ -2516,8 +2520,8 @@ def test_dataframe_agg_multi_string(scalars_dfs):
         "nunique",
         "count",
     ]
-    scalars_df, scalars_pandas_df = scalars_dfs
-    bf_result = scalars_df[numeric_cols].agg(aggregations).to_pandas()
+    scalars_df, scalars_pandas_df = scalars_dfs_maybe_ordered
+    bf_result = scalars_df[numeric_cols].agg(aggregations)
     pd_result = scalars_pandas_df[numeric_cols].agg(aggregations)
 
     # Pandas may produce narrower numeric types, but bigframes always produces Float64
@@ -2528,7 +2532,7 @@ def test_dataframe_agg_multi_string(scalars_dfs):
         bf_result = bf_result.drop(labels=["median"])
         pd_result = pd_result.drop(labels=["median"])
 
-    pd.testing.assert_frame_equal(pd_result, bf_result, check_index_type=False)
+    assert_dfs_equivalent(pd_result, bf_result, check_index_type=False)
 
     # Double-check that median is at least plausible.
     assert (
@@ -3205,13 +3209,6 @@ def test_dataframe_aggregate_bool(scalars_df_index, scalars_pandas_df_index, col
     assert_series_equal(pd_result, bf_result, check_dtype=False, check_index_type=False)
 
 
-@pytest.mark.parametrize(
-    ("ordered"),
-    [
-        (True),
-        (False),
-    ],
-)
 @pytest.mark.parametrize(
     ("op", "bf_dtype"),
     [
@@ -3226,12 +3223,11 @@ def test_dataframe_aggregate_bool(scalars_df_index, scalars_pandas_df_index, col
     ],
     ids=["sum", "mean", "min", "max", "std", "var", "count", "nunique"],
 )
-def test_dataframe_aggregates(
-    scalars_df_index, scalars_pandas_df_index, op, bf_dtype, ordered
-):
+def test_dataframe_aggregates(scalars_dfs_maybe_ordered, op, bf_dtype):
+    scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
     col_names = ["int64_too", "float64_col", "string_col", "int64_col", "bool_col"]
     bf_series = op(scalars_df_index[col_names])
-    bf_result = bf_series.to_pandas(ordered=ordered)
+    bf_result = bf_series
     pd_result = op(scalars_pandas_df_index[col_names])
 
     # Check dtype separately
@@ -3240,12 +3236,11 @@ def test_dataframe_aggregates(
     # Pandas may produce narrower numeric types, but bigframes always produces Float64
     # Pandas has object index type
     pd_result.index = pd_result.index.astype("string[pyarrow]")
-    assert_series_equal(
+    assert_series_equivalent(
         pd_result,
         bf_result,
         check_dtype=False,
         check_index_type=False,
-        ignore_order=not ordered,
     )
 
 
@@ -3597,16 +3592,17 @@ def test_df_rows_filter_regex(scalars_df_index, scalars_pandas_df_index):
     )
 
 
-def test_df_reindex_rows_list(scalars_df_index, scalars_pandas_df_index):
-    bf_result = scalars_df_index.reindex(index=[5, 1, 3, 99, 1]).to_pandas()
+def test_df_reindex_rows_list(scalars_dfs_maybe_ordered):
+    scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
+    bf_result = scalars_df_index.reindex(index=[5, 1, 3, 99, 1])
 
     pd_result = scalars_pandas_df_index.reindex(index=[5, 1, 3, 99, 1])
 
     # Pandas uses int64 instead of Int64 (nullable) dtype.
     pd_result.index = pd_result.index.astype(pd.Int64Dtype())
-    pd.testing.assert_frame_equal(
-        bf_result,
+    assert_dfs_equivalent(
         pd_result,
+        bf_result,
     )
 
 
@@ -3861,7 +3857,8 @@ def test_loc_list_integer_index(scalars_df_index, scalars_pandas_df_index):
     )
 
 
-def test_loc_list_multiindex(scalars_df_index, scalars_pandas_df_index):
+def test_loc_list_multiindex(scalars_dfs_maybe_ordered):
+    scalars_df_index, scalars_pandas_df_index = scalars_dfs_maybe_ordered
     scalars_df_multiindex = scalars_df_index.set_index(["string_col", "int64_col"])
     scalars_pandas_df_multiindex = scalars_pandas_df_index.set_index(
         ["string_col", "int64_col"]
@@ -3871,9 +3868,9 @@ def test_loc_list_multiindex(scalars_df_index, scalars_pandas_df_index):
     bf_result = scalars_df_multiindex.loc[index_list]
     pd_result = scalars_pandas_df_multiindex.loc[index_list]
 
-    pd.testing.assert_frame_equal(
-        bf_result.to_pandas(),
+    assert_dfs_equivalent(
         pd_result,
+        bf_result,
     )
 
 
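Note: the scalars_dfs_maybe_ordered fixture used throughout is defined in the suite's shared conftest rather than in this file. Purely as an illustration of the pattern (the unordered_session fixture name and the parametrization below are assumptions, not the project's actual fixture), it could be parametrized over an ordered and an unordered session:

# Hypothetical sketch only -- the real fixture is defined in the test suite's conftest.
import pytest


@pytest.fixture(params=["ordered", "unordered"])
def scalars_dfs_maybe_ordered(request, session, unordered_session, scalars_pandas_df_index):
    # Load the same pandas scalars table through either session so each test
    # runs once with strict ordering and once without it.
    bf_session = session if request.param == "ordered" else unordered_session
    return bf_session.read_pandas(scalars_pandas_df_index), scalars_pandas_df_index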