Skip to content

Commit 6d6b59b

Browse files
committed
testing how smart cursor is
1 parent 9735d67 commit 6d6b59b

File tree

14 files changed

+120
-93
lines changed

14 files changed

+120
-93
lines changed

tests/integ/modin/binary/test_binary_op.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2094,7 +2094,7 @@ def test_binary_add_dataframe_and_series_axis0(df, s):
20942094
snow_s = pd.Series(s)
20952095

20962096
# DataFrame <op> Series
2097-
with SqlCounter(query_count=1):
2097+
with SqlCounter(query_count=1, join_count=1):
20982098
ans = df.add(s, axis=0)
20992099
snow_ans = snow_df.add(snow_s, axis=0)
21002100

@@ -2160,7 +2160,7 @@ def test_binary_op_between_dataframe_and_series_axis0(opname, df, s):
21602160
snow_s = pd.Series(s)
21612161

21622162
# DataFrame <op> Series
2163-
with SqlCounter(query_count=1):
2163+
with SqlCounter(query_count=1, join_count=1):
21642164
ans = getattr(df, opname)(s, axis=0)
21652165
snow_ans = getattr(snow_df, opname)(snow_s, axis=0)
21662166

@@ -2385,12 +2385,12 @@ def test_binary_sub_dataframe_and_dataframe(df1, df2):
23852385
snow_df1 = pd.DataFrame(df1)
23862386
snow_df2 = pd.DataFrame(df2)
23872387

2388-
with SqlCounter(query_count=1):
2388+
with SqlCounter(query_count=1, join_count=1):
23892389
eval_snowpark_pandas_result(
23902390
(snow_df1, snow_df2), (df1, df2), lambda t: t[0] - t[1]
23912391
)
23922392

2393-
with SqlCounter(query_count=1):
2393+
with SqlCounter(query_count=1, join_count=1):
23942394
eval_snowpark_pandas_result(
23952395
(snow_df2, snow_df1), (df2, df1), lambda t: t[0] - t[1]
23962396
)
@@ -2402,14 +2402,14 @@ def test_binary_sub_dataframe_and_dataframe_with_fill_value(df1, df2, fill_value
24022402
snow_df1 = pd.DataFrame(df1)
24032403
snow_df2 = pd.DataFrame(df2)
24042404

2405-
with SqlCounter(query_count=1):
2405+
with SqlCounter(query_count=1, join_count=1):
24062406
eval_snowpark_pandas_result(
24072407
(snow_df1, snow_df2),
24082408
(df1, df2),
24092409
lambda t: t[0].sub(t[1], fill_value=fill_value),
24102410
)
24112411

2412-
with SqlCounter(query_count=1):
2412+
with SqlCounter(query_count=1, join_count=1):
24132413
eval_snowpark_pandas_result(
24142414
(snow_df2, snow_df1),
24152415
(df2, df1),

tests/integ/modin/frame/test_apply.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,12 @@ def test_axis_1_basic_types_without_type_hints(data, func, return_type):
9393
native_df = native_pd.DataFrame(data, columns=["A", "b"])
9494
snow_df = pd.DataFrame(data, columns=["A", "b"])
9595
# np.min is mapped to sql builtin function.
96-
with SqlCounter(query_count=1 if func == np.min else 5):
96+
query_count = 1 if func == np.min else 5
97+
join_count = 0 if func == np.min else 2
98+
udtf_count = 0 if func == np.min else 1
99+
with SqlCounter(
100+
query_count=query_count, join_count=join_count, udtf_count=udtf_count
101+
):
97102
eval_snowpark_pandas_result(snow_df, native_df, lambda x: x.apply(func, axis=1))
98103

99104

@@ -107,7 +112,7 @@ def test_axis_1_basic_types_with_type_hints(data, func, return_type):
107112
snow_df = pd.DataFrame(data, columns=["A", "b"])
108113
func_with_type_hint = create_func_with_return_type_hint(func, return_type)
109114
# Invoking a single UDF typically requires 3 queries (package management, code upload, UDF registration) upfront.
110-
with SqlCounter(query_count=4, join_count=0, udtf_count=0):
115+
with SqlCounter(query_count=4, join_count=0, udtf_count=0, udf_count=1):
111116
eval_snowpark_pandas_result(
112117
snow_df, native_df, lambda x: x.apply(func_with_type_hint, axis=1)
113118
)
@@ -144,7 +149,7 @@ def foo(row) -> str:
144149

145150
snow_df = pd.DataFrame(df)
146151
# Invoking a single UDF typically requires 3 queries (package management, code upload, UDF registration) upfront.
147-
with SqlCounter(query_count=4, join_count=0, udtf_count=0):
152+
with SqlCounter(query_count=4, join_count=0, udtf_count=0, udf_count=1):
148153
eval_snowpark_pandas_result(snow_df, df, lambda x: x.apply(foo, axis=1))
149154

150155

@@ -640,6 +645,8 @@ def test_basic_dataframe_transform(data, apply_func, expected_query_count):
640645
snow_df = pd.DataFrame(data)
641646
with SqlCounter(
642647
query_count=expected_query_count,
648+
join_count=5,
649+
udtf_count=3,
643650
high_count_expected=expected_query_count > 8,
644651
high_count_reason=msg,
645652
):
@@ -900,6 +907,7 @@ def test_apply_axis1_with_3rd_party_libraries_and_decorator(
900907

901908
with SqlCounter(
902909
query_count=expected_query_count,
910+
udf_count=1,
903911
high_count_expected=True,
904912
high_count_reason="Snowpark package upload requires many queries.",
905913
):

tests/integ/modin/frame/test_apply_axis_0.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ def test_axis_0_basic_types_without_type_hints(data, func, return_type):
8888
query_count=1 if func == np.min else 11,
8989
join_count=0 if func == np.min else 2,
9090
udtf_count=0 if func == np.min else 2,
91+
union_count=1,
9192
high_count_expected=func != np.min,
9293
high_count_reason="SNOW-1650644 & SNOW-1345395: Avoid extra caching and repeatedly creating same temp function",
9394
):
@@ -108,6 +109,7 @@ def test_axis_0_basic_types_with_type_hints(data, func, return_type):
108109
query_count=11,
109110
join_count=2,
110111
udtf_count=2,
112+
union_count=1,
111113
high_count_expected=True,
112114
high_count_reason="SNOW-1650644 & SNOW-1345395: Avoid extra caching and repeatedly creating same temp function",
113115
):
@@ -161,6 +163,7 @@ def foo(row) -> str:
161163
query_count=11,
162164
join_count=2,
163165
udtf_count=2,
166+
union_count=1,
164167
high_count_expected=True,
165168
high_count_reason="SNOW-1650644 & SNOW-1345395: Avoid extra caching and repeatedly creating same temp function",
166169
):

tests/integ/modin/frame/test_iloc.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,12 @@ def test_df_row_return_dataframe(
166166
default_index_snowpark_pandas_df,
167167
default_index_native_df,
168168
):
169-
with SqlCounter(query_count=expected_query_count, join_count=expected_join_count):
169+
union_count = 1 if isinstance(key, (list, slice)) else 0
170+
with SqlCounter(
171+
query_count=expected_query_count,
172+
join_count=expected_join_count,
173+
union_count=union_count,
174+
):
170175
eval_snowpark_pandas_result(
171176
default_index_snowpark_pandas_df,
172177
default_index_native_df,
@@ -406,7 +411,7 @@ def test_df_iloc_get_empty_key(
406411
default_index_snowpark_pandas_df,
407412
default_index_native_df,
408413
):
409-
with SqlCounter(query_count=1):
414+
with SqlCounter(query_count=1, join_count=2):
410415
eval_snowpark_pandas_result(
411416
empty_snowpark_pandas_df,
412417
native_pd.DataFrame(),
@@ -416,7 +421,7 @@ def test_df_iloc_get_empty_key(
416421
# from native pandas (Snowpark pandas gives "empty" vs. native pandas "integer")
417422
check_column_type=False,
418423
)
419-
with SqlCounter(query_count=1):
424+
with SqlCounter(query_count=1, join_count=2):
420425
eval_snowpark_pandas_result(
421426
default_index_snowpark_pandas_df,
422427
default_index_native_df,
@@ -1079,7 +1084,7 @@ def iloc_helper(df):
10791084

10801085
# test df with default index
10811086
num_cols = 7
1082-
with SqlCounter(query_count=1):
1087+
with SqlCounter(query_count=1, union_count=1):
10831088
eval_snowpark_pandas_result(
10841089
default_index_snowpark_pandas_df,
10851090
default_index_native_df,
@@ -1089,7 +1094,7 @@ def iloc_helper(df):
10891094

10901095
# test df with non-default index
10911096
num_cols = 6 # set_index() makes the number of columns 6
1092-
with SqlCounter(query_count=1):
1097+
with SqlCounter(query_count=1, union_count=1):
10931098
eval_snowpark_pandas_result(
10941099
default_index_snowpark_pandas_df.set_index("D"),
10951100
default_index_native_df.set_index("D"),
@@ -1102,7 +1107,7 @@ def iloc_helper(df):
11021107
num_cols = 7
11031108
native_df = default_index_native_df.set_index(multiindex_native)
11041109
snowpark_df = pd.DataFrame(native_df)
1105-
with SqlCounter(query_count=1):
1110+
with SqlCounter(query_count=1, union_count=1):
11061111
eval_snowpark_pandas_result(
11071112
snowpark_df,
11081113
native_df,
@@ -1116,7 +1121,7 @@ def iloc_helper(df):
11161121
native_df_with_multiindex_columns
11171122
)
11181123
in_range = True if (-8 < key < 7) else False
1119-
with SqlCounter(query_count=1):
1124+
with SqlCounter(query_count=1, union_count=1):
11201125
if axis == "row" or in_range: # series result
11211126
eval_snowpark_pandas_result(
11221127
snowpark_df_with_multiindex_columns,
@@ -1138,7 +1143,7 @@ def iloc_helper(df):
11381143
# test df with MultiIndex on both index and columns
11391144
native_df = native_df_with_multiindex_columns.set_index(multiindex_native)
11401145
snowpark_df = pd.DataFrame(native_df)
1141-
with SqlCounter(query_count=1):
1146+
with SqlCounter(query_count=1, union_count=1):
11421147
if axis == "row" or in_range: # series result
11431148
eval_snowpark_pandas_result(
11441149
snowpark_df,

tests/integ/modin/frame/test_isin.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ def _test_isin_with_snowflake_logic(df: Any, values, query_count=0): # noqa: E3
3838
# set expected query counts to 0 if native pandas is used.
3939
query_count = 0
4040

41-
with SqlCounter(query_count=query_count):
41+
join_count = 1 if isinstance(df, pd.DataFrame) else 0
42+
with SqlCounter(query_count=query_count, join_count=join_count):
4243
ans = df.isin(values)
4344

4445
# Following code is to emulate Snowflake behavior:

tests/integ/modin/frame/test_loc.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ def test_df_loc_get_tuple_key(
152152

153153
with SqlCounter(
154154
query_count=query_count,
155+
join_count=2,
155156
):
156157
eval_snowpark_pandas_result(
157158
str_index_snowpark_pandas_df,
@@ -314,15 +315,15 @@ def test_df_loc_get_empty_key(
314315
default_index_native_df,
315316
):
316317

317-
with SqlCounter(query_count=1):
318+
with SqlCounter(query_count=1, join_count=1):
318319
eval_snowpark_pandas_result(
319320
empty_snowpark_pandas_df,
320321
native_pd.DataFrame(),
321322
lambda df: df.loc[key],
322323
comparator=assert_snowpark_pandas_equal_to_pandas,
323324
check_column_type=False,
324325
)
325-
with SqlCounter(query_count=1):
326+
with SqlCounter(query_count=1, join_count=1):
326327
eval_snowpark_pandas_result(
327328
default_index_snowpark_pandas_df,
328329
default_index_native_df,
@@ -1913,6 +1914,7 @@ def _test_df_loc_with_1k_shape(
19131914
with SqlCounter(
19141915
query_count=query_count,
19151916
join_count=1,
1917+
union_count=1,
19161918
high_count_expected=high_count_expected,
19171919
high_count_reason=high_count_reason,
19181920
):
@@ -2575,7 +2577,7 @@ def test_empty_df_loc_set_scalar():
25752577
with pytest.raises(ValueError, match="cannot set a frame with no defined columns"):
25762578
native_df.loc[0] = 1
25772579

2578-
with SqlCounter(query_count=1):
2580+
with SqlCounter(query_count=1, join_count=1):
25792581
snow_df.loc[0] = 1
25802582
assert_snowpark_pandas_equal_to_pandas(
25812583
snow_df,
@@ -2603,23 +2605,23 @@ def col_loc(df):
26032605
native_df = native_pd.DataFrame(index=[0, 1, 2])
26042606
snow_df = pd.DataFrame(native_df)
26052607
# Check `loc` with row scalar on empty DataFrame with non-empty index.
2606-
with SqlCounter(query_count=1):
2608+
with SqlCounter(query_count=1, join_count=1):
26072609
eval_snowpark_pandas_result(
26082610
snow_df, native_df, row_loc, inplace=True, check_column_type=False
26092611
)
26102612

26112613
native_df = native_pd.DataFrame(index=[0, 1, 2])
26122614
snow_df = pd.DataFrame(native_df)
26132615
# Check `loc` with column scalar on empty DataFrame with non-empty index.
2614-
with SqlCounter(query_count=1):
2616+
with SqlCounter(query_count=1, join_count=1):
26152617
eval_snowpark_pandas_result(
26162618
snow_df, native_df, col_loc, inplace=True, check_column_type=False
26172619
)
26182620

26192621
native_df = native_pd.DataFrame(columns=["A", "B", "C"])
26202622
snow_df = pd.DataFrame(native_df)
26212623
# Check `loc` with row scalar on empty DataFrame with non-empty columns.
2622-
with SqlCounter(query_count=1):
2624+
with SqlCounter(query_count=1, join_count=1):
26232625
eval_snowpark_pandas_result(
26242626
snow_df,
26252627
native_df,
@@ -2630,7 +2632,7 @@ def col_loc(df):
26302632
native_df = native_pd.DataFrame(columns=["A", "B", "C"])
26312633
snow_df = pd.DataFrame(native_df)
26322634
# Check `loc` with column scalar on empty DataFrame with non-empty columns.
2633-
with SqlCounter(query_count=1):
2635+
with SqlCounter(query_count=1, join_count=1):
26342636
col_loc(snow_df)
26352637
assert_snowpark_pandas_equal_to_pandas(
26362638
snow_df,
@@ -2642,7 +2644,7 @@ def col_loc(df):
26422644
native_df = native_pd.DataFrame(index=[0, 1, 2], columns=["A", "B", "C"])
26432645
snow_df = pd.DataFrame(native_df)
26442646
# Check `loc` with row scalar on empty DataFrame with non-empty index and columns.
2645-
with SqlCounter(query_count=1):
2647+
with SqlCounter(query_count=1, join_count=1):
26462648
eval_snowpark_pandas_result(
26472649
snow_df,
26482650
native_df,
@@ -2653,7 +2655,7 @@ def col_loc(df):
26532655
native_df = native_pd.DataFrame(index=[0, 1, 2], columns=["A", "B", "C"])
26542656
snow_df = pd.DataFrame(native_df)
26552657
# Check `loc` with column scalar on empty DataFrame with non-empty index and columns.
2656-
with SqlCounter(query_count=1):
2658+
with SqlCounter(query_count=1, join_count=1):
26572659
eval_snowpark_pandas_result(
26582660
snow_df,
26592661
native_df,
@@ -3959,7 +3961,12 @@ def test_df_loc_get_with_timedelta(key, query_count, join_count):
39593961
idx = ["1 days", "2 days", "3 days"]
39603962
native_df = native_pd.DataFrame(data, index=native_pd.to_timedelta(idx))
39613963
snow_df = pd.DataFrame(data, index=pd.to_timedelta(idx))
3962-
with SqlCounter(query_count=query_count, join_count=join_count):
3964+
union_count = (
3965+
1 if isinstance(key, (list, slice)) and not isinstance(key, str) else 0
3966+
)
3967+
with SqlCounter(
3968+
query_count=query_count, join_count=join_count, union_count=union_count
3969+
):
39633970
eval_snowpark_pandas_result(snow_df, native_df, lambda df: df.loc[key])
39643971

39653972

tests/integ/modin/frame/test_nunique.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def test_dataframe_nunique(axes_slices, dropna):
4242
]
4343
)
4444

45-
with SqlCounter(query_count=1):
45+
with SqlCounter(query_count=1, union_count=1 if axes_slices[0] == 0 else 0):
4646
eval_snowpark_pandas_result(
4747
df,
4848
native_df,

tests/integ/modin/frame/test_squeeze.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,15 @@ def test_n_by_1(axis, dtype):
3131

3232
@pytest.mark.parametrize("dtype", ["int", "timedelta64[ns]"])
3333
def test_1_by_n(axis, dtype):
34-
with SqlCounter(query_count=1):
34+
with SqlCounter(query_count=1, union_count=1 if axis in (0, "index", None) else 0):
3535
eval_snowpark_pandas_result(
3636
*create_test_dfs({"a": [1], "b": [2], "c": [3]}, dtype=dtype),
3737
lambda df: df.squeeze(axis=axis),
3838
)
3939

4040

4141
def test_2d(axis):
42-
with SqlCounter(query_count=1):
42+
with SqlCounter(query_count=1, union_count=1 if axis in (0, "index", None) else 0):
4343
eval_snowpark_pandas_result(
4444
*create_test_dfs(
4545
{

0 commit comments

Comments
 (0)