snowflakedb
diff --git a/tests/integ/modin/binary/test_binary_op.py b/tests/integ/modin/binary/test_binary_op.py
Lines changed: 8 additions & 8 deletions
diff --git a/tests/integ/modin/frame/test_apply.py b/tests/integ/modin/frame/test_apply.py
Lines changed: 36 additions & 20 deletions
diff --git a/tests/integ/modin/frame/test_apply_axis_0.py b/tests/integ/modin/frame/test_apply_axis_0.py
Lines changed: 12 additions & 0 deletions
diff --git a/tests/integ/modin/frame/test_applymap.py b/tests/integ/modin/frame/test_applymap.py
Lines changed: 1 addition & 1 deletion
diff --git a/tests/integ/modin/frame/test_axis.py b/tests/integ/modin/frame/test_axis.py
Lines changed: 3 additions & 3 deletions
@@ -2094,7 +2094,7 @@ def test_binary_add_dataframe_and_series_axis0(df, s):
     snow_s = pd.Series(s)
 
     # DataFrame <op> Series
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=1, join_count=1):
         ans = df.add(s, axis=0)
         snow_ans = snow_df.add(snow_s, axis=0)
 
@@ -2160,7 +2160,7 @@ def test_binary_op_between_dataframe_and_series_axis0(opname, df, s):
     snow_s = pd.Series(s)
 
     # DataFrame <op> Series
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=1, join_count=1):
         ans = getattr(df, opname)(s, axis=0)
         snow_ans = getattr(snow_df, opname)(snow_s, axis=0)
 
@@ -2385,12 +2385,12 @@ def test_binary_sub_dataframe_and_dataframe(df1, df2):
     snow_df1 = pd.DataFrame(df1)
     snow_df2 = pd.DataFrame(df2)
 
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             (snow_df1, snow_df2), (df1, df2), lambda t: t[0] - t[1]
         )
 
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             (snow_df2, snow_df1), (df2, df1), lambda t: t[0] - t[1]
         )
@@ -2402,14 +2402,14 @@ def test_binary_sub_dataframe_and_dataframe_with_fill_value(df1, df2, fill_value
     snow_df1 = pd.DataFrame(df1)
     snow_df2 = pd.DataFrame(df2)
 
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             (snow_df1, snow_df2),
             (df1, df2),
             lambda t: t[0].sub(t[1], fill_value=fill_value),
         )
 
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             (snow_df2, snow_df1),
             (df2, df1),
@@ -2508,14 +2508,14 @@ def test_binary_op_between_dataframe_and_dataframe_exhaustive(
     snow_df1 = pd.DataFrame(df1)
     snow_df2 = pd.DataFrame(df2)
 
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             (snow_df1, snow_df2),
             (df1, df2),
             lambda t: getattr(t[0], opname)(t[1], fill_value=fill_value),
         )
 
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(
             (snow_df2, snow_df1),
             (df2, df1),
 
@@ -93,7 +93,12 @@ def test_axis_1_basic_types_without_type_hints(data, func, return_type):
     native_df = native_pd.DataFrame(data, columns=["A", "b"])
     snow_df = pd.DataFrame(data, columns=["A", "b"])
     # np.min is mapped to sql builtin function.
-    with SqlCounter(query_count=1 if func == np.min else 5):
+    query_count = 1 if func == np.min else 5
+    join_count = 0 if func == np.min else 2
+    udtf_count = 0 if func == np.min else 1
+    with SqlCounter(
+        query_count=query_count, join_count=join_count, udtf_count=udtf_count
+    ):
         eval_snowpark_pandas_result(snow_df, native_df, lambda x: x.apply(func, axis=1))
 
 
@@ -107,7 +112,7 @@ def test_axis_1_basic_types_with_type_hints(data, func, return_type):
     snow_df = pd.DataFrame(data, columns=["A", "b"])
     func_with_type_hint = create_func_with_return_type_hint(func, return_type)
     #  Invoking a single UDF typically requires 3 queries (package management, code upload, UDF registration) upfront.
-    with SqlCounter(query_count=4, join_count=0, udtf_count=0):
+    with SqlCounter(query_count=4, join_count=0, udtf_count=0, udf_count=1):
         eval_snowpark_pandas_result(
             snow_df, native_df, lambda x: x.apply(func_with_type_hint, axis=1)
         )
@@ -144,7 +149,7 @@ def foo(row) -> str:
 
     snow_df = pd.DataFrame(df)
     #  Invoking a single UDF typically requires 3 queries (package management, code upload, UDF registration) upfront.
-    with SqlCounter(query_count=4, join_count=0, udtf_count=0):
+    with SqlCounter(query_count=4, join_count=0, udtf_count=0, udf_count=1):
         eval_snowpark_pandas_result(snow_df, df, lambda x: x.apply(foo, axis=1))
 
 
@@ -350,12 +355,12 @@ def f(x, y, z=1) -> int:
             assert_exception_equal=False,
         )
 
-    with SqlCounter(query_count=4):
+    with SqlCounter(query_count=4, udf_count=1):
         eval_snowpark_pandas_result(
             snow_df, native_df, lambda x: x.apply(f, axis=1, args=(1,))
         )
 
-    with SqlCounter(query_count=4):
+    with SqlCounter(query_count=4, udf_count=1):
         eval_snowpark_pandas_result(
             snow_df, native_df, lambda x: x.apply(f, axis=1, args=(1,), z=2)
         )
@@ -640,6 +645,12 @@ def test_basic_dataframe_transform(data, apply_func, expected_query_count):
         snow_df = pd.DataFrame(data)
         with SqlCounter(
             query_count=expected_query_count,
+            join_count=5
+            if expected_query_count == 16
+            else (3 if expected_query_count > 1 else 0),
+            udtf_count=3
+            if expected_query_count == 16
+            else (2 if expected_query_count > 1 else 0),
             high_count_expected=expected_query_count > 8,
             high_count_reason=msg,
         ):
@@ -900,6 +911,7 @@ def test_apply_axis1_with_3rd_party_libraries_and_decorator(
 
     with SqlCounter(
         query_count=expected_query_count,
+        udf_count=1,
         high_count_expected=True,
         high_count_reason="Snowpark package upload requires many queries.",
     ):
@@ -1052,6 +1064,7 @@ def f(s, default_arg=2):
 
     axis_0_no_cache_kwargs = {
         "query_count": 16,
+        "join_count": 5,
         "udtf_count": 3,
         "high_count_expected": True,
         "high_count_reason": "UDTF creation on multiple columns",
@@ -1065,13 +1078,13 @@ def f(s, default_arg=2):
         )
     # second application should trigger cache hit, even with explicit axis argument or via transform call
     # unclear why SQL counter still parses UDTF creations here despite having lower query counts
-    with SqlCounter(query_count=7, udtf_count=3):
+    with SqlCounter(query_count=7, join_count=5, udtf_count=3):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
             lambda df: df.transform(f),
         )
-    with SqlCounter(query_count=7, udtf_count=3):
+    with SqlCounter(query_count=7, join_count=5, udtf_count=3):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
@@ -1085,27 +1098,27 @@ def f(s, default_arg=2):
             lambda df: df.apply(f, default_arg=3),
         )
     # application on a new dataframe with the same schema should hit the cache
-    with SqlCounter(query_count=7, udtf_count=3):
+    with SqlCounter(query_count=7, join_count=5, udtf_count=3):
         eval_snowpark_pandas_result(
             *create_test_dfs(test_data),
             lambda df: df.transform(f),
         )
     # calling on axis=1 creates a new UDTF
-    with SqlCounter(query_count=5, udtf_count=1):
+    with SqlCounter(query_count=5, join_count=2, udtf_count=1):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
             lambda df: df.apply(f, axis=1),
         )
     # a second call with axis=1 does hit the cache (not sure why SQL counter registers a udtf creation)
-    with SqlCounter(query_count=2, udtf_count=1):
+    with SqlCounter(query_count=2, join_count=2, udtf_count=1):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
             lambda df: df.apply(f, axis=1),
         )
     # calling on axis=1 with different argument value does not hit the cache
-    with SqlCounter(query_count=5, udtf_count=1):
+    with SqlCounter(query_count=5, join_count=2, udtf_count=1):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
@@ -1128,6 +1141,7 @@ def f(s, default_arg=2):
     with SqlCounter(
         query_count=11,
         udtf_count=2,
+        join_count=3,
         high_count_expected=True,
         high_count_reason="UDTF creation on multiple columns",
     ):
@@ -1137,21 +1151,21 @@ def f(s, default_arg=2):
             lambda df: df.apply(f, axis=0),
         )
     # A second call hits the cache.
-    with SqlCounter(query_count=5, udtf_count=2):
+    with SqlCounter(query_count=5, udtf_count=2, join_count=3):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
             lambda df: df.apply(f, axis=0),
         )
     # The same rules apply with a different axis argument.
-    with SqlCounter(query_count=5, udtf_count=1):
+    with SqlCounter(query_count=5, udtf_count=1, join_count=2):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
             lambda df: df.apply(f, axis=1),
         )
     # A second call still does not hit the cache.
-    with SqlCounter(query_count=2, udtf_count=1):
+    with SqlCounter(query_count=2, udtf_count=1, join_count=2):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
@@ -1174,6 +1188,7 @@ def f(s, default_arg=2):
     with SqlCounter(
         query_count=11,
         udtf_count=2,
+        join_count=3,
         high_count_expected=True,
         high_count_reason="UDTF creation on multiple columns",
     ):
@@ -1183,21 +1198,21 @@ def f(s, default_arg=2):
             lambda df: df.apply(f, axis=0),
         )
     # A second call hits the cache.
-    with SqlCounter(query_count=5, udtf_count=2):
+    with SqlCounter(query_count=5, udtf_count=2, join_count=3):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
             lambda df: df.apply(f, axis=0),
         )
     # The same rules apply with a different axis argument.
-    with SqlCounter(query_count=5, udtf_count=1):
+    with SqlCounter(query_count=5, udtf_count=1, join_count=2):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
             lambda df: df.apply(f, axis=1),
         )
     # A second call hits the cache.
-    with SqlCounter(query_count=2, udtf_count=1):
+    with SqlCounter(query_count=2, udtf_count=1, join_count=2):
         eval_snowpark_pandas_result(
             snow_df,
             native_df,
@@ -1219,14 +1234,14 @@ def __init__(self) -> None:
     def operation(col, arg):
         return col + sum(arg.x)
 
-    with SqlCounter(query_count=6, udtf_count=1):
+    with SqlCounter(query_count=6, join_count=1, udtf_count=1):
         eval_snowpark_pandas_result(
             *create_test_dfs(test_data), lambda df: df.apply(operation, arg=arg)
         )
 
     # Mutate arg.x, preventing a cache entry from being created
     arg.x.append(10)
-    with SqlCounter(query_count=6, udtf_count=1):
+    with SqlCounter(query_count=6, join_count=1, udtf_count=1):
         eval_snowpark_pandas_result(
             *create_test_dfs(test_data), lambda df: df.apply(operation, arg=arg)
         )
@@ -1235,6 +1250,7 @@ def operation(col, arg):
     with SqlCounter(
         query_count=11,
         udtf_count=2,
+        join_count=2,
         high_count_expected=True,
         high_count_reason="multiple apply calls in sequence",
     ):
@@ -1251,7 +1267,7 @@ def operation(col, arg):
     # pickling creates different binary blobs.
     arg2 = A()
     arg2.x.append(10)
-    with SqlCounter(query_count=3, udtf_count=1):
+    with SqlCounter(query_count=3, join_count=1, udtf_count=1):
         eval_snowpark_pandas_result(
             *create_test_dfs(test_data), lambda df: df.apply(operation, arg=arg2)
         )
 
@@ -88,6 +88,7 @@ def test_axis_0_basic_types_without_type_hints(data, func, return_type):
         query_count=1 if func == np.min else 11,
         join_count=0 if func == np.min else 2,
         udtf_count=0 if func == np.min else 2,
+        union_count=0 if func == np.min else 1,
         high_count_expected=func != np.min,
         high_count_reason="SNOW-1650644 & SNOW-1345395: Avoid extra caching and repeatedly creating same temp function",
     ):
@@ -108,6 +109,7 @@ def test_axis_0_basic_types_with_type_hints(data, func, return_type):
         query_count=11,
         join_count=2,
         udtf_count=2,
+        union_count=1,
         high_count_expected=True,
         high_count_reason="SNOW-1650644 & SNOW-1345395: Avoid extra caching and repeatedly creating same temp function",
     ):
@@ -161,6 +163,7 @@ def foo(row) -> str:
         query_count=11,
         join_count=2,
         udtf_count=2,
+        union_count=1,
         high_count_expected=True,
         high_count_reason="SNOW-1650644 & SNOW-1345395: Avoid extra caching and repeatedly creating same temp function",
     ):
@@ -526,6 +529,9 @@ def f(x, y, z=1) -> int:
 
     with SqlCounter(
         query_count=11,
+        union_count=1,
+        join_count=2,
+        udtf_count=2,
         high_count_expected=True,
         high_count_reason="SNOW-1650644 & SNOW-1345395: Avoid extra caching and repeatedly creating same temp function",
     ):
@@ -535,6 +541,9 @@ def f(x, y, z=1) -> int:
 
     with SqlCounter(
         query_count=11,
+        union_count=1,
+        join_count=2,
+        udtf_count=2,
         high_count_expected=True,
         high_count_reason="SNOW-1650644 & SNOW-1345395: Avoid extra caching and repeatedly creating same temp function",
     ):
@@ -676,6 +685,9 @@ def test_apply_axis0_with_3rd_party_libraries_and_decorator(
 
     with SqlCounter(
         query_count=expected_query_count,
+        join_count=5,
+        udtf_count=5,
+        union_count=4,
         high_count_expected=True,
         high_count_reason="SNOW-1650644 & SNOW-1345395: Avoid extra caching and repeatedly creating same temp function",
     ):
 
@@ -211,7 +211,7 @@ def test_map_udf_caching():
         eval_snowpark_pandas_result(
             *create_test_dfs(test_data), lambda df: df.map(operation)
         )
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=1, udf_count=1):
         # A second call to a frame with the same column signatures does not create any new UDFs.
         eval_snowpark_pandas_result(
             *create_test_dfs(test_data), lambda df: df.map(operation)
 
@@ -821,7 +821,7 @@ def test_df_set_axis_with_quoted_index():
     # check first that operation result is the same
     snow_df = pd.DataFrame(data)
     native_df = native_pd.DataFrame(data)
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=1, join_count=1):
         eval_snowpark_pandas_result(snow_df, native_df, helper)
 
     # then, explicitly compare axes
@@ -830,10 +830,10 @@ def test_df_set_axis_with_quoted_index():
 
     native_ans = helper(native_df)
 
-    with SqlCounter(query_count=1):
+    with SqlCounter(query_count=1, join_count=1):
         assert_axes_result_equal(ans.axes, native_ans.axes)
 
     assert list(native_ans.index) == labels
     # extra query for tolist
-    with SqlCounter(query_count=2):
+    with SqlCounter(query_count=2, join_count=2):
         assert list(ans.index) == labels
Original file line number	Diff line number	Diff line change
`@@ -211,7 +211,7 @@ def test_map_udf_caching():`
`211`	`211`	`eval_snowpark_pandas_result(`
`212`	`212`	`*create_test_dfs(test_data), lambda df: df.map(operation)`
`213`	`213`	`)`
`214`		`- with SqlCounter(query_count=1):`
	`214`	`+ with SqlCounter(query_count=1, udf_count=1):`
`215`	`215`	`# A second call to a frame with the same column signatures does not create any new UDFs.`
`216`	`216`	`eval_snowpark_pandas_result(`
`217`	`217`	`*create_test_dfs(test_data), lambda df: df.map(operation)`