From 5296ddf176ec4432bdc2a96d909350c849bb96ed Mon Sep 17 00:00:00 2001 From: Rehan Durrani Date: Thu, 12 Dec 2024 19:44:07 -0800 Subject: [PATCH 1/9] [SNOW-1705797]: Use cached metadata to make `repr` faster on simple DataFrames --- .../plugin/_internal/ordered_dataframe.py | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py b/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py index aa111b8a96..b62bad3710 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py @@ -421,10 +421,21 @@ def ensure_row_count_column(self) -> "OrderedDataFrame": wrap_double_underscore=True, )[0] ) - ordered_dataframe = self.select( - *self.projected_column_snowflake_quoted_identifiers, - count("*").over().as_(row_count_snowflake_quoted_identifier), - ) + if not self.is_projection_of_table(): + ordered_dataframe = self.select( + *self.projected_column_snowflake_quoted_identifiers, + count("*").over().as_(row_count_snowflake_quoted_identifier), + ) + else: + from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit + + row_count = self.select(count("*").as_("__count_of_rows__"),).collect()[ + 0 + ][0] + ordered_dataframe = self.select( + *self.projected_column_snowflake_quoted_identifiers, + pandas_lit(row_count).as_(row_count_snowflake_quoted_identifier), + ) # inplace update so dataframe_ref can be shared. Note that we keep # the original ordering columns. ordered_dataframe.row_count_snowflake_quoted_identifier = ( @@ -2020,3 +2031,23 @@ def sample(self, n: Optional[int], frac: Optional[float]) -> "OrderedDataFrame": ordering_columns=self.ordering_columns, ) ) + + def is_projection_of_table(self) -> bool: + """ + Return whether or not the current OrderedDataFrame is simply a projection of a table. 
+ + Returns: + bool + True if the current OrderedDataFrame is simply a projection of a table. False if it represents + a more complex operation. + """ + # If we have only performed projections since creating this DataFrame, it will only contain + # 1 API call in the plan - either `Session.sql` for DataFrames based off of I/O operations + # e.g. `read_snowflake` or `read_csv`, or `Session.create_dataframe` for DataFrames created + # out of Python objects. + snowpark_df = self._dataframe_ref.snowpark_dataframe + snowpark_plan = snowpark_df._plan + return len(snowpark_plan.api_calls) == 1 and any( + accepted_api in snowpark_plan.api_calls[0]["name"] + for accepted_api in ["Session.sql", "Session.create_dataframe"] + ) From b0c271e5eed9c20bbb038553b63de0f75976d255 Mon Sep 17 00:00:00 2001 From: Rehan Durrani Date: Thu, 12 Dec 2024 20:19:19 -0800 Subject: [PATCH 2/9] Use count correctly --- .../snowpark/modin/plugin/_internal/ordered_dataframe.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py b/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py index b62bad3710..650ceff90a 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py @@ -429,9 +429,7 @@ def ensure_row_count_column(self) -> "OrderedDataFrame": else: from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit - row_count = self.select(count("*").as_("__count_of_rows__"),).collect()[ - 0 - ][0] + row_count = self._dataframe_ref.snowpark_dataframe.count() ordered_dataframe = self.select( *self.projected_column_snowflake_quoted_identifiers, pandas_lit(row_count).as_(row_count_snowflake_quoted_identifier), From 64359f6254b6ba4092bb7b4be4aa5441bdf27123 Mon Sep 17 00:00:00 2001 From: Rehan Durrani Date: Tue, 17 Dec 2024 15:30:45 -0800 Subject: [PATCH 3/9] Fix first 10 files' query counts --- 
tests/integ/modin/frame/test_add_suffix.py | 2 +- tests/integ/modin/frame/test_aggregate.py | 2 +- tests/integ/modin/frame/test_attrs.py | 2 +- tests/integ/modin/frame/test_empty.py | 2 +- tests/integ/modin/frame/test_getitem.py | 27 ++-- tests/integ/modin/frame/test_head_tail.py | 60 ++++----- tests/integ/modin/frame/test_iat.py | 119 ++++++++++-------- tests/integ/modin/frame/test_insert.py | 54 ++++---- tests/integ/modin/test_chained_operations.py | 2 +- .../integ/modin/test_from_pandas_to_pandas.py | 2 +- tests/integ/modin/test_numpy.py | 6 +- 11 files changed, 148 insertions(+), 130 deletions(-) diff --git a/tests/integ/modin/frame/test_add_suffix.py b/tests/integ/modin/frame/test_add_suffix.py index 3cb338ca6d..5b6c2a91f7 100644 --- a/tests/integ/modin/frame/test_add_suffix.py +++ b/tests/integ/modin/frame/test_add_suffix.py @@ -71,7 +71,7 @@ def test_df_add_suffix_snowpark_pandas_series( ) -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=3) def test_df_add_prefix_snowpark_pandas_df( default_index_snowpark_pandas_df, default_index_native_df ): diff --git a/tests/integ/modin/frame/test_aggregate.py b/tests/integ/modin/frame/test_aggregate.py index 7003c802d5..4a14241237 100644 --- a/tests/integ/modin/frame/test_aggregate.py +++ b/tests/integ/modin/frame/test_aggregate.py @@ -170,7 +170,7 @@ def test_string_sum(data, numeric_only_kwargs): ) -@sql_count_checker(query_count=1) +@sql_count_checker(query_count=2) def test_string_sum_of_reversed_df(): # check that we get the string concatenation right even when the dataframe # is not in its original order. diff --git a/tests/integ/modin/frame/test_attrs.py b/tests/integ/modin/frame/test_attrs.py index bfa972d16a..b89bd7d8d4 100644 --- a/tests/integ/modin/frame/test_attrs.py +++ b/tests/integ/modin/frame/test_attrs.py @@ -68,7 +68,7 @@ def func(df): # Tests that attrs is preserved across `take`, a unary operation that returns a Snowpark pandas object. 
# Other unary operators are checked by other tests in the `eval_snowpark_pandas_result` method. -@sql_count_checker(query_count=0) +@sql_count_checker(query_count=1) def test_df_attrs_take(): def func(df): df.attrs = {"A": [1], "B": "check me"} diff --git a/tests/integ/modin/frame/test_empty.py b/tests/integ/modin/frame/test_empty.py index 9eaecc36dd..842d072064 100644 --- a/tests/integ/modin/frame/test_empty.py +++ b/tests/integ/modin/frame/test_empty.py @@ -27,7 +27,7 @@ ({"A": [np.nan]}, "np nan column"), ], ) -@sql_count_checker(query_count=1) +@sql_count_checker(query_count=2) def test_dataframe_empty_param(dataframe_input, test_case_name): eval_snowpark_pandas_result( pd.DataFrame(dataframe_input), diff --git a/tests/integ/modin/frame/test_getitem.py b/tests/integ/modin/frame/test_getitem.py index e1f676b4ff..abf9e9834f 100644 --- a/tests/integ/modin/frame/test_getitem.py +++ b/tests/integ/modin/frame/test_getitem.py @@ -85,7 +85,7 @@ def get_helper(df): return df[key] # 5 extra queries for iter - with SqlCounter(query_count=6 if isinstance(key, native_pd.Index) else 1): + with SqlCounter(query_count=7 if isinstance(key, native_pd.Index) else 1): eval_snowpark_pandas_result( default_index_snowpark_pandas_df, default_index_native_df, @@ -120,7 +120,7 @@ def get_helper(df): snowpark_df = pd.DataFrame(native_df) # 5 extra queries for iter - with SqlCounter(query_count=6 if isinstance(key, native_pd.Index) else 1): + with SqlCounter(query_count=7 if isinstance(key, native_pd.Index) else 1): eval_snowpark_pandas_result( snowpark_df, native_df, @@ -321,15 +321,19 @@ def test_df_getitem_calls_getitem(): slice(-100, None, -2), ], ) -@sql_count_checker(query_count=1) def test_df_getitem_with_slice( key, default_index_snowpark_pandas_df, default_index_native_df ): - eval_snowpark_pandas_result( - default_index_snowpark_pandas_df, - default_index_native_df, - lambda df: df[key], - ) + if key.start is None: + expected_query_count = 1 + else: + expected_query_count = 2 + 
with SqlCounter(query_count=expected_query_count): + eval_snowpark_pandas_result( + default_index_snowpark_pandas_df, + default_index_native_df, + lambda df: df[key], + ) @pytest.mark.parametrize( @@ -364,6 +368,7 @@ def test_df_getitem_with_non_int_slice(key): def test_df_getitem_with_multiindex( key, default_index_native_df, multiindex_native, native_df_with_multiindex_columns ): + expected_query_count = 2 if isinstance(key, slice) else 1 # Test __getitem__ with df with MultiIndex index. native_df = default_index_native_df.set_index(multiindex_native) snowpark_df = pd.DataFrame(native_df) @@ -377,13 +382,13 @@ def test_df_getitem_with_multiindex( ) else _key ) - with SqlCounter(query_count=1): + with SqlCounter(query_count=expected_query_count): eval_snowpark_pandas_result(snowpark_df, native_df, lambda df: df[_key]) # Test __getitem__ with df with MultiIndex columns. native_df = native_df_with_multiindex_columns snowpark_df = pd.DataFrame(native_df) - with SqlCounter(query_count=1): + with SqlCounter(query_count=expected_query_count): eval_snowpark_pandas_result( snowpark_df, native_df, lambda df: df[key], check_column_type=False ) @@ -391,7 +396,7 @@ def test_df_getitem_with_multiindex( # Test __getitem__ with df with MultiIndex index. 
native_df = native_df_with_multiindex_columns.set_index(multiindex_native) snowpark_df = pd.DataFrame(native_df) - with SqlCounter(query_count=1): + with SqlCounter(query_count=expected_query_count): eval_snowpark_pandas_result( snowpark_df, native_df, lambda df: df[key], check_column_type=False ) diff --git a/tests/integ/modin/frame/test_head_tail.py b/tests/integ/modin/frame/test_head_tail.py index 231bec17ca..2f38bb1d76 100644 --- a/tests/integ/modin/frame/test_head_tail.py +++ b/tests/integ/modin/frame/test_head_tail.py @@ -13,7 +13,7 @@ assert_snowpark_pandas_equals_to_pandas_without_dtypecheck, eval_snowpark_pandas_result, ) -from tests.integ.utils.sql_counter import sql_count_checker +from tests.integ.utils.sql_counter import SqlCounter def eval_result_and_query_with_no_join( @@ -34,41 +34,43 @@ def eval_result_and_query_with_no_join( "n", [1, None, 0, -1, -10, 5, 10], ) -@sql_count_checker(query_count=2) def test_head_tail(n, default_index_snowpark_pandas_df, default_index_native_df): - eval_snowpark_pandas_result( - default_index_snowpark_pandas_df, - default_index_native_df, - lambda df: (df.head() if n is None else df.head(n)), - comparator=eval_result_and_query_with_no_join, - ) + expected_query_count = 2 if n == 0 else 3 + with SqlCounter(query_count=expected_query_count): + eval_snowpark_pandas_result( + default_index_snowpark_pandas_df, + default_index_native_df, + lambda df: (df.head() if n is None else df.head(n)), + comparator=eval_result_and_query_with_no_join, + ) - eval_snowpark_pandas_result( - default_index_snowpark_pandas_df, - default_index_native_df, - lambda df: (df.tail() if n is None else df.tail(n)), - comparator=eval_result_and_query_with_no_join, - ) + eval_snowpark_pandas_result( + default_index_snowpark_pandas_df, + default_index_native_df, + lambda df: (df.tail() if n is None else df.tail(n)), + comparator=eval_result_and_query_with_no_join, + ) @pytest.mark.parametrize( "n", [1, None, 0, -1, -10, 5, 10], ) 
-@sql_count_checker(query_count=2) def test_empty_dataframe(n, empty_snowpark_pandas_df): - eval_snowpark_pandas_result( - empty_snowpark_pandas_df, - native_pd.DataFrame(), - lambda df: (df.head() if n is None else df.head(n)), - comparator=eval_result_and_query_with_no_join, - check_column_type=False, - ) + expected_query_count = 2 if n == 0 else 3 + with SqlCounter(query_count=expected_query_count): + eval_snowpark_pandas_result( + empty_snowpark_pandas_df, + native_pd.DataFrame(), + lambda df: (df.head() if n is None else df.head(n)), + comparator=eval_result_and_query_with_no_join, + check_column_type=False, + ) - eval_snowpark_pandas_result( - empty_snowpark_pandas_df, - native_pd.DataFrame(), - lambda df: (df.tail() if n is None else df.tail(n)), - comparator=eval_result_and_query_with_no_join, - check_column_type=False, - ) + eval_snowpark_pandas_result( + empty_snowpark_pandas_df, + native_pd.DataFrame(), + lambda df: (df.tail() if n is None else df.tail(n)), + comparator=eval_result_and_query_with_no_join, + check_column_type=False, + ) diff --git a/tests/integ/modin/frame/test_iat.py b/tests/integ/modin/frame/test_iat.py index 4f2c685e89..a1c42540dc 100644 --- a/tests/integ/modin/frame/test_iat.py +++ b/tests/integ/modin/frame/test_iat.py @@ -7,25 +7,29 @@ from pandas.errors import IndexingError from tests.integ.modin.utils import eval_snowpark_pandas_result # noqa: F401 -from tests.integ.utils.sql_counter import sql_count_checker +from tests.integ.utils.sql_counter import sql_count_checker, SqlCounter @pytest.mark.parametrize( - "key", + "key, expected_query_count", [ - (0, 0), - (0, -7), - (-7, 0), - (-7, -7), + ((0, 0), 1), + ((0, -7), 1), + ((-7, 0), 2), + ((-7, -7), 2), ], ) -@sql_count_checker(query_count=1) def test_iat_get_default_index_str_columns( key, + expected_query_count, default_index_snowpark_pandas_df, default_index_native_df, ): - assert default_index_snowpark_pandas_df.iat[key] == default_index_native_df.iat[key] + with 
SqlCounter(query_count=expected_query_count): + assert ( + default_index_snowpark_pandas_df.iat[key] + == default_index_native_df.iat[key] + ) @pytest.mark.parametrize( @@ -37,7 +41,7 @@ def test_iat_get_default_index_str_columns( (-7, -7), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=2, join_count=2) def test_iat_set_default_index_str_columns( key, default_index_snowpark_pandas_df, @@ -55,21 +59,22 @@ def iat_set_helper(df): @pytest.mark.parametrize( - "key", + "key, expected_query_count", [ - (0, 0), - (0, -7), - (-7, 0), - (-7, -7), + ((0, 0), 1), + ((0, -7), 1), + ((-7, 0), 2), + ((-7, -7), 2), ], ) -@sql_count_checker(query_count=1) def test_iat_get_str_index_str_columns( key, + expected_query_count, str_index_snowpark_pandas_df, str_index_native_df, ): - assert str_index_snowpark_pandas_df.iat[key] == str_index_native_df.iat[key] + with SqlCounter(query_count=expected_query_count): + assert str_index_snowpark_pandas_df.iat[key] == str_index_native_df.iat[key] @pytest.mark.parametrize( @@ -81,7 +86,7 @@ def test_iat_get_str_index_str_columns( (-7, -7), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=2, join_count=2) def test_iat_set_str_index_str_columns( key, str_index_snowpark_pandas_df, @@ -96,21 +101,22 @@ def iat_set_helper(df): @pytest.mark.parametrize( - "key", + "key, expected_query_count", [ - (0, 0), - (0, -7), - (-7, 0), - (-7, -7), + ((0, 0), 1), + ((0, -7), 1), + ((-7, 0), 2), + ((-7, -7), 2), ], ) -@sql_count_checker(query_count=1) def test_iat_get_time_index_time_columns( key, + expected_query_count, time_index_snowpark_pandas_df, time_index_native_df, ): - assert time_index_snowpark_pandas_df.iat[key] == time_index_native_df.iat[key] + with SqlCounter(query_count=expected_query_count): + assert time_index_snowpark_pandas_df.iat[key] == time_index_native_df.iat[key] @pytest.mark.parametrize( @@ -122,7 +128,7 @@ def test_iat_get_time_index_time_columns( (-7, -7), ], ) 
-@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=2, join_count=2) def test_iat_set_time_index_time_columns( key, time_index_snowpark_pandas_df, @@ -140,23 +146,24 @@ def iat_set_helper(df): @pytest.mark.parametrize( - "key", + "key, expected_query_count", [ - (0, 0), - (0, -7), - (-7, 0), - (-7, -7), + ((0, 0), 1), + ((0, -7), 1), + ((-7, 0), 2), + ((-7, -7), 2), ], ) -@sql_count_checker(query_count=1) def test_iat_get_multiindex_index_str_columns( key, + expected_query_count, default_index_native_df, multiindex_native, ): - native_df = default_index_native_df.set_index(multiindex_native) - snowpark_df = pd.DataFrame(native_df) - assert snowpark_df.iat[key] == native_df.iat[key] + with SqlCounter(query_count=expected_query_count): + native_df = default_index_native_df.set_index(multiindex_native) + snowpark_df = pd.DataFrame(native_df) + assert snowpark_df.iat[key] == native_df.iat[key] @pytest.mark.parametrize( @@ -168,7 +175,7 @@ def test_iat_get_multiindex_index_str_columns( (-7, -7), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=2, join_count=2) def test_iat_set_multiindex_index_str_columns( key, default_index_native_df, @@ -183,22 +190,23 @@ def at_set_helper(df): @pytest.mark.parametrize( - "key", + "key, expected_query_count", [ - (0, 0), - (0, -7), - (-7, 0), - (-7, -7), + ((0, 0), 1), + ((0, -7), 1), + ((-7, 0), 2), + ((-7, -7), 2), ], ) -@sql_count_checker(query_count=1) def test_iat_get_default_index_multiindex_columns( key, + expected_query_count, native_df_with_multiindex_columns, ): - native_df = native_df_with_multiindex_columns - snowpark_df = pd.DataFrame(native_df) - assert snowpark_df.iat[key] == native_df.iat[key] + with SqlCounter(query_count=expected_query_count): + native_df = native_df_with_multiindex_columns + snowpark_df = pd.DataFrame(native_df) + assert snowpark_df.iat[key] == native_df.iat[key] @pytest.mark.parametrize( @@ -210,7 +218,7 @@ def 
test_iat_get_default_index_multiindex_columns( (-7, -7), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=2, join_count=2) def test_iat_set_default_index_multiindex_columns( key, native_df_with_multiindex_columns, @@ -224,23 +232,24 @@ def at_set_helper(df): @pytest.mark.parametrize( - "key", + "key, expected_query_count", [ - (0, 0), - (0, -7), - (-7, 0), - (-7, -7), + ((0, 0), 1), + ((0, -7), 1), + ((-7, 0), 2), + ((-7, -7), 2), ], ) -@sql_count_checker(query_count=1) def test_iat_get_multiindex_index_multiindex_columns( key, + expected_query_count, native_df_with_multiindex_columns, multiindex_native, ): - native_df = native_df_with_multiindex_columns.set_index(multiindex_native) - snowpark_df = pd.DataFrame(native_df) - assert snowpark_df.iat[key] == native_df.iat[key] + with SqlCounter(query_count=expected_query_count): + native_df = native_df_with_multiindex_columns.set_index(multiindex_native) + snowpark_df = pd.DataFrame(native_df) + assert snowpark_df.iat[key] == native_df.iat[key] @pytest.mark.parametrize( @@ -252,7 +261,7 @@ def test_iat_get_multiindex_index_multiindex_columns( (-7, -7), ], ) -@sql_count_checker(query_count=1, join_count=2) +@sql_count_checker(query_count=2, join_count=2) def test_iat_set_multiindex_index_multiindex_columns( key, native_df_with_multiindex_columns, diff --git a/tests/integ/modin/frame/test_insert.py b/tests/integ/modin/frame/test_insert.py index 974823d02c..f9a421323e 100644 --- a/tests/integ/modin/frame/test_insert.py +++ b/tests/integ/modin/frame/test_insert.py @@ -54,7 +54,7 @@ def native_df(): ), ], ) -@sql_count_checker(query_count=5, join_count=3) +@sql_count_checker(query_count=6, join_count=3) def test_insert_snowpark_pandas_objects(native_df, native_value): snow_df = pd.DataFrame(native_df) value = pd.DataFrame(native_value) @@ -99,7 +99,7 @@ def test_insert_snowpark_pandas_objects(native_df, native_value): ), ], ) -@sql_count_checker(query_count=2, join_count=1) 
+@sql_count_checker(query_count=3, join_count=1) def test_insert_one_to_many(native_df, native_value): snow_df = pd.DataFrame(native_df) value = pd.DataFrame(native_value) @@ -128,11 +128,11 @@ def test_insert_one_to_many(native_df, native_value): @pytest.mark.parametrize( "value, expected_query_count, expected_join_count", [ - (np.array(["a", "b", "c"]), 2, 1), # numpy array of shape (N,) - (np.array([["a"], ["b"], ["c"]]), 2, 1), # numpy array of shape (N, 1) - (["a", "b", "c"], 2, 1), # python list + (np.array(["a", "b", "c"]), 3, 1), # numpy array of shape (N,) + (np.array([["a"], ["b"], ["c"]]), 3, 1), # numpy array of shape (N, 1) + (["a", "b", "c"], 3, 1), # python list ({0: 1, 1: 2, 4: 3}, 1, 1), # python dict - (("a", "b", "c"), 2, 1), # python tuple + (("a", "b", "c"), 3, 1), # python tuple ], ) def test_insert_array_like(native_df, value, expected_query_count, expected_join_count): @@ -187,7 +187,7 @@ def test_insert_pandas_types_negative(snow_df): snow_df.insert(0, "col3", value) -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=3) def test_insert_dataframe_shape_negative(native_df): # DataFrame with more than one column snow_df = pd.DataFrame(native_df) @@ -209,10 +209,10 @@ def test_insert_dataframe_shape_negative(native_df): [ # NOTE: Accepted numpy array shapes are (N,) or (N, 1) where N = number of rows = 3 (np.ones((3, 2)), 0), - (np.ones((6, 1)), 1), - (np.ones((1, 1)), 1), - ([1, 2], 1), # len < number of rows - ((6, 7, 8, 9), 1), # len > number of rows + (np.ones((6, 1)), 2), + (np.ones((1, 1)), 2), + ([1, 2], 2), # len < number of rows + ((6, 7, 8, 9), 2), # len > number of rows ({"a", "b", "c"}, 0), # python set ], ) @@ -227,7 +227,7 @@ def test_insert_value_negative(native_df, value, expected_query_count): ) -@sql_count_checker(query_count=2, join_count=1) +@sql_count_checker(query_count=3, join_count=1) def test_insert_duplicate_label(native_df): snow_df = pd.DataFrame(native_df) eval_snowpark_pandas_result( @@ -250,7 
+250,7 @@ def test_insert_duplicate_label_negative(native_df): @pytest.mark.parametrize("loc", [0, 1, 2]) -@sql_count_checker(query_count=2, join_count=1) +@sql_count_checker(query_count=3, join_count=1) def test_insert_loc(native_df, loc): snow_df = pd.DataFrame(native_df) eval_snowpark_pandas_result( @@ -262,7 +262,7 @@ def test_insert_loc(native_df, loc): @pytest.mark.parametrize( - "loc, expected_query_count", [(-99, 1), (-1, 1), (99, 1), ("1", 0)] + "loc, expected_query_count", [(-99, 2), (-1, 2), (99, 2), ("1", 0)] ) def test_insert_loc_negative(native_df, loc, expected_query_count): with SqlCounter(query_count=expected_query_count): @@ -278,10 +278,10 @@ def test_insert_loc_negative(native_df, loc, expected_query_count): @pytest.mark.parametrize( "value, expected_query_count, expected_join_count", [ - (np.array(["a", "b", "c", "d"]), 2, 1), # numpy array of shape (N,) - (np.array([["a"], ["b"], ["c"], ["d"]]), 2, 1), # numpy array of shape (N, 1) - (["a", "b", "c", "d"], 2, 1), # python list - (("a", "b", "c", "d"), 2, 1), # python tuple + (np.array(["a", "b", "c", "d"]), 3, 1), # numpy array of shape (N,) + (np.array([["a"], ["b"], ["c"], ["d"]]), 3, 1), # numpy array of shape (N, 1) + (["a", "b", "c", "d"], 3, 1), # python list + (("a", "b", "c", "d"), 3, 1), # python tuple ({(3, 1): 1}, 1, 1), # python dict ("abc", 1, 0), # sting scalar (1, 1, 0), # int scalar @@ -311,7 +311,7 @@ def test_insert_multiindex_array_like_and_scalar( ("a", "b", "c", "d"), # python tuple ], ) -@sql_count_checker(query_count=2, join_count=1) +@sql_count_checker(query_count=3, join_count=1) def test_insert_empty_multiindex_frame(value): mi = pd.MultiIndex.from_arrays([np.array([], dtype=int), np.array([], dtype=int)]) snow_df = pd.DataFrame([], index=mi) @@ -352,7 +352,7 @@ def test_insert_multiindex_dict_negative(): ([1.0, 2.5, 3.0], [1, 2, 3]), # Long and Double can be joined ], ) -@sql_count_checker(query_count=4, join_count=1) +@sql_count_checker(query_count=5, join_count=1) 
def test_insert_compatible_index(df_index, value_index): snow_df = pd.DataFrame({"col1": ["p", "q", "r"]}, index=native_pd.Index(df_index)) value = pd.DataFrame({"col2": ["x", "y", "z"]}, index=native_pd.Index(value_index)) @@ -383,7 +383,7 @@ def test_insert_compatible_index(df_index, value_index): ), # length and type mismatch ], ) -@sql_count_checker(query_count=1) +@sql_count_checker(query_count=2) def test_insert_index_num_levels_mismatch_negative(df_index, value_index): snow_df = pd.DataFrame({"col1": ["p", "q", "r"]}, index=native_pd.Index(df_index)) value = pd.DataFrame({"col2": ["w", "x", "y"]}, index=native_pd.Index(value_index)) @@ -408,7 +408,7 @@ def test_insert_index_num_levels_mismatch_negative(df_index, value_index): ), # type mismatch boolean != long ], ) -@sql_count_checker(query_count=2, join_count=1) +@sql_count_checker(query_count=3, join_count=1) def test_insert_index_type_mismatch(df_index, value_index, expected_index): # Note: This is different behavior than native pandas. In native pandas when # index datatype mismatch new columns in inserted will all NULL values. 
@@ -467,7 +467,7 @@ def test_insert_multiple_null(): "index, value, expected_query_count, expected_join_count", [ ([1, 2], native_pd.Series([1, 2], index=[2, 3]), 1, 1), - ([1, 2], [3, 4], 2, 1), + ([1, 2], [3, 4], 3, 1), ], ) def test_insert_into_empty_dataframe_with_index( @@ -528,6 +528,8 @@ def test_insert_into_empty_dataframe( expected_join_count = 0 if isinstance(value, list) or isinstance(value, np.ndarray): expected_query_count = 2 + if isinstance(value, (list, np.ndarray)): + expected_query_count += 1 snow_df = pd.DataFrame(data=data, columns=columns) native_df = native_pd.DataFrame(data=data, columns=columns) @@ -574,7 +576,7 @@ def test_insert_into_empty_dataframe_index_dtype_mismatch(): snow_df.to_pandas() -@sql_count_checker(query_count=2, join_count=1) +@sql_count_checker(query_count=3, join_count=1) def test_insert_empty_list_into_empty_dataframe(): snow_df = pd.DataFrame() native_df = native_pd.DataFrame() @@ -596,7 +598,7 @@ def test_insert_empty_list_into_empty_dataframe(): ([], ["A", "B", "C"]), ], ) -@sql_count_checker(query_count=1) +@sql_count_checker(query_count=2) def test_insert_into_empty_dataframe_negative(loc, data, columns): snow_df = pd.DataFrame(data=data, columns=columns) native_df = native_pd.DataFrame(data=data, columns=columns) @@ -771,7 +773,7 @@ def insert_op(df): assert_frame_equal(snow_res, expected_res, check_dtype=False) -@sql_count_checker(query_count=4, join_count=6) +@sql_count_checker(query_count=5, join_count=6) def test_insert_timedelta(): native_df = native_pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) snow_df = pd.DataFrame(native_df) diff --git a/tests/integ/modin/test_chained_operations.py b/tests/integ/modin/test_chained_operations.py index 9e496a2608..072b5fe455 100644 --- a/tests/integ/modin/test_chained_operations.py +++ b/tests/integ/modin/test_chained_operations.py @@ -11,7 +11,7 @@ from tests.utils import multithreaded_run -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=3) def 
test_chained_op1(): # bug fix SNOW-1348886 data = {"X": [1, 2, 3], "Y": [4, 5, 6]} diff --git a/tests/integ/modin/test_from_pandas_to_pandas.py b/tests/integ/modin/test_from_pandas_to_pandas.py index 469ea5ee52..14d2eb942e 100644 --- a/tests/integ/modin/test_from_pandas_to_pandas.py +++ b/tests/integ/modin/test_from_pandas_to_pandas.py @@ -536,7 +536,7 @@ def test_series_to_pandas(): assert_series_equal(snow_series.to_pandas(), pandas_series) -@sql_count_checker(query_count=2, union_count=1) +@sql_count_checker(query_count=3, union_count=1) def test_single_row_frame_to_series_to_pandas(): # create a Snowpark pandas with single row native_df = native_pd.DataFrame( diff --git a/tests/integ/modin/test_numpy.py b/tests/integ/modin/test_numpy.py index 7a322d1671..f907e2979e 100644 --- a/tests/integ/modin/test_numpy.py +++ b/tests/integ/modin/test_numpy.py @@ -66,7 +66,7 @@ def test_full_like(): snow_df = pd.DataFrame(data) pandas_df = native_pd.DataFrame(data) - with SqlCounter(query_count=2): + with SqlCounter(query_count=3): snow_result = np.full_like(snow_df, 1234) pandas_result = np.full_like(pandas_df, 1234) assert_array_equal(np.array(snow_result), np.array(pandas_result)) @@ -232,7 +232,7 @@ def test_np_ufunc_unary_operators(np_ufunc): # The query count here is from the argument logging performed by numpy on error -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=3) def test_np_ufunc_notimplemented(): data = { "A": [1], @@ -244,7 +244,7 @@ def test_np_ufunc_notimplemented(): # The query count here is from the argument logging performed by numpy on error -@sql_count_checker(query_count=3) +@sql_count_checker(query_count=4) def test_np_ufunc_with_out_notimpl(): data = { "A": [1], From e09978be6c6f2e31ccc7f33ec66c0bb1b9d228f2 Mon Sep 17 00:00:00 2001 From: Rehan Durrani Date: Tue, 17 Dec 2024 15:33:20 -0800 Subject: [PATCH 4/9] Add 3 more query counts --- tests/integ/modin/frame/test_iterrows.py | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/tests/integ/modin/frame/test_iterrows.py b/tests/integ/modin/frame/test_iterrows.py index dc7a0e6d3d..e1dee762ff 100644 --- a/tests/integ/modin/frame/test_iterrows.py +++ b/tests/integ/modin/frame/test_iterrows.py @@ -61,7 +61,7 @@ def test_df_iterrows(native_df): snowpark_df = pd.DataFrame(native_df) # One query is used to get the number of rows. One query is used to retrieve each row - each query has 4 JOIN # operations performed due to iloc. - with SqlCounter(query_count=len(native_df) + 1): + with SqlCounter(query_count=len(native_df) + 2): eval_snowpark_pandas_result( snowpark_df, native_df, @@ -70,7 +70,7 @@ def test_df_iterrows(native_df): ) -@sql_count_checker(query_count=8, union_count=7) +@sql_count_checker(query_count=9, union_count=7) def test_df_iterrows_mixed_types(default_index_native_df): # Same test as above on bigger df with mixed types. # One query is used to get the number of rows. One query is used to retrieve each row - each query has 4 JOIN @@ -85,7 +85,7 @@ def test_df_iterrows_mixed_types(default_index_native_df): ) -@sql_count_checker(query_count=7, union_count=6) +@sql_count_checker(query_count=8, union_count=6) def test_df_iterrows_multindex_df(): # Create df with a MultiIndex index. # One query is used to get the number of rows. 
One query is used to retrieve each row - each query has 4 JOIN From df51d38d5c5e067cdafa8e4760c668dd3effba73 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Tue, 11 Feb 2025 15:30:10 -0800 Subject: [PATCH 5/9] bump sql query counts --- .../modin/interoperability/scikit-learn/test_scikit_learn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integ/modin/interoperability/scikit-learn/test_scikit_learn.py b/tests/integ/modin/interoperability/scikit-learn/test_scikit_learn.py index 7cff7c7803..a8e0138a42 100644 --- a/tests/integ/modin/interoperability/scikit-learn/test_scikit_learn.py +++ b/tests/integ/modin/interoperability/scikit-learn/test_scikit_learn.py @@ -137,7 +137,7 @@ def get_cluster_centers(df) -> np.ndarray: class TestDimensionalityReduction: - @sql_count_checker(query_count=3) + @sql_count_checker(query_count=4) def test_principal_component_analysis(self, test_dfs): def get_principal_components(df) -> np.ndarray: return PCA(n_components=2).fit(df).components_ @@ -192,7 +192,7 @@ def validate_search_results(snow_estimator, pandas_estimator): class TestPreprocessing: - @sql_count_checker(query_count=5) + @sql_count_checker(query_count=6) def test_maxabs(self, test_dfs): eval_snowpark_pandas_result( *test_dfs, From 3337f4032e3ca066e8965911142bcb18e158d11e Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 27 Mar 2025 13:20:35 -0700 Subject: [PATCH 6/9] materialize row count for simple projection repr --- .../plugin/_internal/ordered_dataframe.py | 11 +++ .../compiler/snowflake_query_compiler.py | 86 +++++++++++-------- 2 files changed, 62 insertions(+), 35 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py b/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py index 650ceff90a..b8afc843eb 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py @@ -441,6 +441,17 @@ def 
ensure_row_count_column(self) -> "OrderedDataFrame": ) return ordered_dataframe + def materialize_row_count(self) -> int: + """ + Perform a query to retrieve the row count of this OrderedDataFrame. + + Use this function in place of ensure_row_count_column() in scenarios where the extra + query is acceptable, and the embedded `COUNT(*) OVER()` window operation would be too expensive. + Performing a naked `COUNT(*)` and avoiding a potential window function or cross join is + more performant in these scenarios. + """ + return self._dataframe_ref.snowpark_dataframe.count() + def generate_snowflake_quoted_identifiers( self, *, diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index c48cebefec..1f04b48355 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -13058,42 +13058,54 @@ def build_repr_df( `row_count` holds the number of rows the DataFrame has, `col_count` the number of columns the DataFrame has, and the pandas dataset with `num_rows` or fewer rows and `num_cols` or fewer columns. """ - # In order to issue less queries, use following trick: - # 1. add the row count column holding COUNT(*) OVER () over the snowpark dataframe - # 2. retrieve all columns - # 3. filter on rows with recursive count + # build_repr_df needs to know the row count of the underlying data, as the displayed representation will + # include the last few rows of the frame. + # + # To maximize performance, we use two distinct code paths. + # If the underlying OrderedDataFrame is a simple projection of a table: + # 1. Perform a query to retrieve the row count. This query will be cheap because the SQL engine can + # retrieve the value from table metadata. + # 2. Directly embed the row count into the filter query as a literal. 
+ # If the underlying data is NOT a simple projection, we opt to perform fewer queries: + # 1. add the row count column holding COUNT(*) OVER () over the snowpark dataframe + # 2. retrieve all columns + # 3. filter on rows with recursive count # Previously, 2 queries were issued, and a first version replaced them with a single query and a join # the solution here uses a window function. This may lead to perf regressions, track these here SNOW-984177. # Ensure that our reference to self._modin_frame is updated with cached row count and position. - self._modin_frame = ( - self._modin_frame.ensure_row_position_column().ensure_row_count_column() - ) - row_count_pandas_label = ( - ROW_COUNT_COLUMN_LABEL - if len(self._modin_frame.data_column_pandas_index_names) == 1 - else (ROW_COUNT_COLUMN_LABEL,) - * len(self._modin_frame.data_column_pandas_index_names) - ) - frame_with_row_count_and_position = InternalFrame.create( - ordered_dataframe=self._modin_frame.ordered_dataframe, - data_column_pandas_labels=self._modin_frame.data_column_pandas_labels - + [row_count_pandas_label], - data_column_snowflake_quoted_identifiers=self._modin_frame.data_column_snowflake_quoted_identifiers - + [self._modin_frame.row_count_snowflake_quoted_identifier], - data_column_pandas_index_names=self._modin_frame.data_column_pandas_index_names, - index_column_pandas_labels=self._modin_frame.index_column_pandas_labels, - index_column_snowflake_quoted_identifiers=self._modin_frame.index_column_snowflake_quoted_identifiers, - data_column_types=self._modin_frame.cached_data_column_snowpark_pandas_types - + [None], - index_column_types=self._modin_frame.cached_index_column_snowpark_pandas_types, - ) + row_count_value = None + if self._modin_frame.ordered_dataframe.is_projection_of_table(): + frame = self._modin_frame.ensure_row_position_column() + row_count_value = frame.ordered_dataframe.materialize_row_count() + row_count_expr = pandas_lit(row_count_value) + else: + frame = ( + 
self._modin_frame.ensure_row_position_column().ensure_row_count_column() + ) + row_count_pandas_label = ( + ROW_COUNT_COLUMN_LABEL + if len(frame.data_column_pandas_index_names) == 1 + else (ROW_COUNT_COLUMN_LABEL,) + * len(frame.data_column_pandas_index_names) + ) + frame = InternalFrame.create( + ordered_dataframe=frame.ordered_dataframe, + data_column_pandas_labels=frame.data_column_pandas_labels + + [row_count_pandas_label], + data_column_snowflake_quoted_identifiers=frame.data_column_snowflake_quoted_identifiers + + [frame.row_count_snowflake_quoted_identifier], + data_column_pandas_index_names=frame.data_column_pandas_index_names, + index_column_pandas_labels=frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=frame.index_column_snowflake_quoted_identifiers, + data_column_types=frame.cached_data_column_snowpark_pandas_types + + [None], + index_column_types=frame.cached_index_column_snowpark_pandas_types, + ) - row_count_identifier = ( - frame_with_row_count_and_position.row_count_snowflake_quoted_identifier - ) + row_count_expr = col(frame.row_count_snowflake_quoted_identifier) row_position_snowflake_quoted_identifier = ( - frame_with_row_count_and_position.row_position_snowflake_quoted_identifier + frame.row_position_snowflake_quoted_identifier ) # filter frame based on num_rows. 
@@ -13101,14 +13113,14 @@ def build_repr_df( # in the future could analyze plan to see whether retrieving column count would trigger a query, if not # simply filter out based on static schema num_rows_for_head_and_tail = num_rows_to_display // 2 + 1 - new_frame = frame_with_row_count_and_position.filter( + new_frame = frame.filter( ( col(row_position_snowflake_quoted_identifier) <= num_rows_for_head_and_tail ) | ( col(row_position_snowflake_quoted_identifier) - >= col(row_count_identifier) - num_rows_for_head_and_tail + >= row_count_expr - num_rows_for_head_and_tail ) ) @@ -13116,9 +13128,13 @@ def build_repr_df( new_qc = SnowflakeQueryCompiler(new_frame) pandas_frame = new_qc.to_pandas() - # remove last column after first retrieving row count - row_count = 0 if 0 == len(pandas_frame) else pandas_frame.iat[0, -1] - pandas_frame = pandas_frame.iloc[:, :-1] + if row_count_value is None: + # if we appended the row count column instead of directly doing a COUNT(*), splice it off + # remove last column after first retrieving row count + row_count = 0 if 0 == len(pandas_frame) else pandas_frame.iat[0, -1] + pandas_frame = pandas_frame.iloc[:, :-1] + else: + row_count = row_count_value col_count = len(pandas_frame.columns) return row_count, col_count, pandas_frame From cbf394f51e37d5864be02acf714def741a88c7d1 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 27 Mar 2025 14:40:51 -0700 Subject: [PATCH 7/9] revert query counts (need to fix test_repr) --- .../plugin/_internal/ordered_dataframe.py | 16 +-- tests/integ/modin/frame/test_add_suffix.py | 2 +- tests/integ/modin/frame/test_aggregate.py | 2 +- tests/integ/modin/frame/test_attrs.py | 2 +- tests/integ/modin/frame/test_empty.py | 2 +- tests/integ/modin/frame/test_getitem.py | 27 ++-- tests/integ/modin/frame/test_head_tail.py | 60 +++++---- tests/integ/modin/frame/test_iat.py | 119 ++++++++---------- tests/integ/modin/frame/test_insert.py | 54 ++++---- tests/integ/modin/frame/test_iterrows.py | 6 +- 
tests/integ/modin/frame/test_repr.py | 19 +-- .../scikit-learn/test_scikit_learn.py | 4 +- tests/integ/modin/test_chained_operations.py | 2 +- .../integ/modin/test_from_pandas_to_pandas.py | 2 +- tests/integ/modin/test_numpy.py | 6 +- 15 files changed, 149 insertions(+), 174 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py b/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py index b8afc843eb..1987e807a2 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py @@ -421,19 +421,11 @@ def ensure_row_count_column(self) -> "OrderedDataFrame": wrap_double_underscore=True, )[0] ) - if not self.is_projection_of_table(): - ordered_dataframe = self.select( - *self.projected_column_snowflake_quoted_identifiers, - count("*").over().as_(row_count_snowflake_quoted_identifier), - ) - else: - from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit + ordered_dataframe = self.select( + *self.projected_column_snowflake_quoted_identifiers, + count("*").over().as_(row_count_snowflake_quoted_identifier), + ) - row_count = self._dataframe_ref.snowpark_dataframe.count() - ordered_dataframe = self.select( - *self.projected_column_snowflake_quoted_identifiers, - pandas_lit(row_count).as_(row_count_snowflake_quoted_identifier), - ) # inplace update so dataframe_ref can be shared. Note that we keep # the original ordering columns. 
ordered_dataframe.row_count_snowflake_quoted_identifier = ( diff --git a/tests/integ/modin/frame/test_add_suffix.py b/tests/integ/modin/frame/test_add_suffix.py index 5b6c2a91f7..3cb338ca6d 100644 --- a/tests/integ/modin/frame/test_add_suffix.py +++ b/tests/integ/modin/frame/test_add_suffix.py @@ -71,7 +71,7 @@ def test_df_add_suffix_snowpark_pandas_series( ) -@sql_count_checker(query_count=3) +@sql_count_checker(query_count=2) def test_df_add_prefix_snowpark_pandas_df( default_index_snowpark_pandas_df, default_index_native_df ): diff --git a/tests/integ/modin/frame/test_aggregate.py b/tests/integ/modin/frame/test_aggregate.py index 4a14241237..7003c802d5 100644 --- a/tests/integ/modin/frame/test_aggregate.py +++ b/tests/integ/modin/frame/test_aggregate.py @@ -170,7 +170,7 @@ def test_string_sum(data, numeric_only_kwargs): ) -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=1) def test_string_sum_of_reversed_df(): # check that we get the string concatenation right even when the dataframe # is not in its original order. diff --git a/tests/integ/modin/frame/test_attrs.py b/tests/integ/modin/frame/test_attrs.py index b89bd7d8d4..bfa972d16a 100644 --- a/tests/integ/modin/frame/test_attrs.py +++ b/tests/integ/modin/frame/test_attrs.py @@ -68,7 +68,7 @@ def func(df): # Tests that attrs is preserved across `take`, a unary operation that returns a Snowpark pandas object. # Other unary operators are checked by other tests in the `eval_snowpark_pandas_result` method. 
-@sql_count_checker(query_count=1) +@sql_count_checker(query_count=0) def test_df_attrs_take(): def func(df): df.attrs = {"A": [1], "B": "check me"} diff --git a/tests/integ/modin/frame/test_empty.py b/tests/integ/modin/frame/test_empty.py index 842d072064..9eaecc36dd 100644 --- a/tests/integ/modin/frame/test_empty.py +++ b/tests/integ/modin/frame/test_empty.py @@ -27,7 +27,7 @@ ({"A": [np.nan]}, "np nan column"), ], ) -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=1) def test_dataframe_empty_param(dataframe_input, test_case_name): eval_snowpark_pandas_result( pd.DataFrame(dataframe_input), diff --git a/tests/integ/modin/frame/test_getitem.py b/tests/integ/modin/frame/test_getitem.py index abf9e9834f..e1f676b4ff 100644 --- a/tests/integ/modin/frame/test_getitem.py +++ b/tests/integ/modin/frame/test_getitem.py @@ -85,7 +85,7 @@ def get_helper(df): return df[key] # 5 extra queries for iter - with SqlCounter(query_count=7 if isinstance(key, native_pd.Index) else 1): + with SqlCounter(query_count=6 if isinstance(key, native_pd.Index) else 1): eval_snowpark_pandas_result( default_index_snowpark_pandas_df, default_index_native_df, @@ -120,7 +120,7 @@ def get_helper(df): snowpark_df = pd.DataFrame(native_df) # 5 extra queries for iter - with SqlCounter(query_count=7 if isinstance(key, native_pd.Index) else 1): + with SqlCounter(query_count=6 if isinstance(key, native_pd.Index) else 1): eval_snowpark_pandas_result( snowpark_df, native_df, @@ -321,19 +321,15 @@ def test_df_getitem_calls_getitem(): slice(-100, None, -2), ], ) +@sql_count_checker(query_count=1) def test_df_getitem_with_slice( key, default_index_snowpark_pandas_df, default_index_native_df ): - if key.start is None: - expected_query_count = 1 - else: - expected_query_count = 2 - with SqlCounter(query_count=expected_query_count): - eval_snowpark_pandas_result( - default_index_snowpark_pandas_df, - default_index_native_df, - lambda df: df[key], - ) + eval_snowpark_pandas_result( + 
default_index_snowpark_pandas_df, + default_index_native_df, + lambda df: df[key], + ) @pytest.mark.parametrize( @@ -368,7 +364,6 @@ def test_df_getitem_with_non_int_slice(key): def test_df_getitem_with_multiindex( key, default_index_native_df, multiindex_native, native_df_with_multiindex_columns ): - expected_query_count = 2 if isinstance(key, slice) else 1 # Test __getitem__ with df with MultiIndex index. native_df = default_index_native_df.set_index(multiindex_native) snowpark_df = pd.DataFrame(native_df) @@ -382,13 +377,13 @@ def test_df_getitem_with_multiindex( ) else _key ) - with SqlCounter(query_count=expected_query_count): + with SqlCounter(query_count=1): eval_snowpark_pandas_result(snowpark_df, native_df, lambda df: df[_key]) # Test __getitem__ with df with MultiIndex columns. native_df = native_df_with_multiindex_columns snowpark_df = pd.DataFrame(native_df) - with SqlCounter(query_count=expected_query_count): + with SqlCounter(query_count=1): eval_snowpark_pandas_result( snowpark_df, native_df, lambda df: df[key], check_column_type=False ) @@ -396,7 +391,7 @@ def test_df_getitem_with_multiindex( # Test __getitem__ with df with MultiIndex index. 
native_df = native_df_with_multiindex_columns.set_index(multiindex_native) snowpark_df = pd.DataFrame(native_df) - with SqlCounter(query_count=expected_query_count): + with SqlCounter(query_count=1): eval_snowpark_pandas_result( snowpark_df, native_df, lambda df: df[key], check_column_type=False ) diff --git a/tests/integ/modin/frame/test_head_tail.py b/tests/integ/modin/frame/test_head_tail.py index 2f38bb1d76..231bec17ca 100644 --- a/tests/integ/modin/frame/test_head_tail.py +++ b/tests/integ/modin/frame/test_head_tail.py @@ -13,7 +13,7 @@ assert_snowpark_pandas_equals_to_pandas_without_dtypecheck, eval_snowpark_pandas_result, ) -from tests.integ.utils.sql_counter import SqlCounter +from tests.integ.utils.sql_counter import sql_count_checker def eval_result_and_query_with_no_join( @@ -34,43 +34,41 @@ def eval_result_and_query_with_no_join( "n", [1, None, 0, -1, -10, 5, 10], ) +@sql_count_checker(query_count=2) def test_head_tail(n, default_index_snowpark_pandas_df, default_index_native_df): - expected_query_count = 2 if n == 0 else 3 - with SqlCounter(query_count=expected_query_count): - eval_snowpark_pandas_result( - default_index_snowpark_pandas_df, - default_index_native_df, - lambda df: (df.head() if n is None else df.head(n)), - comparator=eval_result_and_query_with_no_join, - ) + eval_snowpark_pandas_result( + default_index_snowpark_pandas_df, + default_index_native_df, + lambda df: (df.head() if n is None else df.head(n)), + comparator=eval_result_and_query_with_no_join, + ) - eval_snowpark_pandas_result( - default_index_snowpark_pandas_df, - default_index_native_df, - lambda df: (df.tail() if n is None else df.tail(n)), - comparator=eval_result_and_query_with_no_join, - ) + eval_snowpark_pandas_result( + default_index_snowpark_pandas_df, + default_index_native_df, + lambda df: (df.tail() if n is None else df.tail(n)), + comparator=eval_result_and_query_with_no_join, + ) @pytest.mark.parametrize( "n", [1, None, 0, -1, -10, 5, 10], ) 
+@sql_count_checker(query_count=2) def test_empty_dataframe(n, empty_snowpark_pandas_df): - expected_query_count = 2 if n == 0 else 3 - with SqlCounter(query_count=expected_query_count): - eval_snowpark_pandas_result( - empty_snowpark_pandas_df, - native_pd.DataFrame(), - lambda df: (df.head() if n is None else df.head(n)), - comparator=eval_result_and_query_with_no_join, - check_column_type=False, - ) + eval_snowpark_pandas_result( + empty_snowpark_pandas_df, + native_pd.DataFrame(), + lambda df: (df.head() if n is None else df.head(n)), + comparator=eval_result_and_query_with_no_join, + check_column_type=False, + ) - eval_snowpark_pandas_result( - empty_snowpark_pandas_df, - native_pd.DataFrame(), - lambda df: (df.tail() if n is None else df.tail(n)), - comparator=eval_result_and_query_with_no_join, - check_column_type=False, - ) + eval_snowpark_pandas_result( + empty_snowpark_pandas_df, + native_pd.DataFrame(), + lambda df: (df.tail() if n is None else df.tail(n)), + comparator=eval_result_and_query_with_no_join, + check_column_type=False, + ) diff --git a/tests/integ/modin/frame/test_iat.py b/tests/integ/modin/frame/test_iat.py index a1c42540dc..4f2c685e89 100644 --- a/tests/integ/modin/frame/test_iat.py +++ b/tests/integ/modin/frame/test_iat.py @@ -7,29 +7,25 @@ from pandas.errors import IndexingError from tests.integ.modin.utils import eval_snowpark_pandas_result # noqa: F401 -from tests.integ.utils.sql_counter import sql_count_checker, SqlCounter +from tests.integ.utils.sql_counter import sql_count_checker @pytest.mark.parametrize( - "key, expected_query_count", + "key", [ - ((0, 0), 1), - ((0, -7), 1), - ((-7, 0), 2), - ((-7, -7), 2), + (0, 0), + (0, -7), + (-7, 0), + (-7, -7), ], ) +@sql_count_checker(query_count=1) def test_iat_get_default_index_str_columns( key, - expected_query_count, default_index_snowpark_pandas_df, default_index_native_df, ): - with SqlCounter(query_count=expected_query_count): - assert ( - default_index_snowpark_pandas_df.iat[key] - 
== default_index_native_df.iat[key] - ) + assert default_index_snowpark_pandas_df.iat[key] == default_index_native_df.iat[key] @pytest.mark.parametrize( @@ -41,7 +37,7 @@ def test_iat_get_default_index_str_columns( (-7, -7), ], ) -@sql_count_checker(query_count=2, join_count=2) +@sql_count_checker(query_count=1, join_count=2) def test_iat_set_default_index_str_columns( key, default_index_snowpark_pandas_df, @@ -59,22 +55,21 @@ def iat_set_helper(df): @pytest.mark.parametrize( - "key, expected_query_count", + "key", [ - ((0, 0), 1), - ((0, -7), 1), - ((-7, 0), 2), - ((-7, -7), 2), + (0, 0), + (0, -7), + (-7, 0), + (-7, -7), ], ) +@sql_count_checker(query_count=1) def test_iat_get_str_index_str_columns( key, - expected_query_count, str_index_snowpark_pandas_df, str_index_native_df, ): - with SqlCounter(query_count=expected_query_count): - assert str_index_snowpark_pandas_df.iat[key] == str_index_native_df.iat[key] + assert str_index_snowpark_pandas_df.iat[key] == str_index_native_df.iat[key] @pytest.mark.parametrize( @@ -86,7 +81,7 @@ def test_iat_get_str_index_str_columns( (-7, -7), ], ) -@sql_count_checker(query_count=2, join_count=2) +@sql_count_checker(query_count=1, join_count=2) def test_iat_set_str_index_str_columns( key, str_index_snowpark_pandas_df, @@ -101,22 +96,21 @@ def iat_set_helper(df): @pytest.mark.parametrize( - "key, expected_query_count", + "key", [ - ((0, 0), 1), - ((0, -7), 1), - ((-7, 0), 2), - ((-7, -7), 2), + (0, 0), + (0, -7), + (-7, 0), + (-7, -7), ], ) +@sql_count_checker(query_count=1) def test_iat_get_time_index_time_columns( key, - expected_query_count, time_index_snowpark_pandas_df, time_index_native_df, ): - with SqlCounter(query_count=expected_query_count): - assert time_index_snowpark_pandas_df.iat[key] == time_index_native_df.iat[key] + assert time_index_snowpark_pandas_df.iat[key] == time_index_native_df.iat[key] @pytest.mark.parametrize( @@ -128,7 +122,7 @@ def test_iat_get_time_index_time_columns( (-7, -7), ], ) 
-@sql_count_checker(query_count=2, join_count=2) +@sql_count_checker(query_count=1, join_count=2) def test_iat_set_time_index_time_columns( key, time_index_snowpark_pandas_df, @@ -146,24 +140,23 @@ def iat_set_helper(df): @pytest.mark.parametrize( - "key, expected_query_count", + "key", [ - ((0, 0), 1), - ((0, -7), 1), - ((-7, 0), 2), - ((-7, -7), 2), + (0, 0), + (0, -7), + (-7, 0), + (-7, -7), ], ) +@sql_count_checker(query_count=1) def test_iat_get_multiindex_index_str_columns( key, - expected_query_count, default_index_native_df, multiindex_native, ): - with SqlCounter(query_count=expected_query_count): - native_df = default_index_native_df.set_index(multiindex_native) - snowpark_df = pd.DataFrame(native_df) - assert snowpark_df.iat[key] == native_df.iat[key] + native_df = default_index_native_df.set_index(multiindex_native) + snowpark_df = pd.DataFrame(native_df) + assert snowpark_df.iat[key] == native_df.iat[key] @pytest.mark.parametrize( @@ -175,7 +168,7 @@ def test_iat_get_multiindex_index_str_columns( (-7, -7), ], ) -@sql_count_checker(query_count=2, join_count=2) +@sql_count_checker(query_count=1, join_count=2) def test_iat_set_multiindex_index_str_columns( key, default_index_native_df, @@ -190,23 +183,22 @@ def at_set_helper(df): @pytest.mark.parametrize( - "key, expected_query_count", + "key", [ - ((0, 0), 1), - ((0, -7), 1), - ((-7, 0), 2), - ((-7, -7), 2), + (0, 0), + (0, -7), + (-7, 0), + (-7, -7), ], ) +@sql_count_checker(query_count=1) def test_iat_get_default_index_multiindex_columns( key, - expected_query_count, native_df_with_multiindex_columns, ): - with SqlCounter(query_count=expected_query_count): - native_df = native_df_with_multiindex_columns - snowpark_df = pd.DataFrame(native_df) - assert snowpark_df.iat[key] == native_df.iat[key] + native_df = native_df_with_multiindex_columns + snowpark_df = pd.DataFrame(native_df) + assert snowpark_df.iat[key] == native_df.iat[key] @pytest.mark.parametrize( @@ -218,7 +210,7 @@ def 
test_iat_get_default_index_multiindex_columns( (-7, -7), ], ) -@sql_count_checker(query_count=2, join_count=2) +@sql_count_checker(query_count=1, join_count=2) def test_iat_set_default_index_multiindex_columns( key, native_df_with_multiindex_columns, @@ -232,24 +224,23 @@ def at_set_helper(df): @pytest.mark.parametrize( - "key, expected_query_count", + "key", [ - ((0, 0), 1), - ((0, -7), 1), - ((-7, 0), 2), - ((-7, -7), 2), + (0, 0), + (0, -7), + (-7, 0), + (-7, -7), ], ) +@sql_count_checker(query_count=1) def test_iat_get_multiindex_index_multiindex_columns( key, - expected_query_count, native_df_with_multiindex_columns, multiindex_native, ): - with SqlCounter(query_count=expected_query_count): - native_df = native_df_with_multiindex_columns.set_index(multiindex_native) - snowpark_df = pd.DataFrame(native_df) - assert snowpark_df.iat[key] == native_df.iat[key] + native_df = native_df_with_multiindex_columns.set_index(multiindex_native) + snowpark_df = pd.DataFrame(native_df) + assert snowpark_df.iat[key] == native_df.iat[key] @pytest.mark.parametrize( @@ -261,7 +252,7 @@ def test_iat_get_multiindex_index_multiindex_columns( (-7, -7), ], ) -@sql_count_checker(query_count=2, join_count=2) +@sql_count_checker(query_count=1, join_count=2) def test_iat_set_multiindex_index_multiindex_columns( key, native_df_with_multiindex_columns, diff --git a/tests/integ/modin/frame/test_insert.py b/tests/integ/modin/frame/test_insert.py index f9a421323e..974823d02c 100644 --- a/tests/integ/modin/frame/test_insert.py +++ b/tests/integ/modin/frame/test_insert.py @@ -54,7 +54,7 @@ def native_df(): ), ], ) -@sql_count_checker(query_count=6, join_count=3) +@sql_count_checker(query_count=5, join_count=3) def test_insert_snowpark_pandas_objects(native_df, native_value): snow_df = pd.DataFrame(native_df) value = pd.DataFrame(native_value) @@ -99,7 +99,7 @@ def test_insert_snowpark_pandas_objects(native_df, native_value): ), ], ) -@sql_count_checker(query_count=3, join_count=1) 
+@sql_count_checker(query_count=2, join_count=1) def test_insert_one_to_many(native_df, native_value): snow_df = pd.DataFrame(native_df) value = pd.DataFrame(native_value) @@ -128,11 +128,11 @@ def test_insert_one_to_many(native_df, native_value): @pytest.mark.parametrize( "value, expected_query_count, expected_join_count", [ - (np.array(["a", "b", "c"]), 3, 1), # numpy array of shape (N,) - (np.array([["a"], ["b"], ["c"]]), 3, 1), # numpy array of shape (N, 1) - (["a", "b", "c"], 3, 1), # python list + (np.array(["a", "b", "c"]), 2, 1), # numpy array of shape (N,) + (np.array([["a"], ["b"], ["c"]]), 2, 1), # numpy array of shape (N, 1) + (["a", "b", "c"], 2, 1), # python list ({0: 1, 1: 2, 4: 3}, 1, 1), # python dict - (("a", "b", "c"), 3, 1), # python tuple + (("a", "b", "c"), 2, 1), # python tuple ], ) def test_insert_array_like(native_df, value, expected_query_count, expected_join_count): @@ -187,7 +187,7 @@ def test_insert_pandas_types_negative(snow_df): snow_df.insert(0, "col3", value) -@sql_count_checker(query_count=3) +@sql_count_checker(query_count=2) def test_insert_dataframe_shape_negative(native_df): # DataFrame with more than one column snow_df = pd.DataFrame(native_df) @@ -209,10 +209,10 @@ def test_insert_dataframe_shape_negative(native_df): [ # NOTE: Accepted numpy array shapes are (N,) or (N, 1) where N = number of rows = 3 (np.ones((3, 2)), 0), - (np.ones((6, 1)), 2), - (np.ones((1, 1)), 2), - ([1, 2], 2), # len < number of rows - ((6, 7, 8, 9), 2), # len > number of rows + (np.ones((6, 1)), 1), + (np.ones((1, 1)), 1), + ([1, 2], 1), # len < number of rows + ((6, 7, 8, 9), 1), # len > number of rows ({"a", "b", "c"}, 0), # python set ], ) @@ -227,7 +227,7 @@ def test_insert_value_negative(native_df, value, expected_query_count): ) -@sql_count_checker(query_count=3, join_count=1) +@sql_count_checker(query_count=2, join_count=1) def test_insert_duplicate_label(native_df): snow_df = pd.DataFrame(native_df) eval_snowpark_pandas_result( @@ -250,7 
+250,7 @@ def test_insert_duplicate_label_negative(native_df): @pytest.mark.parametrize("loc", [0, 1, 2]) -@sql_count_checker(query_count=3, join_count=1) +@sql_count_checker(query_count=2, join_count=1) def test_insert_loc(native_df, loc): snow_df = pd.DataFrame(native_df) eval_snowpark_pandas_result( @@ -262,7 +262,7 @@ def test_insert_loc(native_df, loc): @pytest.mark.parametrize( - "loc, expected_query_count", [(-99, 2), (-1, 2), (99, 2), ("1", 0)] + "loc, expected_query_count", [(-99, 1), (-1, 1), (99, 1), ("1", 0)] ) def test_insert_loc_negative(native_df, loc, expected_query_count): with SqlCounter(query_count=expected_query_count): @@ -278,10 +278,10 @@ def test_insert_loc_negative(native_df, loc, expected_query_count): @pytest.mark.parametrize( "value, expected_query_count, expected_join_count", [ - (np.array(["a", "b", "c", "d"]), 3, 1), # numpy array of shape (N,) - (np.array([["a"], ["b"], ["c"], ["d"]]), 3, 1), # numpy array of shape (N, 1) - (["a", "b", "c", "d"], 3, 1), # python list - (("a", "b", "c", "d"), 3, 1), # python tuple + (np.array(["a", "b", "c", "d"]), 2, 1), # numpy array of shape (N,) + (np.array([["a"], ["b"], ["c"], ["d"]]), 2, 1), # numpy array of shape (N, 1) + (["a", "b", "c", "d"], 2, 1), # python list + (("a", "b", "c", "d"), 2, 1), # python tuple ({(3, 1): 1}, 1, 1), # python dict ("abc", 1, 0), # sting scalar (1, 1, 0), # int scalar @@ -311,7 +311,7 @@ def test_insert_multiindex_array_like_and_scalar( ("a", "b", "c", "d"), # python tuple ], ) -@sql_count_checker(query_count=3, join_count=1) +@sql_count_checker(query_count=2, join_count=1) def test_insert_empty_multiindex_frame(value): mi = pd.MultiIndex.from_arrays([np.array([], dtype=int), np.array([], dtype=int)]) snow_df = pd.DataFrame([], index=mi) @@ -352,7 +352,7 @@ def test_insert_multiindex_dict_negative(): ([1.0, 2.5, 3.0], [1, 2, 3]), # Long and Double can be joined ], ) -@sql_count_checker(query_count=5, join_count=1) +@sql_count_checker(query_count=4, join_count=1) 
def test_insert_compatible_index(df_index, value_index): snow_df = pd.DataFrame({"col1": ["p", "q", "r"]}, index=native_pd.Index(df_index)) value = pd.DataFrame({"col2": ["x", "y", "z"]}, index=native_pd.Index(value_index)) @@ -383,7 +383,7 @@ def test_insert_compatible_index(df_index, value_index): ), # length and type mismatch ], ) -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=1) def test_insert_index_num_levels_mismatch_negative(df_index, value_index): snow_df = pd.DataFrame({"col1": ["p", "q", "r"]}, index=native_pd.Index(df_index)) value = pd.DataFrame({"col2": ["w", "x", "y"]}, index=native_pd.Index(value_index)) @@ -408,7 +408,7 @@ def test_insert_index_num_levels_mismatch_negative(df_index, value_index): ), # type mismatch boolean != long ], ) -@sql_count_checker(query_count=3, join_count=1) +@sql_count_checker(query_count=2, join_count=1) def test_insert_index_type_mismatch(df_index, value_index, expected_index): # Note: This is different behavior than native pandas. In native pandas when # index datatype mismatch new columns in inserted will all NULL values. 
@@ -467,7 +467,7 @@ def test_insert_multiple_null(): "index, value, expected_query_count, expected_join_count", [ ([1, 2], native_pd.Series([1, 2], index=[2, 3]), 1, 1), - ([1, 2], [3, 4], 3, 1), + ([1, 2], [3, 4], 2, 1), ], ) def test_insert_into_empty_dataframe_with_index( @@ -528,8 +528,6 @@ def test_insert_into_empty_dataframe( expected_join_count = 0 if isinstance(value, list) or isinstance(value, np.ndarray): expected_query_count = 2 - if isinstance(value, (list, np.ndarray)): - expected_query_count += 1 snow_df = pd.DataFrame(data=data, columns=columns) native_df = native_pd.DataFrame(data=data, columns=columns) @@ -576,7 +574,7 @@ def test_insert_into_empty_dataframe_index_dtype_mismatch(): snow_df.to_pandas() -@sql_count_checker(query_count=3, join_count=1) +@sql_count_checker(query_count=2, join_count=1) def test_insert_empty_list_into_empty_dataframe(): snow_df = pd.DataFrame() native_df = native_pd.DataFrame() @@ -598,7 +596,7 @@ def test_insert_empty_list_into_empty_dataframe(): ([], ["A", "B", "C"]), ], ) -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=1) def test_insert_into_empty_dataframe_negative(loc, data, columns): snow_df = pd.DataFrame(data=data, columns=columns) native_df = native_pd.DataFrame(data=data, columns=columns) @@ -773,7 +771,7 @@ def insert_op(df): assert_frame_equal(snow_res, expected_res, check_dtype=False) -@sql_count_checker(query_count=5, join_count=6) +@sql_count_checker(query_count=4, join_count=6) def test_insert_timedelta(): native_df = native_pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) snow_df = pd.DataFrame(native_df) diff --git a/tests/integ/modin/frame/test_iterrows.py b/tests/integ/modin/frame/test_iterrows.py index e1dee762ff..dc7a0e6d3d 100644 --- a/tests/integ/modin/frame/test_iterrows.py +++ b/tests/integ/modin/frame/test_iterrows.py @@ -61,7 +61,7 @@ def test_df_iterrows(native_df): snowpark_df = pd.DataFrame(native_df) # One query is used to get the number of rows. 
One query is used to retrieve each row - each query has 4 JOIN # operations performed due to iloc. - with SqlCounter(query_count=len(native_df) + 2): + with SqlCounter(query_count=len(native_df) + 1): eval_snowpark_pandas_result( snowpark_df, native_df, @@ -70,7 +70,7 @@ def test_df_iterrows(native_df): ) -@sql_count_checker(query_count=9, union_count=7) +@sql_count_checker(query_count=8, union_count=7) def test_df_iterrows_mixed_types(default_index_native_df): # Same test as above on bigger df with mixed types. # One query is used to get the number of rows. One query is used to retrieve each row - each query has 4 JOIN @@ -85,7 +85,7 @@ def test_df_iterrows_mixed_types(default_index_native_df): ) -@sql_count_checker(query_count=8, union_count=6) +@sql_count_checker(query_count=7, union_count=6) def test_df_iterrows_multindex_df(): # Create df with a MultiIndex index. # One query is used to get the number of rows. One query is used to retrieve each row - each query has 4 JOIN diff --git a/tests/integ/modin/frame/test_repr.py b/tests/integ/modin/frame/test_repr.py index 80abf4fd3e..f4ada1b7e9 100644 --- a/tests/integ/modin/frame/test_repr.py +++ b/tests/integ/modin/frame/test_repr.py @@ -15,6 +15,7 @@ from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker # expected_query_count is for test_repr_html paramterized SqlCounter test +# an additional query + select may be incurred to eagerly retrieve the row count _DATAFRAMES_TO_TEST = [ ( native_pd.DataFrame( @@ -29,41 +30,41 @@ ], } ), - 1, + 2, ), ( native_pd.DataFrame([1, 2], index=[pd.Timedelta(1), pd.Timedelta(-1)]), - 1, + 2, ), ( IRIS_DF, - 4, + 5, ), ( native_pd.DataFrame(), - 1, + 2, ), ( native_pd.DataFrame( {"A": list(range(10000)), "B": np.random.normal(size=10000)} ), - 4, + 5, ), ( native_pd.DataFrame(columns=["A", "B", "C", "D", "C", "B", "A"]), - 1, + 2, ), # one large dataframe to test many columns ( native_pd.DataFrame(columns=[f"x{i}" for i in range(300)]), - 1, + 2, ), # one large 
dataframe to test both columns/rows ( native_pd.DataFrame( data=np.zeros(shape=(300, 300)), columns=[f"x{i}" for i in range(300)] ), - 4, + 5, ), ] @@ -75,7 +76,7 @@ def test_repr(native_df, expected_query_count): native_str = repr(native_df) # only measure select statements here, creation of dfs may yield a couple # CREATE TEMPORARY TABLE/INSERT INTO queries - with SqlCounter(query_count=expected_query_count, select_count=1): + with SqlCounter(query_count=expected_query_count, select_count=2): snow_str = repr(snow_df) assert native_str == snow_str diff --git a/tests/integ/modin/interoperability/scikit-learn/test_scikit_learn.py b/tests/integ/modin/interoperability/scikit-learn/test_scikit_learn.py index a8e0138a42..7cff7c7803 100644 --- a/tests/integ/modin/interoperability/scikit-learn/test_scikit_learn.py +++ b/tests/integ/modin/interoperability/scikit-learn/test_scikit_learn.py @@ -137,7 +137,7 @@ def get_cluster_centers(df) -> np.ndarray: class TestDimensionalityReduction: - @sql_count_checker(query_count=4) + @sql_count_checker(query_count=3) def test_principal_component_analysis(self, test_dfs): def get_principal_components(df) -> np.ndarray: return PCA(n_components=2).fit(df).components_ @@ -192,7 +192,7 @@ def validate_search_results(snow_estimator, pandas_estimator): class TestPreprocessing: - @sql_count_checker(query_count=6) + @sql_count_checker(query_count=5) def test_maxabs(self, test_dfs): eval_snowpark_pandas_result( *test_dfs, diff --git a/tests/integ/modin/test_chained_operations.py b/tests/integ/modin/test_chained_operations.py index 072b5fe455..9e496a2608 100644 --- a/tests/integ/modin/test_chained_operations.py +++ b/tests/integ/modin/test_chained_operations.py @@ -11,7 +11,7 @@ from tests.utils import multithreaded_run -@sql_count_checker(query_count=3) +@sql_count_checker(query_count=2) def test_chained_op1(): # bug fix SNOW-1348886 data = {"X": [1, 2, 3], "Y": [4, 5, 6]} diff --git a/tests/integ/modin/test_from_pandas_to_pandas.py 
b/tests/integ/modin/test_from_pandas_to_pandas.py index 14d2eb942e..469ea5ee52 100644 --- a/tests/integ/modin/test_from_pandas_to_pandas.py +++ b/tests/integ/modin/test_from_pandas_to_pandas.py @@ -536,7 +536,7 @@ def test_series_to_pandas(): assert_series_equal(snow_series.to_pandas(), pandas_series) -@sql_count_checker(query_count=3, union_count=1) +@sql_count_checker(query_count=2, union_count=1) def test_single_row_frame_to_series_to_pandas(): # create a Snowpark pandas with single row native_df = native_pd.DataFrame( diff --git a/tests/integ/modin/test_numpy.py b/tests/integ/modin/test_numpy.py index f907e2979e..7a322d1671 100644 --- a/tests/integ/modin/test_numpy.py +++ b/tests/integ/modin/test_numpy.py @@ -66,7 +66,7 @@ def test_full_like(): snow_df = pd.DataFrame(data) pandas_df = native_pd.DataFrame(data) - with SqlCounter(query_count=3): + with SqlCounter(query_count=2): snow_result = np.full_like(snow_df, 1234) pandas_result = np.full_like(pandas_df, 1234) assert_array_equal(np.array(snow_result), np.array(pandas_result)) @@ -232,7 +232,7 @@ def test_np_ufunc_unary_operators(np_ufunc): # The query count here is from the argument logging performed by numpy on error -@sql_count_checker(query_count=3) +@sql_count_checker(query_count=2) def test_np_ufunc_notimplemented(): data = { "A": [1], @@ -244,7 +244,7 @@ def test_np_ufunc_notimplemented(): # The query count here is from the argument logging performed by numpy on error -@sql_count_checker(query_count=4) +@sql_count_checker(query_count=3) def test_np_ufunc_with_out_notimpl(): data = { "A": [1], From eb2d8f17fe946701d02c53a020e253d28ab1f3d5 Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Thu, 27 Mar 2025 16:15:06 -0700 Subject: [PATCH 8/9] fix repr tests --- .../plugin/_internal/ordered_dataframe.py | 13 ++++- tests/integ/modin/frame/test_repr.py | 55 +++++++++++++------ 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py 
b/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py index 1987e807a2..d752013fb6 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py @@ -2046,9 +2046,16 @@ def is_projection_of_table(self) -> bool: # 1 API call in the plan - either `Session.sql` for DataFrames based off of I/O operations # e.g. `read_snowflake` or `read_csv`, or `Session.create_dataframe` for DataFrames created # out of Python objects. + # We must also ensure that the underlying compiled query plan is only a single query -- + # for example, a simple select on pd.DataFrame([1] * 2000) would result in CREATE TEMP TABLE + # + batch INSERT + DROP TABLE queries, which introduce non-trivial overhead. snowpark_df = self._dataframe_ref.snowpark_dataframe snowpark_plan = snowpark_df._plan - return len(snowpark_plan.api_calls) == 1 and any( - accepted_api in snowpark_plan.api_calls[0]["name"] - for accepted_api in ["Session.sql", "Session.create_dataframe"] + return ( + len(snowpark_plan.api_calls) == 1 + and len(snowpark_plan.queries) == 1 + and any( + accepted_api in snowpark_plan.api_calls[0]["name"] + for accepted_api in ["Session.sql", "Session.create_dataframe"] + ) ) diff --git a/tests/integ/modin/frame/test_repr.py b/tests/integ/modin/frame/test_repr.py index f4ada1b7e9..9a8a617aca 100644 --- a/tests/integ/modin/frame/test_repr.py +++ b/tests/integ/modin/frame/test_repr.py @@ -15,7 +15,10 @@ from tests.integ.utils.sql_counter import SqlCounter, sql_count_checker # expected_query_count is for test_repr_html paramterized SqlCounter test -# an additional query + select may be incurred to eagerly retrieve the row count +# if the input data would be retrieved by a simple select, an additional query + select is +# incurred to eagerly retrieve the row count (larger data may avoid this because the CREATE TEMP +# TABLE + batch INSERT + DROP TABLE queries are likely more expensive than the 
COUNT(*) OVER() window +# function) _DATAFRAMES_TO_TEST = [ ( native_pd.DataFrame( @@ -31,59 +34,73 @@ } ), 2, + 2, ), ( native_pd.DataFrame([1, 2], index=[pd.Timedelta(1), pd.Timedelta(-1)]), 2, + 2, ), ( IRIS_DF, - 5, + 4, + 1, ), ( native_pd.DataFrame(), 2, + 2, ), ( native_pd.DataFrame( {"A": list(range(10000)), "B": np.random.normal(size=10000)} ), - 5, + 4, + 1, ), ( native_pd.DataFrame(columns=["A", "B", "C", "D", "C", "B", "A"]), 2, + 2, ), # one large dataframe to test many columns ( native_pd.DataFrame(columns=[f"x{i}" for i in range(300)]), 2, + 2, ), # one large dataframe to test both columns/rows ( native_pd.DataFrame( data=np.zeros(shape=(300, 300)), columns=[f"x{i}" for i in range(300)] ), - 5, + 4, + 1, ), ] -@pytest.mark.parametrize("native_df, expected_query_count", _DATAFRAMES_TO_TEST) -def test_repr(native_df, expected_query_count): +@pytest.mark.parametrize( + "native_df, expected_query_count, expected_select_count", _DATAFRAMES_TO_TEST +) +def test_repr(native_df, expected_query_count, expected_select_count): snow_df = pd.DataFrame(native_df) native_str = repr(native_df) # only measure select statements here, creation of dfs may yield a couple # CREATE TEMPORARY TABLE/INSERT INTO queries - with SqlCounter(query_count=expected_query_count, select_count=2): + with SqlCounter( + query_count=expected_query_count, select_count=expected_select_count + ): snow_str = repr(snow_df) assert native_str == snow_str -@pytest.mark.parametrize("native_df, expected_query_count", _DATAFRAMES_TO_TEST) -def test_repr_html(native_df, expected_query_count): +@pytest.mark.parametrize( + "native_df, expected_query_count, expected_select_count", _DATAFRAMES_TO_TEST +) +def test_repr_html(native_df, expected_query_count, expected_select_count): # TODO: SNOW-916596 Test this with Jupyter notebooks. 
# joins due to temp table creation @@ -98,7 +115,9 @@ def test_repr_html(native_df, expected_query_count): native_html = native_df._repr_html_() # 10 of these are related to stored procs, inserts, alter session query tag. - with SqlCounter(query_count=expected_query_count, select_count=1): + with SqlCounter( + query_count=expected_query_count, select_count=expected_select_count + ): snow_html = snow_df._repr_html_() assert native_html == snow_html @@ -134,19 +153,23 @@ def queries(self) -> list[QueryRecord]: return [query.sql_text for query in self._queries] -@pytest.mark.parametrize("native_df, expected_query_count", _DATAFRAMES_TO_TEST) -def test_repr_and_repr_html_issue_same_query(native_df, expected_query_count): - """This test ensures that the same query is issued for both `repr` and `repr_html` +def test_repr_and_repr_html_issue_same_query(): + """ + This test ensures that the same query is issued for both `repr` and `repr_html` in order to take advantage of Snowflake server side caching when both are called back - to back (as in the case with displaying a DataFrame in a Jupyter notebook).""" + to back (as in the case with displaying a DataFrame in a Jupyter notebook). + + If the input frame was a simple projection that results in an additional COUNT() query, + it is not captured by the ReprQueryListener. 
+ """ native_df = native_pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) snow_df = pd.DataFrame(native_df) with ReprQueryListener(pd.session) as listener: - with SqlCounter(query_count=1, select_count=1): + with SqlCounter(query_count=2, select_count=2): repr_str = repr(snow_df) - with SqlCounter(query_count=1, select_count=1): + with SqlCounter(query_count=2, select_count=2): repr_html = snow_df._repr_html_() assert repr_str == repr(native_df) From 5d2f8f4db0501b69adda909a51904648410a0e6d Mon Sep 17 00:00:00 2001 From: Jonathan Shi Date: Tue, 8 Apr 2025 11:31:19 -0700 Subject: [PATCH 9/9] lift ensure_row_position call --- .../modin/plugin/compiler/snowflake_query_compiler.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index e28f4ac940..c87ae34619 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -13067,14 +13067,12 @@ def build_repr_df( # the solution here uses a window function. This may lead to perf regressions, track these here SNOW-984177. # Ensure that our reference to self._modin_frame is updated with cached row count and position. row_count_value = None + frame = self._modin_frame.ensure_row_position_column() if self._modin_frame.ordered_dataframe.is_projection_of_table(): - frame = self._modin_frame.ensure_row_position_column() row_count_value = frame.ordered_dataframe.materialize_row_count() row_count_expr = pandas_lit(row_count_value) else: - frame = ( - self._modin_frame.ensure_row_position_column().ensure_row_count_column() - ) + frame = frame.ensure_row_count_column() row_count_pandas_label = ( ROW_COUNT_COLUMN_LABEL if len(frame.data_column_pandas_index_names) == 1