snowflakedb
diff --git a/‎CHANGELOG.md‎
Lines changed: 3 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/snowflake/snowpark/modin/plugin/_internal/concat_utils.py‎
Lines changed: 4 additions & 2 deletions b/‎src/snowflake/snowpark/modin/plugin/_internal/concat_utils.py‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎src/snowflake/snowpark/modin/plugin/_internal/cut_utils.py‎
Lines changed: 2 additions & 1 deletion b/‎src/snowflake/snowpark/modin/plugin/_internal/cut_utils.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/snowflake/snowpark/modin/plugin/_internal/frame.py‎
Lines changed: 14 additions & 5 deletions b/‎src/snowflake/snowpark/modin/plugin/_internal/frame.py‎
Lines changed: 14 additions & 5 deletions
diff --git a/‎src/snowflake/snowpark/modin/plugin/_internal/generator_utils.py‎
Lines changed: 4 additions & 1 deletion b/‎src/snowflake/snowpark/modin/plugin/_internal/generator_utils.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎src/snowflake/snowpark/modin/plugin/_internal/get_dummies_utils.py‎
Lines changed: 4 additions & 3 deletions b/‎src/snowflake/snowpark/modin/plugin/_internal/get_dummies_utils.py‎
Lines changed: 4 additions & 3 deletions
@@ -37,6 +37,9 @@
 - Eliminate duplicate parameter check queries for casing status when retrieving the session.
 - Retrieve dataframe row counts through object metadata to avoid a COUNT(\*) query (performance)
 - Added support for applying Snowflake Cortex function `Complete`.
+- Introduce faster pandas: improved performance by deferring row position computation.
+  - The following operations are currently supported and can benefit from the optimization: `read_snowflake`, `repr`, `loc`, `reset_index`, `merge`, and binary operations.
+  - If a lazy object (e.g., DataFrame or Series) depends on a mix of supported and unsupported operations, the optimization will not be used.
 
 #### Dependency Updates
 
 
@@ -373,7 +373,9 @@ def _select_columns(
     )
 
 
-def add_global_ordering_columns(frame: InternalFrame, position: int) -> InternalFrame:
+def add_global_ordering_columns(
+    frame: InternalFrame, position: int, dummy_row_pos_mode: bool = False
+) -> InternalFrame:
     """
     To create global ordering for concat (axis=0) operation we first ensure a
     row position column for local ordering within the frame. Then add another
@@ -388,7 +390,7 @@ def add_global_ordering_columns(frame: InternalFrame, position: int) -> Internal
         A new frame with updated ordering columns.
 
     """
-    frame = frame.ensure_row_position_column()
+    frame = frame.ensure_row_position_column(dummy_row_pos_mode)
     ordered_dataframe = frame.ordered_dataframe.sort(
         [OrderingColumn(frame.row_position_snowflake_quoted_identifier)]
     )
 
@@ -152,6 +152,7 @@ def compute_bin_indices(
     cuts_frame: InternalFrame,
     n_cuts: int,
     right: bool = True,
+    dummy_row_pos_mode: bool = False,
 ) -> InternalFrame:
     """
     Given a frame of cuts, i.e. borders of bins (strictly increasing) compute for the data in values_frame the index of the bin they fall into.
@@ -183,7 +184,7 @@ def compute_bin_indices(
     # within OrderedDataFrame yet, we use the Snowpark layer directly. This should have no negative
     # consequences when it comes to building lazy graphs, as both cut and qcut are materializing operations.
 
-    cuts_frame = cuts_frame.ensure_row_position_column()
+    cuts_frame = cuts_frame.ensure_row_position_column(dummy_row_pos_mode)
     # perform asof join to find the closest to the cut frame data.
     asof_result = join(
         values_frame,
 
@@ -888,15 +888,19 @@ def to_pandas(
     ###########################################################################
     # START: Internal Frame mutation APIs.
     # APIs that creates a new InternalFrame instance, should only be added below
-    def ensure_row_position_column(self) -> "InternalFrame":
+    def ensure_row_position_column(
+        self, dummy_row_pos_mode: bool = False
+    ) -> "InternalFrame":
         """
         Ensure row position column is computed for given internal frame.
 
         Returns:
             A new InternalFrame instance with computed virtual index.
         """
         return InternalFrame.create(
-            ordered_dataframe=self.ordered_dataframe.ensure_row_position_column(),
+            ordered_dataframe=self.ordered_dataframe.ensure_row_position_column(
+                dummy_row_pos_mode
+            ),
             data_column_pandas_labels=self.data_column_pandas_labels,
             data_column_snowflake_quoted_identifiers=self.data_column_snowflake_quoted_identifiers,
             data_column_pandas_index_names=self.data_column_pandas_index_names,
@@ -1350,7 +1354,9 @@ def select_active_columns(self) -> "InternalFrame":
         )
 
     def strip_duplicates(
-        self: "InternalFrame", quoted_identifiers: list[str]
+        self: "InternalFrame",
+        quoted_identifiers: list[str],
+        dummy_row_pos_mode: bool = False,
     ) -> "InternalFrame":
         """
         When assigning frames via index operations, only the last entry is used for duplicates, as entries are repeatedly overwritten.
@@ -1364,7 +1370,7 @@ def strip_duplicates(
             new internal frame with unique index.
         """
 
-        frame = self.ensure_row_position_column()
+        frame = self.ensure_row_position_column(dummy_row_pos_mode)
 
         # To remove the duplicates, first compute, via windowing over the index columns, the value of the last row position.
         # With this, join and then select only the relevant rows. Note that an EXISTS subquery doesn't work here because
@@ -1400,12 +1406,15 @@ def strip_duplicates(
             left_on_cols=[frame.row_position_snowflake_quoted_identifier],
             right_on_cols=[relevant_last_value_row_positions_quoted_identifier],
             how="inner",
+            dummy_row_pos_mode=dummy_row_pos_mode,
         )
 
         # Because we reuse row position to select the relevant columns, we need to
         # generate a new row position column here so locational indexing after this operation
         # continues to work correctly.
-        new_ordered_dataframe = joined_ordered_dataframe.ensure_row_position_column()
+        new_ordered_dataframe = joined_ordered_dataframe.ensure_row_position_column(
+            dummy_row_pos_mode
+        )
         return InternalFrame.create(
             ordered_dataframe=new_ordered_dataframe,
             data_column_pandas_labels=frame.data_column_pandas_labels,
 
@@ -79,6 +79,7 @@ def generate_regular_range(
 
 def _create_qc_from_snowpark_dataframe(
     sp_df: DataFrame,
+    dummy_row_pos_mode: bool = False,
 ) -> "snowflake_query_compiler.SnowflakeQueryCompiler":
     """
     Create a Snowflake query compiler from a Snowpark DataFrame, assuming the DataFrame only contains one column.
@@ -89,7 +90,9 @@ def _create_qc_from_snowpark_dataframe(
     Returns:
         A Snowflake query compiler
     """
-    odf = OrderedDataFrame(DataFrameReference(sp_df)).ensure_row_position_column()
+    odf = OrderedDataFrame(DataFrameReference(sp_df)).ensure_row_position_column(
+        dummy_row_pos_mode
+    )
 
     from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
         SnowflakeQueryCompiler,
 
@@ -184,6 +184,7 @@ def get_dummies_helper(
     columns: list[Hashable],
     prefixes: list[Hashable],
     prefix_sep: str,
+    dummy_row_pos_mode: bool = False,
 ) -> InternalFrame:
     """
     Helper function for get dummies to perform encoding on given columns
@@ -222,9 +223,9 @@ def get_dummies_helper(
             )
 
     # append a lit true column as value column for pivot
-    new_internal_frame = internal_frame.ensure_row_position_column().append_column(
-        LIT_TRUE_COLUMN_PANDAS_LABEL, pandas_lit(True)
-    )
+    new_internal_frame = internal_frame.ensure_row_position_column(
+        dummy_row_pos_mode
+    ).append_column(LIT_TRUE_COLUMN_PANDAS_LABEL, pandas_lit(True))
     # the dummy column is appended as the last data column of the new_internal_frame
     row_position_column_snowflake_quoted_identifier = (
         new_internal_frame.row_position_snowflake_quoted_identifier