From a26edea15f97abcf77b412ce4945d034a0d57fc8 Mon Sep 17 00:00:00 2001
From: Jonathan Shi <jonathan.shi@snowflake.com>
Date: Tue, 28 Jan 2025 11:44:31 -0800
Subject: [PATCH 1/2] order by min(row position) in object describe

---
 .../compiler/snowflake_query_compiler.py       | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
index 55bd416857..71532e5c6d 100644
--- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
+++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -13558,8 +13558,14 @@ def describe(
                 axis=0,
             )
             # Compute top (the mode of each column) + freq (the number of times this mode appears).
-            top_freq_identifiers = padded_qc._modin_frame.ordered_dataframe.generate_snowflake_quoted_identifiers(
-                pandas_labels=["top", "freq"]
+            # Also create a new identifier to track min(__row_position__) of each group to ensure stability
+            new_identifiers = padded_qc._modin_frame.ordered_dataframe.generate_snowflake_quoted_identifiers(
+                pandas_labels=["top", "freq", "min_row_position"]
+            )
+            top_freq_identifiers = new_identifiers[0:2]
+            min_row_position_ident = new_identifiers[2]
+            row_position_ident = (
+                padded_qc._modin_frame.ordered_dataframe.row_position_snowflake_quoted_identifier
             )
             # To accommodate multi-level columns in the source frame, we generate a new index column
             # in the top/freq frame for each level. We transpose this frame later, so the columns
@@ -13608,7 +13614,7 @@ def count_freqs(
                 #        IFF(a IS NULL, NULL, COUNT(a)) AS freq
                 # FROM df
                 # GROUP BY a
-                # ORDER BY freq DESC NULLS LAST
+                # ORDER BY freq DESC, MIN(__row_position__) ASC NULLS LAST
                 # LIMIT 1
                 #
                 # The resulting 1-row frame for column "a": [1, 1, 2] will have the form
@@ -13656,9 +13662,13 @@ def count_freqs(
                                 pandas_lit(None),
                                 count(col(col_ident)),
                             ).as_(freq_ident),
+                            min_(row_position_ident).as_(min_row_position_ident),
                         ],
                     )
-                    .sort(OrderingColumn(freq_ident, ascending=False, na_last=True))
+                    .sort(
+                        OrderingColumn(freq_ident, ascending=False, na_last=True),
+                        OrderingColumn(min_row_position_ident, ascending=True),
+                    )
                     .limit(1)
                     .select(
                         *(

From c13f7b30c4ba8e750c6b0e8c3542b47393887d4b Mon Sep 17 00:00:00 2001
From: Jonathan Shi <jonathan.shi@snowflake.com>
Date: Tue, 28 Jan 2025 14:32:06 -0800
Subject: [PATCH 2/2] rename variables for review

---
 .../compiler/snowflake_query_compiler.py      | 40 +++++++++++--------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
index 71532e5c6d..4ade32c816 100644
--- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
+++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py
@@ -13559,12 +13559,15 @@ def describe(
             )
             # Compute top (the mode of each column) + freq (the number of times this mode appears).
             # Also create a new identifier to track min(__row_position__) of each group to ensure stability
-            new_identifiers = padded_qc._modin_frame.ordered_dataframe.generate_snowflake_quoted_identifiers(
+            (
+                top_identifier,
+                freq_identifier,
+                min_row_position_identifier,
+            ) = padded_qc._modin_frame.ordered_dataframe.generate_snowflake_quoted_identifiers(
                 pandas_labels=["top", "freq", "min_row_position"]
             )
-            top_freq_identifiers = new_identifiers[0:2]
-            min_row_position_ident = new_identifiers[2]
-            row_position_ident = (
+            top_freq_identifiers = (top_identifier, freq_identifier)
+            row_position_identifier = (
                 padded_qc._modin_frame.ordered_dataframe.row_position_snowflake_quoted_identifier
             )
             # To accommodate multi-level columns in the source frame, we generate a new index column
@@ -13579,7 +13582,7 @@ def describe(
             )
 
             def count_freqs(
-                col_labels: Union[str, tuple[str, ...]], col_ident: str
+                col_labels: Union[str, tuple[str, ...]], col_identifier: str
             ) -> OrderedDataFrame:
                 """
                 Helper function to compute the mode ("top") and frequency with which the mode
@@ -13601,7 +13604,6 @@ def count_freqs(
                 QC.value_counts(dropna=False) would correctly report NULL as the `top` item, but
                 reports `freq` as the number of times NULL appears, which we do not want.
                 """
-                top_ident, freq_ident = top_freq_identifiers
                 col_labels_tuple = (
                     col_labels if is_list_like(col_labels) else (col_labels,)
                 )
@@ -13655,19 +13657,21 @@ def count_freqs(
                 # +------+---+
                 return (
                     padded_qc._modin_frame.ordered_dataframe.group_by(
-                        [col_ident],
+                        [col_identifier],
                         [
                             iff(
-                                col(col_ident).is_null(),
+                                col(col_identifier).is_null(),
                                 pandas_lit(None),
-                                count(col(col_ident)),
-                            ).as_(freq_ident),
-                            min_(row_position_ident).as_(min_row_position_ident),
+                                count(col(col_identifier)),
+                            ).as_(freq_identifier),
+                            min_(row_position_identifier).as_(
+                                min_row_position_identifier
+                            ),
                         ],
                     )
                     .sort(
-                        OrderingColumn(freq_ident, ascending=False, na_last=True),
-                        OrderingColumn(min_row_position_ident, ascending=True),
+                        OrderingColumn(freq_identifier, ascending=False, na_last=True),
+                        OrderingColumn(min_row_position_identifier, ascending=True),
                     )
                     .limit(1)
                     .select(
@@ -13675,14 +13679,16 @@ def count_freqs(
                             # If the original frame had multi-level columns, we must create
                             # a multi-level index to transpose this frame later.
                             [
-                                pandas_lit(col_label).as_(index_ident)
-                                for col_label, index_ident in zip(
+                                pandas_lit(col_label).as_(index_identifier)
+                                for col_label, index_identifier in zip(
                                     col_labels_tuple, new_index_identifiers
                                 )
                             ]
                             + [
-                                col(col_ident).cast(VariantType()).as_(top_ident),
-                                freq_ident,
+                                col(col_identifier)
+                                .cast(VariantType())
+                                .as_(top_identifier),
+                                freq_identifier,
                             ]
                         )
                     )