fix: use correct comparator for arg_sort with descending + limit

root · root · commit 8a713ba093ee · 2026-03-06T22:53:38.000Z
`select_nth_unstable_by` always partitioned the elements with an ascending comparator, so with `descending=true` and a limit the wrong N elements were selected (the smallest instead of the largest). Reverse the comparator when `descending` is set, in both `arg_sort` and `arg_sort_no_nulls`. Closes #26833.
diff --git a/crates/polars-core/src/chunked_array/ops/sort/arg_sort.rs b/crates/polars-core/src/chunked_array/ops/sort/arg_sort.rs
@@ -142,9 +142,13 @@ where
         let out = if limit >= vals.len() {
             vals.as_mut_slice()
         } else {
-            let (lower, _el, _upper) = vals
-                .as_mut_slice()
-                .select_nth_unstable_by(limit, |a, b| a.1.tot_cmp(&b.1));
+            let (lower, _el, _upper) = if options.descending {
+                vals.as_mut_slice()
+                    .select_nth_unstable_by(limit, |a, b| b.1.tot_cmp(&a.1))
+            } else {
+                vals.as_mut_slice()
+                    .select_nth_unstable_by(limit, |a, b| a.1.tot_cmp(&b.1))
+            };
             lower
         };
 
@@ -235,9 +239,13 @@ where
         let out = if limit >= vals.len() {
             vals.as_mut_slice()
         } else {
-            let (lower, _el, _upper) = vals
-                .as_mut_slice()
-                .select_nth_unstable_by(limit, |a, b| a.1.tot_cmp(&b.1));
+            let (lower, _el, _upper) = if options.descending {
+                vals.as_mut_slice()
+                    .select_nth_unstable_by(limit, |a, b| b.1.tot_cmp(&a.1))
+            } else {
+                vals.as_mut_slice()
+                    .select_nth_unstable_by(limit, |a, b| a.1.tot_cmp(&b.1))
+            };
             lower
         };
         sort_impl(out, options);
@@ -326,4 +334,111 @@ mod test {
         let idx = reverse_stable_no_nulls(&a, 0);
         assert_eq!(idx.len(), 0);
     }
+
+    #[test]
+    fn test_arg_sort_descending_with_limit() {
+        let a = Int32Chunked::new(
+            PlSmallStr::from_static("a"),
+            &[4, 2, 5, 1, 3],
+        );
+
+        let options = SortOptions {
+            descending: true,
+            nulls_last: false,
+            multithreaded: false,
+            limit: Some(3),
+            ..Default::default()
+        };
+        let result = a.arg_sort(options);
+        let idx: Vec<IdxSize> = result.into_no_null_iter().collect();
+        // descending top-3: values 5(idx=2), 4(idx=0), 3(idx=4)
+        assert_eq!(idx, vec![2, 0, 4]);
+    }
+
+    #[test]
+    fn test_arg_sort_ascending_with_limit() {
+        let a = Int32Chunked::new(
+            PlSmallStr::from_static("a"),
+            &[4, 2, 5, 1, 3],
+        );
+
+        let options = SortOptions {
+            descending: false,
+            nulls_last: false,
+            multithreaded: false,
+            limit: Some(3),
+            ..Default::default()
+        };
+        let result = a.arg_sort(options);
+        let idx: Vec<IdxSize> = result.into_no_null_iter().collect();
+        // ascending, limit 3 keeps the three smallest: 1(idx=3), 2(idx=1), 3(idx=4)
+        assert_eq!(idx, vec![3, 1, 4]);
+    }
+
+    #[test]
+    fn test_arg_sort_descending_limit_with_nulls() {
+        let a = Int32Chunked::new(
+            PlSmallStr::from_static("a"),
+            &[
+                Some(4),
+                None,
+                Some(5),
+                Some(1),
+                None,
+                Some(3),
+            ],
+        );
+
+        let options = SortOptions {
+            descending: true,
+            nulls_last: true,
+            multithreaded: false,
+            limit: Some(3),
+            ..Default::default()
+        };
+        let result = a.arg_sort(options);
+        let idx: Vec<IdxSize> = result.into_no_null_iter().collect();
+        // descending, nulls last, top-3: values 5(idx=2), 4(idx=0), 3(idx=5)
+        assert_eq!(idx, vec![2, 0, 5]);
+    }
+
+    #[test]
+    fn test_arg_sort_descending_limit_larger_than_len() {
+        let a = Int32Chunked::new(
+            PlSmallStr::from_static("a"),
+            &[3, 1, 2],
+        );
+
+        let options = SortOptions {
+            descending: true,
+            nulls_last: false,
+            multithreaded: false,
+            limit: Some(10), // exceeds the 3 input values
+            ..Default::default()
+        };
+        let result = a.arg_sort(options);
+        let idx: Vec<IdxSize> = result.into_no_null_iter().collect();
+        assert_eq!(idx, vec![0, 2, 1]); // all indices, descending: 3(idx=0), 2(idx=2), 1(idx=1)
+    }
+
+    #[test]
+    fn test_arg_sort_descending_limit_with_duplicates() {
+        let a = Int32Chunked::new(
+            PlSmallStr::from_static("a"),
+            &[3, 5, 5, 1, 3, 2],
+        );
+
+        let options = SortOptions {
+            descending: true,
+            nulls_last: false,
+            multithreaded: false,
+            limit: Some(4),
+            ..Default::default()
+        };
+        let result = a.arg_sort(options);
+        let idx: Vec<IdxSize> = result.into_no_null_iter().collect();
+        // top-4 values 5, 5, 3, 3 come from idx {1, 2} and {0, 4}; only the values are asserted
+        let vals: Vec<i32> = idx.iter().map(|&i| a.get(i as usize).unwrap()).collect();
+        assert_eq!(vals, vec![5, 5, 3, 3]);
+    }
 }
diff --git a/py-polars/tests/unit/operations/test_sort.py b/py-polars/tests/unit/operations/test_sort.py
@@ -1289,3 +1289,39 @@ def test_sort_by_empty_list_eval_25433() -> None:
     out = df.select(pl.col.a.list.eval(pl.element().sort_by(pl.element())))
     expected = pl.DataFrame({"a": [sorted(some_list), []]})
     assert_frame_equal(out, expected)
+
+
+def test_top_k_bottom_k_correctness_26833() -> None:
+    df = pl.DataFrame({"a": [4, 2, 5, 1, 3]})
+
+    top3 = df.select(pl.col("a").top_k(3))["a"].to_list()
+    assert sorted(top3, reverse=True) == [5, 4, 3]
+
+    bottom3 = df.select(pl.col("a").bottom_k(3))["a"].to_list()
+    assert sorted(bottom3) == [1, 2, 3]
+
+
+def test_top_k_with_nulls_26833() -> None:
+    df = pl.DataFrame({"a": [4, None, 5, 1, None, 3]})
+
+    top3 = df.select(pl.col("a").top_k(3))["a"].to_list()
+    assert sorted([v for v in top3 if v is not None], reverse=True) == [5, 4, 3]
+
+    bottom2 = df.select(pl.col("a").bottom_k(2))["a"].to_list()
+    assert sorted([v for v in bottom2 if v is not None]) == [1, 3]
+
+
+def test_top_k_with_duplicates_26833() -> None:
+    df = pl.DataFrame({"a": [3, 5, 5, 1, 3, 2]})
+
+    top4 = df.select(pl.col("a").top_k(4))["a"].to_list()
+    assert sorted(top4, reverse=True) == [5, 5, 3, 3]
+
+    bottom4 = df.select(pl.col("a").bottom_k(4))["a"].to_list()
+    assert sorted(bottom4) == [1, 2, 3, 3]
+
+
+def test_top_k_larger_than_len_26833() -> None:
+    df = pl.DataFrame({"a": [3, 1, 2]})
+    result = df.select(pl.col("a").top_k(10))["a"].to_list()
+    assert sorted(result, reverse=True) == [3, 2, 1]