Skip to content

Commit ebe5aba

Browse files
feat(collection): Replace quickSort with pdqsort for performance and robustness
1 parent b59ecf4 commit ebe5aba

File tree

1 file changed

+175
-53
lines changed

1 file changed

+175
-53
lines changed

pkgs/collection/lib/src/algorithms.dart

Lines changed: 175 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
/// A selection of data manipulation algorithms.
66
library;
77

8-
import 'dart:math' show Random;
8+
import 'dart:math' show Random, ln2, log;
99

1010
import 'utils.dart';
1111

@@ -482,29 +482,42 @@ void _merge<E, K>(
482482
);
483483
}
484484

485-
/// Sort [elements] using a quick-sort algorithm.
485+
// ---------------------------------------------------------------------------
486+
// QuickSort based on Pattern-defeating Quicksort (pdqsort).
487+
// ---------------------------------------------------------------------------
488+
489+
/// Sorts a list between [start] (inclusive) and [end] (exclusive).
486490
///
487-
/// The elements are compared using [compare] on the elements.
488-
/// If [start] and [end] are provided, only that range is sorted.
491+
/// The sorting algorithm is a Pattern-defeating Quicksort (pdqsort), a
492+
/// hybrid of Quicksort, Heapsort, and Insertion Sort.
493+
/// It is not stable, but is typically very fast.
489494
///
490-
/// Uses insertion sort for smaller sublists.
495+
/// This implementation is highly efficient for common data patterns
496+
/// (such as sorted, reverse-sorted, or with few unique values) and has a
497+
/// guaranteed worst-case time complexity of O(n*log(n)).
498+
///
499+
/// For a stable sort, use [mergeSort].
491500
void quickSort<E>(
492501
List<E> elements,
493502
int Function(E a, E b) compare, [
494503
int start = 0,
495504
int? end,
496505
]) {
497-
end = RangeError.checkValidRange(start, end, elements.length);
498-
_quickSort<E, E>(elements, identity, compare, Random(), start, end);
506+
quickSortBy<E, E>(elements, identity<E>, compare, start, end);
499507
}
500508

501-
/// Sort [list] using a quick-sort algorithm.
509+
/// Sorts a list between [start] (inclusive) and [end] (exclusive) by key.
502510
///
503-
/// The elements are compared using [compare] on the value provided by [keyOf]
504-
/// on the element.
505-
/// If [start] and [end] are provided, only that range is sorted.
511+
/// The sorting algorithm is a Pattern-defeating Quicksort (pdqsort), a
512+
/// hybrid of Quicksort, Heapsort, and Insertion Sort.
513+
/// It is not stable, but is typically very fast.
506514
///
507-
/// Uses insertion sort for smaller sublists.
515+
/// This implementation is highly efficient for common data patterns
516+
/// (such as sorted, reverse-sorted, or with few unique values) and has a
517+
/// guaranteed worst-case time complexity of O(n*log(n)).
518+
///
519+
/// Elements are ordered by the [compare] function applied to the result of
520+
/// the [keyOf] function. For a stable sort, use [mergeSortBy].
508521
void quickSortBy<E, K>(
509522
List<E> list,
510523
K Function(E element) keyOf,
@@ -513,53 +526,162 @@ void quickSortBy<E, K>(
513526
int? end,
514527
]) {
515528
end = RangeError.checkValidRange(start, end, list.length);
516-
_quickSort(list, keyOf, compare, Random(), start, end);
529+
final length = end - start;
530+
if (length < 2) return;
531+
_pdqSortByImpl(list, keyOf, compare, start, end, _log2(length));
517532
}
518533

519-
void _quickSort<E, K>(
520-
List<E> list,
521-
K Function(E element) keyOf,
522-
int Function(K a, K b) compare,
523-
Random random,
524-
int start,
525-
int end,
526-
) {
527-
const minQuickSortLength = 24;
528-
var length = end - start;
529-
while (length >= minQuickSortLength) {
530-
var pivotIndex = random.nextInt(length) + start;
531-
var pivot = list[pivotIndex];
532-
var pivotKey = keyOf(pivot);
533-
var endSmaller = start;
534-
var startGreater = end;
535-
var startPivots = end - 1;
536-
list[pivotIndex] = list[startPivots];
537-
list[startPivots] = pivot;
538-
while (endSmaller < startPivots) {
539-
var current = list[endSmaller];
540-
var relation = compare(keyOf(current), pivotKey);
541-
if (relation < 0) {
542-
endSmaller++;
534+
/// Range length below which pdqsort stops partitioning and sorts the range
/// directly with insertion sort.
535+
const int _pdqInsertionSortThreshold = 24;
536+
537+
/// Returns the floor of the base-2 logarithm of [n].
///
/// Returns 0 when [n] is zero or negative, so the function is total over all
/// integers.
///
/// The result is computed with integer arithmetic: a positive integer whose
/// highest set bit is bit `k` has a bit length of `k + 1`, and `k` is exactly
/// `floor(log2(n))`. This avoids the rounding of a floating-point
/// `log(n) / ln2` quotient and the int-to-double conversion it requires.
int _log2(int n) => n <= 0 ? 0 : n.bitLength - 1;
539+
540+
/// Exchanges the values stored at indices [i] and [j] of [elements].
///
/// When [i] equals [j] the list content is left as it was.
void _pdqSwap<E>(List<E> elements, int i, int j) {
  final atI = elements[i];
  final atJ = elements[j];
  elements[i] = atJ;
  elements[j] = atI;
}
546+
547+
/// Sorts the range of [elements] from [start] (inclusive) to [end]
/// (exclusive) in place using a linear-scan insertion sort.
///
/// Elements are ordered by [compare] applied to the keys produced by
/// [keyOf]. An element is only moved past a neighbour whose key compares
/// strictly greater, so elements with equal keys keep their relative order.
void _pdqInsertionSort<E, K>(List<E> elements, K Function(E) keyOf,
    int Function(K, K) compare, int start, int end) {
  for (var next = start + 1; next < end; next++) {
    final moving = elements[next];
    // The key of the element being inserted is computed once per insertion.
    final movingKey = keyOf(moving);

    // `hole` is the slot that `moving` will finally occupy. It walks left
    // while the element just before it has a strictly greater key.
    var hole = next;
    for (; hole > start; hole--) {
      if (compare(keyOf(elements[hole - 1]), movingKey) <= 0) break;
      elements[hole] = elements[hole - 1];
    }
    elements[hole] = moving;
  }
}
561+
562+
/// Sorts the range of [elements] from [start] (inclusive) to [end]
/// (exclusive) in place with heapsort.
///
/// Elements are ordered by [compare] applied to the keys produced by
/// [keyOf]. Used by pdqsort as the fallback once too many unbalanced
/// partitions have been seen.
void _pdqHeapSort<E, K>(List<E> elements, K Function(E) keyOf,
    int Function(K, K) compare, int start, int end) {
  final count = end - start;

  // Phase 1: build a max-heap by sifting down every node that has at least
  // one child, from the last such node back to the root.
  var parent = count ~/ 2;
  while (parent > 0) {
    parent--;
    _pdqSiftDown(elements, keyOf, compare, parent, count, start);
  }

  // Phase 2: repeatedly move the current maximum (the heap root) to the end
  // of the shrinking heap, then restore the heap property for the remainder.
  var heapSize = count;
  while (heapSize > 1) {
    heapSize--;
    _pdqSwap(elements, start, start + heapSize);
    _pdqSiftDown(elements, keyOf, compare, 0, heapSize, start);
  }
}
574+
575+
/// Restores the max-heap property below heap node [i].
///
/// The heap occupies the [n] elements of [elements] beginning at list index
/// [start]; [i] and [n] are heap-relative, so heap node `k` lives at list
/// index `start + k`. Nodes are ordered by [compare] applied to the keys
/// produced by [keyOf].
void _pdqSiftDown<E, K>(List<E> elements, K Function(E) keyOf,
    int Function(K, K) compare, int i, int n, int start) {
  // Whether heap node `candidate` has a strictly smaller key than heap node
  // `child`.
  bool ordersBefore(int candidate, int child) =>
      compare(keyOf(elements[start + candidate]),
          keyOf(elements[start + child])) <
      0;

  var node = i;
  while (true) {
    final leftChild = 2 * node + 1;
    final rightChild = leftChild + 1;

    // Pick the node with the greatest key among `node` and its children that
    // lie inside the heap.
    var winner = node;
    if (leftChild < n && ordersBefore(winner, leftChild)) winner = leftChild;
    if (rightChild < n && ordersBefore(winner, rightChild)) {
      winner = rightChild;
    }

    // The node already dominates its children: the heap property holds.
    if (winner == node) return;

    _pdqSwap(elements, start + node, start + winner);
    node = winner;
  }
}
603+
604+
/// Arranges the elements at indices [a], [b] and [c] of [elements] so that
/// their keys are in non-decreasing order in that index sequence.
///
/// Keys come from [keyOf] and are ordered by [compare]. Uses at most three
/// comparisons; afterwards the median of the three sits at index [b].
void _pdqSort3<E, K>(List<E> elements, K Function(E) keyOf,
    int Function(K, K) compare, int a, int b, int c) {
  // Whether the element at `left` has a strictly greater key than the one at
  // `right`.
  bool outOfOrder(int left, int right) =>
      compare(keyOf(elements[left]), keyOf(elements[right])) > 0;

  if (outOfOrder(a, b)) _pdqSwap(elements, a, b);

  // With a <= b established, nothing more to do unless c belongs earlier.
  if (!outOfOrder(b, c)) return;
  _pdqSwap(elements, b, c);

  // The value that came from c may also belong before a.
  if (outOfOrder(a, b)) _pdqSwap(elements, a, b);
}
617+
618+
/// The core implementation of Pattern-defeating Quicksort.
619+
///
620+
/// [badAllowed] tracks how many bad pivot selections are allowed before
621+
/// falling back to heap sort.
622+
void _pdqSortByImpl<E, K>(List<E> elements, K Function(E) keyOf,
623+
int Function(K, K) compare, int start, int end, int badAllowed) {
624+
while (true) {
625+
final size = end - start;
626+
if (size < _pdqInsertionSortThreshold) {
627+
_pdqInsertionSort(elements, keyOf, compare, start, end);
628+
return;
629+
}
630+
631+
if (badAllowed == 0) {
632+
_pdqHeapSort(elements, keyOf, compare, start, end);
633+
return;
634+
}
635+
636+
final mid = start + size ~/ 2;
637+
if (size > 80) {
638+
// Ninther pivot selection for large arrays.
639+
final s = size ~/ 8;
640+
_pdqSort3(elements, keyOf, compare, start, start + s, start + 2 * s);
641+
_pdqSort3(elements, keyOf, compare, mid - s, mid, mid + s);
642+
_pdqSort3(
643+
elements, keyOf, compare, end - 1 - 2 * s, end - 1 - s, end - 1);
644+
_pdqSort3(elements, keyOf, compare, start + s, mid, end - 1 - s);
645+
} else {
646+
// Median-of-three for smaller arrays.
647+
_pdqSort3(elements, keyOf, compare, start, mid, end - 1);
648+
}
649+
650+
// 3-Way Partitioning (Dutch National Flag).
651+
_pdqSwap(elements, start, mid);
652+
final pivotKey = keyOf(elements[start]);
653+
654+
var less = start;
655+
var equal = start;
656+
var greater = end;
657+
658+
while (equal < greater) {
659+
var comparison = compare(keyOf(elements[equal]), pivotKey);
660+
if (comparison < 0) {
661+
_pdqSwap(elements, less++, equal++);
662+
} else if (comparison > 0) {
663+
greater--;
664+
_pdqSwap(elements, equal, greater);
543665
} else {
544-
startPivots--;
545-
var currentTarget = startPivots;
546-
list[endSmaller] = list[startPivots];
547-
if (relation > 0) {
548-
startGreater--;
549-
currentTarget = startGreater;
550-
list[startPivots] = list[startGreater];
551-
}
552-
list[currentTarget] = current;
666+
equal++;
553667
}
554668
}
555-
if (endSmaller - start < end - startGreater) {
556-
_quickSort(list, keyOf, compare, random, start, endSmaller);
557-
start = startGreater;
669+
670+
final leftSize = less - start;
671+
final rightSize = end - greater;
672+
673+
// Detect highly unbalanced partitions and decrement badAllowed.
674+
if (leftSize < size ~/ 8 || rightSize < size ~/ 8) {
675+
badAllowed--;
676+
}
677+
678+
// Recurse on the smaller partition first to keep stack depth low.
679+
if (leftSize < rightSize) {
680+
_pdqSortByImpl(elements, keyOf, compare, start, less, badAllowed);
681+
start = greater; // Tail-call optimization on the larger partition
558682
} else {
559-
_quickSort(list, keyOf, compare, random, startGreater, end);
560-
end = endSmaller;
683+
_pdqSortByImpl(elements, keyOf, compare, greater, end, badAllowed);
684+
end = less; // Tail-call optimization on the larger partition
561685
}
562-
length = end - start;
563686
}
564-
_movingInsertionSort<E, K>(list, keyOf, compare, start, end, list, start);
565687
}

0 commit comments

Comments
 (0)