Skip to content

Commit 3de148f

Browse files
committed
docs
1 parent ef3a870 commit 3de148f

File tree

3 files changed

+27
-7
lines changed

3 files changed

+27
-7
lines changed

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,7 @@ Eddy De Greef
685685
Duane Griffin
686686
Grant Griffin
687687
Andrea Griffini
688+
Dominykas Grigonis
688689
Semyon Grigoryev
689690
Duncan Grisby
690691
Olivier Grisel

Objects/listobject.c

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1801,7 +1801,8 @@ binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok)
18011801
++ok;
18021802

18031803
#if 1 // Adaptivity with post `count_run` optimization of 1st pivot
1804-
// 1. Known: a[ok] < a[ok - 1], as called after `count_run`
1804+
/* 1. Known: a[ok] < a[ok - 1], as called after `count_run`
1805+
This just insorts the first element, taking that into account */
18051806
if (ok >= n)
18061807
return 0;
18071808
Py_ssize_t aL = 0;
@@ -1815,7 +1816,11 @@ binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok)
18151816

18161817
Py_ssize_t m = ok < 5 ? 11 : ok + 6;
18171818
if (m < n) {
1818-
// 2. Small non-adaptive run to acquire good `std` estimate
1819+
/* 2. Small non-adaptive run to acquire good `std` estimate
1820+
Number of iterations (m) is chosen heuristically
1821+
and is subject to further calibration if needed.
1822+
It does a minimum of 6 iterations and up to 10 if the pre-sorted part
1823+
is small as estimates of small integers are less reliable. */
18191824
Py_ssize_t mu = aL;
18201825
Py_ssize_t std = ok >> 1;
18211826
for (; ok < m; ++ok) {
@@ -1832,7 +1837,15 @@ binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok)
18321837
mu = aL;
18331838
}
18341839

1835-
// 3. Adaptive routine while `std` is small enough
1840+
/* 3. Adaptive routine while `std` is small enough
1841+
Take the last insertion point as the first midpoint
1842+
and do 2 subsequent steps of size `std`, trying to capture
1843+
the range into which the new value falls, potentially
1844+
locating insertion point faster than standard `binarysort`.
1845+
Continue while `std` (step size) is no greater than
1846+
(size of sorted part) / 4.
1847+
If estimate from (2) is initially not small enough,
1848+
this does not execute a single time. */
18361849
Py_ssize_t std_max = ok >> 2;
18371850
for (; ok < n && std <= std_max; ++ok) {
18381851
pivot = a[ok];
@@ -1885,7 +1898,7 @@ binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok)
18851898
}
18861899
}
18871900
}
1888-
// Binary Insertion
1901+
// Simple Binary Insertion Sort
18891902
while (aL < aR) {
18901903
M = (aL + aR) >> 1;
18911904
IFLT(pivot, a[M])
@@ -1897,13 +1910,13 @@ binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok)
18971910
_binarysort_INSORT(aL, M)
18981911

18991912
std += labs(aL - mu);
1900-
std /= 2; // EWMA with alpha=0.5
1913+
std /= 2; // EWMA with alpha=0.5
19011914
mu = aL;
1902-
std_max += !(ok % 4);
1915+
std_max += !(ok % 4); // Keep approximately equal to: ok / 4
19031916
}
19041917
}
19051918

1906-
// 4. Finish off with non-adaptive sort
1919+
// 4. Finish with non-adaptive sort
19071920
#endif // End of adaptivity
19081921

19091922

Objects/listsort.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -839,6 +839,12 @@ reasonable minrun values.
839839
Additionally, "binary insertion sort" implements an adaptivity procedure,
840840
which reduces the number of comparisons for cases where data is already
841841
sorted to a high degree in either forward or reverse order.
842+
While "binary insertion sort" ensures an optimal number of comparisons,
843+
it loses the best case of textbook insertion sort when data is highly sorted
844+
in the correct order. The adaptivity addition brings that back, and more, at a small cost.
845+
It adapts to any data where the position of the next element is close to that of
846+
the last element. Thus, it adapts to cases where the data is highly sorted in correct
847+
order, reverse order or elements are being funneled into some mid-point.
842848

843849

844850
LEFT OR RIGHT

0 commit comments

Comments
 (0)