@@ -1753,6 +1753,31 @@ struct s_MergeState {
17531753 Py_ssize_t mr_current , mr_e , mr_mask ;
17541754};
17551755
/* Binary-search step used by the insertion loops in binarysort().
   Narrows the index range [L, R) until L == R, updating L and R in place,
   so both arguments must be assignable variables.  Besides its two
   parameters it reads `pivot` and `a` and uses `M` as scratch; all three
   are taken from the expansion site.  The loop body runs at least once
   (do/while), so callers are expected to enter with L < R.
   NOTE(review): IFLT is defined outside this view; presumably it tests
   pivot < a[M] and may leave the function on a comparison error -- confirm. */
1756+ #define _binarysort_BISECT (L , R ) \
1757+ do { \
1758+ do { \
1759+ M = (L + R) >> 1; /* midpoint of the remaining range */ \
1760+ IFLT(pivot, a[M]) \
1761+ R = M; /* keep searching in [L, M) */ \
1762+ else \
1763+ L = M + 1; /* keep searching in [M + 1, R) */ \
1764+ } while (L < R); \
1765+ assert(L == R); \
1766+ } while (0)
1767+
/* Insert `pivot` at index `idx` of the key array `a`, moving
   a[idx..ok-1] up one slot into a[idx+1..ok].  `tmp` is a scratch index
   variable supplied by the caller for the shift loops.  When `has_values`
   is true the same move is applied to `v`, with the old v[ok] landing in
   v[idx].  `a`, `v`, `ok`, `pivot` and `has_values` come from the
   expansion site.
   Side effect: when `has_values` is true, `pivot` is overwritten with
   v[ok], so it no longer holds the key after the macro. */
1768+ #define _binarysort_INSORT (idx , tmp ) \
1769+ do { \
1770+ for (tmp = ok; tmp > idx; --tmp) \
1771+ a[tmp] = a[tmp - 1]; \
1772+ a[idx] = pivot; \
1773+ if (has_values) { \
1774+ pivot = v[ok]; /* reuse pivot as the temporary for the value being moved */ \
1775+ for (tmp = ok; tmp > idx; --tmp) \
1776+ v[tmp] = v[tmp - 1]; \
1777+ v[idx] = pivot; \
1778+ } \
1779+ } while (0)
1780+
17561781/* binarysort is the best method for sorting small arrays: it does few
17571782 compares, but can do data movement quadratic in the number of elements.
17581783 ss->keys is viewed as an array of n keys, a[:n]. a[:ok] is already sorted.
@@ -1786,24 +1811,10 @@ binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok)
17861811 Py_ssize_t aL = 0 ;
17871812 Py_ssize_t aR = ok - 1 ;
17881813 pivot = a [ok ];
1814+
17891815 assert (aL < aR );
1790- do {
1791- M = (aL + aR ) >> 1 ;
1792- IFLT (pivot , a [M ])
1793- aR = M ;
1794- else
1795- aL = M + 1 ;
1796- } while (aL < aR );
1797- assert (aL == aR );
1798- for (M = ok ; M > aL ; -- M )
1799- a [M ] = a [M - 1 ];
1800- a [aL ] = pivot ;
1801- if (has_values ) {
1802- pivot = v [ok ];
1803- for (M = ok ; M > aL ; -- M )
1804- v [M ] = v [M - 1 ];
1805- v [aL ] = pivot ;
1806- }
1816+ _binarysort_BISECT (aL , aR );
1817+ _binarysort_INSORT (aL , M );
18071818 ++ ok ;
18081819
18091820 Py_ssize_t m = ok < 5 ? 11 : ok + 6 ;
@@ -1817,24 +1828,8 @@ binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok)
18171828 pivot = a [ok ];
18181829
18191830 assert (aL < aR );
1820- do {
1821- M = (aL + aR ) >> 1 ;
1822- IFLT (pivot , a [M ])
1823- aR = M ;
1824- else
1825- aL = M + 1 ;
1826- } while (aL < aR );
1827- assert (aL == aR );
1828-
1829- for (M = ok ; M > aL ; -- M )
1830- a [M ] = a [M - 1 ];
1831- a [aL ] = pivot ;
1832- if (has_values ) {
1833- pivot = v [ok ];
1834- for (M = ok ; M > aL ; -- M )
1835- v [M ] = v [M - 1 ];
1836- v [aL ] = pivot ;
1837- }
1831+ _binarysort_BISECT (aL , aR );
1832+ _binarysort_INSORT (aL , M );
18381833
18391834 std += labs (aL - mu );
18401835 std /= 2 ; // EWMA with alpha=0.5
@@ -1850,14 +1845,13 @@ binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok)
18501845 aL = 0 ;
18511846 aR = mu ;
18521847 if (aL < aR ) {
1853- std += !std ;
1854- M = aR - std ;
1848+ M = aR - 1 - std ;
18551849 if (M < aL )
18561850 M = aL ;
18571851 IFLT (pivot , a [M ]) {
18581852 aR = M ;
18591853 if (aL < aR ) {
1860- M = aR - std ;
1854+ M = aR - 1 - std ;
18611855 if (M < aL )
18621856 M = aL ;
18631857 IFLT (pivot , a [M ])
@@ -1904,16 +1898,7 @@ binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok)
19041898 aL = M + 1 ;
19051899 }
19061900 assert (aL == aR );
1907-
1908- for (M = ok ; M > aL ; -- M )
1909- a [M ] = a [M - 1 ];
1910- a [aL ] = pivot ;
1911- if (has_values ) {
1912- pivot = v [ok ];
1913- for (M = ok ; M > aL ; -- M )
1914- v [M ] = v [M - 1 ];
1915- v [aL ] = pivot ;
1916- }
1901+ _binarysort_INSORT (aL , M );
19171902
19181903 std += labs (aL - mu );
19191904 std /= 2 ; // EWMA with alpha=0.5
@@ -2017,15 +2002,8 @@ binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok)
20172002 Caution: using memmove is much slower under MSVC 5; we're not
20182003 usually moving many slots. Years later: under Visual Studio 2022,
20192004 memmove seems just slightly slower than doing it "by hand". */
2020- for (M = ok ; M > L ; -- M )
2021- a [M ] = a [M - 1 ];
2022- a [L ] = pivot ;
2023- if (has_values ) {
2024- pivot = v [ok ];
2025- for (M = ok ; M > L ; -- M )
2026- v [M ] = v [M - 1 ];
2027- v [L ] = pivot ;
2028- }
2005+
2006+ _binarysort_INSORT (L , M );
20292007 }
20302008#endif // pick binary or regular insertion sort
20312009 return 0 ;
0 commit comments