Skip to content

Commit f17d1df

Browse files
LilithHafner (Lilith Hafner)
authored and committed
put back the old QuickSort, PartialQuickSort, and MergeSort algorithms... (#47788)
...as they were in 1.8 and rename the new PartialQuickSort to QuickerSort. Also improve the documentation and API for constructing QuickerSort and test the API. Co-authored-by: Lilith Hafner <[email protected]> (cherry picked from commit 8cdb17b)
1 parent 95cb3a8 commit f17d1df

File tree

2 files changed

+212
-72
lines changed

2 files changed

+212
-72
lines changed

base/sort.jl

Lines changed: 182 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ issorted(itr;
8686
issorted(itr, ord(lt,by,rev,order))
8787

8888
function partialsort!(v::AbstractVector, k::Union{Integer,OrdinalRange}, o::Ordering)
89-
_sort!(v, _PartialQuickSort(k), o, (;))
89+
_sort!(v, QuickerSort(k), o, (;))
9090
maybeview(v, k)
9191
end
9292

@@ -931,49 +931,40 @@ end
931931

932932

933933
"""
934-
PartialQuickSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}, next::Algorithm) <: Algorithm
934+
QuickerSort(next::Algorithm=SMALL_ALGORITHM) <: Algorithm
935+
QuickerSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}=lo, next::Algorithm=SMALL_ALGORITHM) <: Algorithm
935936
936-
Indicate that a sorting function should use the partial quick sort algorithm.
937+
Use the `QuickerSort` algorithm with the `next` algorithm as a base case.
937938
938-
Partial quick sort finds and sorts the elements that would end up in positions `lo:hi` using
939-
[`QuickSort`](@ref). It is recursive and uses the `next` algorithm for small chunks
939+
`QuickerSort` is like `QuickSort`, but utilizes scratch space to operate faster and allow
940+
for the possibility of maintaining stability.
941+
942+
If `lo` and `hi` are provided, finds and sorts the elements in the range `lo:hi`, reordering
943+
but not necessarily sorting other elements in the process. If `lo` or `hi` is `missing`, it
944+
is treated as the first or last index of the input, respectively.
945+
946+
`lo` and `hi` may be specified together as an `AbstractUnitRange`.
940947
941948
Characteristics:
942949
* *stable*: preserves the ordering of elements which compare equal
943950
(e.g. "a" and "A" in a sort of letters which ignores case).
944951
* *not in-place* in memory.
945-
* *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
952+
* *divide-and-conquer*: sort strategy similar to [`QuickSort`](@ref).
953+
* *linear runtime* if `length(lo:hi)` is constant
954+
* *quadratic worst case runtime* in pathological cases
955+
(vanishingly rare for non-malicious input)
946956
"""
947-
struct PartialQuickSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm
957+
struct QuickerSort{L<:Union{Integer,Missing}, H<:Union{Integer,Missing}, T<:Algorithm} <: Algorithm
948958
lo::L
949959
hi::H
950960
next::T
951961
end
952-
PartialQuickSort(k::Integer) = PartialQuickSort(missing, k, SMALL_ALGORITHM)
953-
PartialQuickSort(k::OrdinalRange) = PartialQuickSort(first(k), last(k), SMALL_ALGORITHM)
954-
_PartialQuickSort(k::Integer) = InitialOptimizations(PartialQuickSort(k:k))
955-
_PartialQuickSort(k::OrdinalRange) = InitialOptimizations(PartialQuickSort(k))
956-
957-
"""
958-
QuickSort
959-
960-
Indicate that a sorting function should use the quick sort algorithm.
962+
QuickerSort(next::Algorithm=SMALL_ALGORITHM) = QuickerSort(missing, missing, next)
963+
QuickerSort(lo::Union{Integer, Missing}, hi::Union{Integer, Missing}) = QuickerSort(lo, hi, SMALL_ALGORITHM)
964+
QuickerSort(lo::Union{Integer, Missing}, next::Algorithm=SMALL_ALGORITHM) = QuickerSort(lo, lo, next)
965+
QuickerSort(r::OrdinalRange, next::Algorithm=SMALL_ALGORITHM) = QuickerSort(first(r), last(r), next)
961966

962-
Quick sort picks a pivot element, partitions the array based on the pivot,
963-
and then sorts the elements before and after the pivot recursively.
964-
965-
Characteristics:
966-
* *stable*: preserves the ordering of elements which compare equal
967-
(e.g. "a" and "A" in a sort of letters which ignores case).
968-
* *not in-place* in memory.
969-
* *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
970-
* *good performance* for almost all large collections.
971-
* *quadratic worst case runtime* in pathological cases
972-
(vanishingly rare for non-malicious input)
973-
"""
974-
const QuickSort = PartialQuickSort(missing, missing, SMALL_ALGORITHM)
975-
976-
# select a pivot for QuickSort
967+
# select a pivot for QuickerSort
977968
#
978969
# This method is redefined to rand(lo:hi) in Random.jl
979970
# We can't use rand here because it is not available in Core.Compiler and
@@ -1013,7 +1004,7 @@ function partition!(t::AbstractVector, lo::Integer, hi::Integer, offset::Integer
10131004
pivot, lo-offset
10141005
end
10151006

1016-
function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw;
1007+
function _sort!(v::AbstractVector, a::QuickerSort, o::Ordering, kw;
10171008
t=nothing, offset=nothing, swap=false, rev=false)
10181009
@getkw lo hi scratch
10191010

@@ -1029,7 +1020,7 @@ function _sort!(v::AbstractVector, a::PartialQuickSort, o::Ordering, kw;
10291020
@inbounds v[j] = pivot
10301021
swap = !swap
10311022

1032-
# For QuickSort, a.lo === a.hi === missing, so the first two branches get skipped
1023+
# For QuickerSort(), a.lo === a.hi === missing, so the first two branches get skipped
10331024
if !ismissing(a.lo) && j <= a.lo # Skip sorting the lower part
10341025
swap && copyto!(v, lo, t, lo+offset, j-lo)
10351026
rev && reverse!(v, lo, j-1)
@@ -1225,7 +1216,7 @@ the initial optimizations because they can change the input vector's type and or
12251216
make them `UIntMappable`.
12261217
12271218
If the input is not [`UIntMappable`](@ref), then we perform a presorted check and dispatch
1228-
to [`QuickSort`](@ref).
1219+
to [`QuickerSort`](@ref).
12291220
12301221
Otherwise, we dispatch to [`InsertionSort`](@ref) for inputs with `length <= 40` and then
12311222
perform a presorted check ([`CheckSorted`](@ref)).
@@ -1257,7 +1248,7 @@ Consequently, we apply [`RadixSort`](@ref) for any reasonably long inputs that r
12571248
stage.
12581249
12591250
Finally, if the input has length less than 80, we dispatch to [`InsertionSort`](@ref) and
1260-
otherwise we dispatch to [`QuickSort`](@ref).
1251+
otherwise we dispatch to [`QuickerSort`](@ref).
12611252
"""
12621253
const DEFAULT_STABLE = InitialOptimizations(
12631254
IsUIntMappable(
@@ -1267,9 +1258,9 @@ const DEFAULT_STABLE = InitialOptimizations(
12671258
ConsiderCountingSort(
12681259
ConsiderRadixSort(
12691260
Small{80}(
1270-
QuickSort)))))),
1261+
QuickerSort())))))),
12711262
StableCheckSorted(
1272-
QuickSort)))
1263+
QuickerSort())))
12731264
"""
12741265
DEFAULT_UNSTABLE
12751266
@@ -1483,7 +1474,7 @@ function partialsortperm!(ix::AbstractVector{<:Integer}, v::AbstractVector,
14831474
end
14841475

14851476
# do partial quicksort
1486-
_sort!(ix, _PartialQuickSort(k), Perm(ord(lt, by, rev, order), v), (;))
1477+
_sort!(ix, QuickerSort(k), Perm(ord(lt, by, rev, order), v), (;))
14871478

14881479
maybeview(ix, k)
14891480
end
@@ -1863,18 +1854,53 @@ end
18631854

18641855
### Unused constructs for backward compatibility ###
18651856

1866-
struct MergeSortAlg{T <: Algorithm} <: Algorithm
1867-
next::T
1857+
## Old algorithms ##
1858+
1859+
struct QuickSortAlg <: Algorithm end
1860+
struct MergeSortAlg <: Algorithm end
1861+
1862+
"""
1863+
PartialQuickSort{T <: Union{Integer,OrdinalRange}}
1864+
1865+
Indicate that a sorting function should use the partial quick sort
1866+
algorithm. Partial quick sort returns the smallest `k` elements sorted from smallest
1867+
to largest, finding them and sorting them using [`QuickSort`](@ref).
1868+
1869+
Characteristics:
1870+
* *not stable*: does not preserve the ordering of elements which
1871+
compare equal (e.g. "a" and "A" in a sort of letters which
1872+
ignores case).
1873+
* *in-place* in memory.
1874+
* *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
1875+
"""
1876+
struct PartialQuickSort{T <: Union{Integer,OrdinalRange}} <: Algorithm
1877+
k::T
18681878
end
18691879

18701880
"""
1871-
MergeSort
1881+
QuickSort
18721882
1873-
Indicate that a sorting function should use the merge sort algorithm.
1883+
Indicate that a sorting function should use the quick sort
1884+
algorithm, which is *not* stable.
18741885
1875-
Merge sort divides the collection into subcollections and
1876-
repeatedly merges them, sorting each subcollection at each step,
1877-
until the entire collection has been recombined in sorted form.
1886+
Characteristics:
1887+
* *not stable*: does not preserve the ordering of elements which
1888+
compare equal (e.g. "a" and "A" in a sort of letters which
1889+
ignores case).
1890+
* *in-place* in memory.
1891+
* *divide-and-conquer*: sort strategy similar to [`MergeSort`](@ref).
1892+
* *good performance* for large collections.
1893+
"""
1894+
const QuickSort = QuickSortAlg()
1895+
1896+
"""
1897+
MergeSort
1898+
1899+
Indicate that a sorting function should use the merge sort
1900+
algorithm. Merge sort divides the collection into
1901+
subcollections and repeatedly merges them, sorting each
1902+
subcollection at each step, until the entire
1903+
collection has been recombined in sorted form.
18781904
18791905
Characteristics:
18801906
* *stable*: preserves the ordering of elements which compare
@@ -1883,21 +1909,94 @@ Characteristics:
18831909
* *not in-place* in memory.
18841910
* *divide-and-conquer* sort strategy.
18851911
"""
1886-
const MergeSort = MergeSortAlg(SMALL_ALGORITHM)
1912+
const MergeSort = MergeSortAlg()
18871913

1888-
function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing, offset=nothing)
1889-
@getkw lo hi scratch
1914+
# selectpivot!
1915+
#
1916+
# Given 3 locations in an array (lo, mi, and hi), sort (v[lo], v[mi], v[hi]) and
1917+
# choose the middle value as a pivot
1918+
#
1919+
# Upon return, the pivot is in v[lo], and v[hi] is guaranteed to be
1920+
# greater than or equal to the pivot
1921+
1922+
@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
1923+
@inbounds begin
1924+
mi = midpoint(lo, hi)
1925+
1926+
# sort v[mi] <= v[lo] <= v[hi] such that the pivot is immediately in place
1927+
if lt(o, v[lo], v[mi])
1928+
v[mi], v[lo] = v[lo], v[mi]
1929+
end
1930+
1931+
if lt(o, v[hi], v[lo])
1932+
if lt(o, v[hi], v[mi])
1933+
v[hi], v[lo], v[mi] = v[lo], v[mi], v[hi]
1934+
else
1935+
v[hi], v[lo] = v[lo], v[hi]
1936+
end
1937+
end
1938+
1939+
# return the pivot
1940+
return v[lo]
1941+
end
1942+
end
1943+
1944+
# partition!
1945+
#
1946+
# select a pivot, and partition v according to the pivot
1947+
1948+
function partition!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
1949+
pivot = selectpivot!(v, lo, hi, o)
1950+
# pivot == v[lo], v[hi] > pivot
1951+
i, j = lo, hi
1952+
@inbounds while true
1953+
i += 1; j -= 1
1954+
while lt(o, v[i], pivot); i += 1; end;
1955+
while lt(o, pivot, v[j]); j -= 1; end;
1956+
i >= j && break
1957+
v[i], v[j] = v[j], v[i]
1958+
end
1959+
v[j], v[lo] = pivot, v[j]
1960+
1961+
# v[j] == pivot
1962+
# v[k] >= pivot for k > j
1963+
# v[i] <= pivot for i < j
1964+
return j
1965+
end
1966+
1967+
function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::QuickSortAlg, o::Ordering)
1968+
@inbounds while lo < hi
1969+
hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
1970+
j = partition!(v, lo, hi, o)
1971+
if j-lo < hi-j
1972+
# recurse on the smaller chunk
1973+
# this is necessary to preserve O(log(n))
1974+
# stack space in the worst case (rather than O(n))
1975+
lo < (j-1) && sort!(v, lo, j-1, a, o)
1976+
lo = j+1
1977+
else
1978+
j+1 < hi && sort!(v, j+1, hi, a, o)
1979+
hi = j-1
1980+
end
1981+
end
1982+
return v
1983+
end
1984+
1985+
sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering, t0::Vector{T}) where T =
1986+
invoke(sort!, Tuple{typeof.((v, lo, hi, a, o))..., AbstractVector{T}}, v, lo, hi, a, o, t0) # For disambiguation
1987+
function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering,
1988+
t0::Union{AbstractVector{T}, Nothing}=nothing) where T
18901989
@inbounds if lo < hi
1891-
hi-lo <= SMALL_THRESHOLD && return _sort!(v, a.next, o, kw)
1990+
hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
18921991

18931992
m = midpoint(lo, hi)
18941993

1895-
if t === nothing
1896-
scratch, t = make_scratch(scratch, eltype(v), m-lo+1)
1897-
end
1994+
t = t0 === nothing ? similar(v, m-lo+1) : t0
1995+
length(t) < m-lo+1 && resize!(t, m-lo+1)
1996+
Base.require_one_based_indexing(t)
18981997

1899-
_sort!(v, a, o, (;kw..., hi=m, scratch); t, offset)
1900-
_sort!(v, a, o, (;kw..., lo=m+1, scratch); t, offset)
1998+
sort!(v, lo, m, a, o, t)
1999+
sort!(v, m+1, hi, a, o, t)
19012000

19022001
i, j = 1, lo
19032002
while j <= m
@@ -1924,9 +2023,37 @@ function _sort!(v::AbstractVector, a::MergeSortAlg, o::Ordering, kw; t=nothing,
19242023
end
19252024
end
19262025

1927-
scratch
2026+
return v
2027+
end
2028+
2029+
function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort,
2030+
o::Ordering)
2031+
@inbounds while lo < hi
2032+
hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
2033+
j = partition!(v, lo, hi, o)
2034+
2035+
if j <= first(a.k)
2036+
lo = j+1
2037+
elseif j >= last(a.k)
2038+
hi = j-1
2039+
else
2040+
# recurse on the smaller chunk
2041+
# this is necessary to preserve O(log(n))
2042+
# stack space in the worst case (rather than O(n))
2043+
if j-lo < hi-j
2044+
lo < (j-1) && sort!(v, lo, j-1, a, o)
2045+
lo = j+1
2046+
else
2047+
hi > (j+1) && sort!(v, j+1, hi, a, o)
2048+
hi = j-1
2049+
end
2050+
end
2051+
end
2052+
return v
19282053
end
19292054

2055+
## Old extensibility mechanisms ##
2056+
19302057
# Support 3-, 5-, and 6-argument versions of sort! for calling into the internals in the old way
19312058
sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o)
19322059
function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering)
@@ -1952,8 +2079,4 @@ function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw)
19522079
end
19532080
end
19542081

1955-
# Keep old internal types so that people can keep dispatching with
1956-
# sort!(::AbstractVector, ::Integer, ::Integer, ::Base.QuickSortAlg, ::Ordering) = ...
1957-
const QuickSortAlg = typeof(QuickSort)
1958-
19592082
end # module Sort

0 commit comments

Comments
 (0)