From 54573bb5fe8588897ce9a667efeca49f7b60ee2f Mon Sep 17 00:00:00 2001 From: jazullo Date: Mon, 24 Mar 2025 06:15:33 +0000 Subject: [PATCH 1/7] Use allocation proposal in mergesort --- src/Array.hs | 42 ++++++++++++++++++++++++++++++++++++++++++ src/DpsMergeSort4.hs | 6 +++--- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/Array.hs b/src/Array.hs index 562c4e6..f758f30 100644 --- a/src/Array.hs +++ b/src/Array.hs @@ -1,6 +1,7 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE LiberalTypeSynonyms #-} -- {-# LANGUAGE Strict #-} @@ -15,6 +16,9 @@ module Array -- * Construction and querying , alloc, make, generate, generate_par, generate_par_m, makeArray + , flattenCallback, makeCallback, biJoinAllocAffine, allocScratchAffine + , biJoinAlloc, allocScratch + , copy, copy_par, copy_par_m , size, get, set, slice, append , splitAt @@ -98,6 +102,44 @@ makeArray = make free :: HasPrim a => Array a -. () free = Unsafe.toLinear (\_ -> ()) +{-# INLINE flattenCallback #-} +flattenCallback :: (forall c. (Array b -. Ur c) -. Array a -. Ur c) -. Array a -. Array b +flattenCallback f arr = unur (f ur arr) + +{-# INLINE makeCallback #-} +makeCallback :: (Array b -. Array a) -. (Array a -. Ur c) -. Array b -. Ur c +makeCallback direct k arr = k (direct arr) + +{-# INLINE biJoinAllocAffine #-} +biJoinAllocAffine :: HasPrim tmps => Int -> tmps -> (Array tmps -. Array srcs -. Array dsts) -> Array srcs -. Array dsts +biJoinAllocAffine i a f = flattenCallback (\cont src -> alloc i a (\tmp -> makeCallback (f tmp) cont src)) + +-- efficient implementation of above +{-# INLINE allocScratchAffine #-} +allocScratchAffine :: HasPrim tmps => Int -> tmps -> (Array srcs -. Array tmps -. Array dsts) -> Array srcs -. Array dsts +allocScratchAffine i a f arr = f arr (makeArray i a) + +{-# INLINE biJoinAlloc #-} +biJoinAlloc :: HasPrim tmps => Int -> tmps -> (Array tmps -. Array srcs -. (Array dsts, Array tmpdsts)) -> Array srcs -. Array dsts +biJoinAlloc i a f = + let + g tmp src = + let + !(dst, tmp') = f tmp src + in + case free tmp' of !() -> dst + in + flattenCallback (\cont src -> alloc i a (\tmp -> makeCallback (g tmp) cont src)) + +-- efficient implementation of above +{-# INLINE allocScratch #-} +allocScratch :: HasPrim tmps => Int -> tmps -> (Array srcs -. Array tmps -. (Array dsts, Array tmpdsts)) -> Array srcs -. Array dsts +allocScratch i a f arr = + let + !(dst, tmp) = f arr (makeArray i a) + in case free tmp of !() -> dst + + -------------------------------------------------------------------------------- -- Parallel operations -------------------------------------------------------------------------------- diff --git a/src/DpsMergeSort4.hs b/src/DpsMergeSort4.hs index 129d49f..22627b2 100644 --- a/src/DpsMergeSort4.hs +++ b/src/DpsMergeSort4.hs @@ -77,11 +77,11 @@ msortInplace src tmp = go src tmp where msort' :: (Show a, HasPrimOrd a) => a -> A.Array a -. A.Array a msort' anyVal src = let !(Ur len, src') = A.size2 src - !(src'', _tmp) = msortInplace src' (A.make len anyVal) in - case A.free _tmp of !() -> src'' + !src'' = A.allocScratch len anyVal msortInplace src' in + src'' {-# INLINE msort' #-} --- finally, the top-level merge sort function -- TODO: use A.get2/A.size2 for linearity +-- finally, the top-level merge sort function {-@ msort :: { xs:(A.Array a) | left xs == 0 && right xs == size xs } -> { ys:_ | toBag xs == toBag ys && isSorted' ys && A.size xs == A.size ys && token xs == token ys } @-} From bcac1ee2ce6118fc8457734f68b104d6c6fa7b2f Mon Sep 17 00:00:00 2001 From: jazullo Date: Mon, 24 Mar 2025 06:34:15 +0000 Subject: [PATCH 2/7] Update piecewise fallback with mergesort optimizations --- src/PiecewiseFallbackSort.hs | 70 ++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/src/PiecewiseFallbackSort.hs b/src/PiecewiseFallbackSort.hs index 36a0b0e..01bb25b 100644 --- a/src/PiecewiseFallbackSort.hs +++ b/src/PiecewiseFallbackSort.hs @@ -1,6 +1,8 @@ -- Based on DpsMergeSort4.hs and Insertion.hs with analysis {-# LANGUAGE CPP #-} +{-# OPTIONS_GHC -Wno-name-shadowing #-} + module PiecewiseFallbackSort where import qualified Language.Haskell.Liquid.Bag as B @@ -31,33 +33,45 @@ import Array.List as A left ts == left ys && right ts == right ys }> / [A.size xs] @-} msortInplace :: (Show a, HasPrimOrd a) => Int -> A.Array a -. A.Array a -. (A.Array a, A.Array a) -msortInplace cutoff src tmp = -- cutoff > 0, though it may not be necessary to show sorting correctness - let !(Ur len, src') = A.size2 src in - if len <= 1 then (src', tmp) -- MHB added - else if len <= cutoff then (isort_top' src', tmp) - else - let !(srcA, srcB) = splitMid src' - !(tmpA, tmpB) = splitMid tmp - !(src1, src2) = splitMid srcA - !(src3, src4) = splitMid srcB - !(tmp1, tmp2) = splitMid tmpA - !(tmp3, tmp4) = splitMid tmpB - !(src1', tmp1') = msortInplace cutoff src1 tmp1 - !(src2', tmp2') = msortInplace cutoff src2 tmp2 - !(src3', tmp3') = msortInplace cutoff src3 tmp3 - !(src4', tmp4') = msortInplace cutoff src4 tmp4 - tmpA' = A.append tmp1' tmp2' - tmpB' = A.append tmp3' tmp4' - !(srcA'', tmpA'') = merge src1' src2' tmpA' - !(srcB'', tmpB'') = merge src3' src4' tmpB' - src'' = A.append srcA'' srcB'' - !(tmp''', src''') = merge tmpA'' tmpB'' src'' - in (src''', tmp''') ? lem_toBag_splitMid src - ? lem_toBag_splitMid tmp - ? lem_toBag_splitMid srcA - ? lem_toBag_splitMid srcB - ? lem_toBag_splitMid tmpA - ? lem_toBag_splitMid tmpB +msortInplace cutoff src tmp = go cutoff src tmp where + {-@ go :: n:Int -> xs:Array a + -> { ys:(Array a ) | A.size ys == A.size xs && left xs == left ys && + right xs == right ys } + -> (Array a, Array a)<{\zs ts -> toBag xs == toBag zs && isSorted' zs && + token xs == token zs && token ys == token ts && + A.size xs == A.size zs && A.size ys == A.size ts && + left zs == left xs && right zs == right xs && + left ts == left ys && right ts == right ys }> + / [A.size xs] @-} + go :: (Show a, HasPrimOrd a) => Int -> A.Array a -. A.Array a -. (A.Array a, A.Array a) + go cutoff src tmp = + let !(Ur len, src') = A.size2 src in + if len <= 1 then (src', tmp) -- MHB added + else if len <= cutoff then (isort_top' src', tmp) + else + let !(srcA, srcB) = splitMid src' + !(tmpA, tmpB) = splitMid tmp + !(src1, src2) = splitMid srcA + !(src3, src4) = splitMid srcB + !(tmp1, tmp2) = splitMid tmpA + !(tmp3, tmp4) = splitMid tmpB + !(src1', tmp1') = msortInplace cutoff src1 tmp1 + !(src2', tmp2') = msortInplace cutoff src2 tmp2 + !(src3', tmp3') = msortInplace cutoff src3 tmp3 + !(src4', tmp4') = msortInplace cutoff src4 tmp4 + tmpA' = A.append tmp1' tmp2' + tmpB' = A.append tmp3' tmp4' + !(srcA'', tmpA'') = merge src1' src2' tmpA' + !(srcB'', tmpB'') = merge src3' src4' tmpB' + src'' = A.append srcA'' srcB'' + !(tmp''', src''') = merge tmpA'' tmpB'' src'' + in (src''', tmp''') ? lem_toBag_splitMid src + ? lem_toBag_splitMid tmp + ? lem_toBag_splitMid srcA + ? lem_toBag_splitMid srcB + ? lem_toBag_splitMid tmpA + ? lem_toBag_splitMid tmpB +{-# INLINE msortInplace #-} {-@ pfsort' :: y:a -> { xs:(Array a) | A.size xs > 0 && left xs == 0 && right xs == size xs && y == A.get xs 0 } @@ -72,6 +86,7 @@ pfsort' anyVal src = then truncate((18820.2738 / (exp ((log (fromIntegral len)) * 0.5) )) :: Float) else 28) src' (A.makeArray len anyVal) in case A.free _tmp of !() -> src'' +{-# INLINE pfsort' #-} {-@ pfsort :: { xs:(A.Array a) | left xs == 0 && right xs == size xs } -> { ys:_ | toBag xs == toBag ys && isSorted' ys && @@ -81,3 +96,4 @@ pfsort src = let !(Ur len, src') = A.size2 src in if len == 0 then src' else let !(Ur x0, src'') = A.get2 0 src' in pfsort' x0 src'' +{-# INLINABLE pfsort #-} From ec278d205e5174b72aedab64892546bbb1d608fd Mon Sep 17 00:00:00 2001 From: jazullo Date: Mon, 24 Mar 2025 09:04:02 +0000 Subject: [PATCH 3/7] Add new plotting scripts and an explainer --- .../criterionmethodology.py | 81 +++++++++++++++++++ .../criterion-drop-in-replacement/readme | 5 ++ .../sweep_seq.py | 51 ++++++++++++ .../c-sorting-benchmarks/readme | 0 .../sort_insertion_out.csv | 0 .../sort_merge_seq_out.csv | 0 .../c-sorting-benchmarks/sort_quick_out.csv | 0 .../scripts/{ => old-criterion}/plot.py | 0 .../plot_relative_speedup.py | 0 benchmarks/scripts/{ => old-criterion}/readme | 0 .../scripts/{ => old-criterion}/sweep_seq.py | 0 11 files changed, 137 insertions(+) create mode 100644 benchmarks/scripts/criterion-drop-in-replacement/criterionmethodology.py create mode 100644 benchmarks/scripts/criterion-drop-in-replacement/readme create mode 100644 benchmarks/scripts/criterion-drop-in-replacement/sweep_seq.py rename benchmarks/scripts/{ => old-criterion}/c-sorting-benchmarks/readme (100%) rename benchmarks/scripts/{ => old-criterion}/c-sorting-benchmarks/sort_insertion_out.csv (100%) rename benchmarks/scripts/{ => old-criterion}/c-sorting-benchmarks/sort_merge_seq_out.csv (100%) rename benchmarks/scripts/{ => old-criterion}/c-sorting-benchmarks/sort_quick_out.csv (100%) rename benchmarks/scripts/{ => old-criterion}/plot.py (100%) rename benchmarks/scripts/{ => old-criterion}/plot_relative_speedup.py (100%) rename benchmarks/scripts/{ => old-criterion}/readme (100%) rename benchmarks/scripts/{ => old-criterion}/sweep_seq.py (100%) diff --git a/benchmarks/scripts/criterion-drop-in-replacement/criterionmethodology.py b/benchmarks/scripts/criterion-drop-in-replacement/criterionmethodology.py new file mode 100644 index 0000000..28f8ecc --- /dev/null +++ b/benchmarks/scripts/criterion-drop-in-replacement/criterionmethodology.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +import numpy as np +from sys import argv +import subprocess +from time import time +import math + +from matplotlib import pyplot as plt + +MAKE_PLOT = False + +def linear_regression_with_std(x, y): + x = np.array(x) + y = np.array(y) + x_mean = np.mean(x) + y_mean = np.mean(y) + numerator = np.sum((x - x_mean) * (y - y_mean)) + denominator = np.sum((x - x_mean) ** 2) + slope = numerator / denominator + intercept = y_mean - slope * x_mean + y_pred = slope * x + intercept + residuals = y - y_pred + std_dev = np.std(residuals) + return slope, intercept, std_dev + +def do_bench(cliargs, iters): + print([cliargs[1], str(iters)] + cliargs[2:]) + out = str(subprocess.check_output([cliargs[1], str(iters)] + cliargs[2:])) + s1 = out[out.find("SELFTIMED")+11:] + s2 = float(s1[:s1.find("\n")-4]) + selftimed = s2 + + b1 = out[out.find("BATCHTIME")+11:] + b2 = float(b1[:b1.find("SELFTIMED")-2]) + batchtime = b2 + + print(f"ITERS: {iters}, BATCHTIME: {batchtime}, SELFTIMED: {selftimed}") + return batchtime + +def converge(cliargs): + xs = [] + ys = [] + iters = 1 + t = time() + while len(xs) == 0: + st = do_bench(cliargs, iters) + if st * iters < 0.65: + iters *= 2 + continue + xs.append(iters) + ys.append(st) + for _ in range(2): + if time() - t < 3.5: + iters = int(math.trunc(float(iters) * 1.2) + 1) + else: + iters += 1 + iters // 20 + st = do_bench(cliargs, iters) + xs.append(iters) + ys.append(st) + while time() - t < 3.5: + if time() - t < 3.5: + iters = int(math.trunc(float(iters) * 1.2) + 1) + else: + iters += 1 + iters // 20 + st = do_bench(cliargs, iters) + xs.append(iters) + ys.append(st) + m, b, sigma = linear_regression_with_std(xs, ys) + print(f"Slope (Mean): {m}, Intercept (Overhead): {b}, Stdev: {sigma}") + p, lnc, lngsd = linear_regression_with_std([math.log(x) for x in xs], [math.log(y) for y in ys]) + c, gsd = math.exp(lnc), math.exp(lngsd) + print(f"Power (Distortion): {p}, Factor (Geomean) {c}, GeoStdev {gsd}") + if MAKE_PLOT: + plt.plot(xs, ys, 'rx') + plt.plot([xs[0], xs[-1]], [m*xs[0]+b, m*xs[-1]+b], color="blue") + plt.plot(xs, [c*x**p for x in xs], color="green") + plt.savefig("plot.png") + return m, sigma, c, gsd + +if __name__ == "__main__": + print(converge(argv)) diff --git a/benchmarks/scripts/criterion-drop-in-replacement/readme b/benchmarks/scripts/criterion-drop-in-replacement/readme new file mode 100644 index 0000000..3623cc9 --- /dev/null +++ b/benchmarks/scripts/criterion-drop-in-replacement/readme @@ -0,0 +1,5 @@ +The script `criterionmethodology.py` is my implementation of a benchrunner-runner that uses the criterion methodology. We take as input some program which takes `iters` as a command-line argument, times a function of interest in a tight loop which repeats `iters` many times, and then prints to stdout the batchtime (total loop time) and selftimed (total loop time divided by iters). The essense of criterion is then to sweep `iters` and perform a linear regression against iters and batchtime. The slope is the mean and the y-intercept represents some notion of shared overhead, insensitive to `iters`. Ultimately, criterion serves as a way to benchmark tasks with very short execution times, as startup overhead can be ignored. + +Since we have relatively precise timing over loops, I also implemented the criterion methodolgy *geometrically*. I take the logarithm of all the x and y values, compute the linear regression over that, then exponentiate the y-intercept - this represents the geomean. The other dependent portion, which is the slope, becomes a power (the equation is y = e^b x^m), which represents *geometric overhead*, e.g. how much overhead is being added per iteration. This may do well to model any slowdowns arising from pre-allocating arrays. Additionally, since performance data is non-negative and judged multiplicatively (twice as good means numbers are half, twice has bad means numbers are doubled; these are all *factors*), the geomean and geo-standard-deviation may make more sense theoretically. However, from my testing, the geomean seams to vary wildly for programs with fleeting execution times, even between repeat runs with the same parameters. + +The scripts `criterionmethodology.py` and `sweep_seq.py` can both be ran directly. The first takes command-line arguments, e.g. `criterionmethodology benchrunner Quicksort Seq 2000` will call `benchrunner iters Quicksort Seq 2000` for various `iters`. `sweep_seq` performs a logarithmic sweep over different array sizes, invoking the criterion methdology at each point. diff --git a/benchmarks/scripts/criterion-drop-in-replacement/sweep_seq.py b/benchmarks/scripts/criterion-drop-in-replacement/sweep_seq.py new file mode 100644 index 0000000..b5affe8 --- /dev/null +++ b/benchmarks/scripts/criterion-drop-in-replacement/sweep_seq.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +import os +import numpy as np +from criterionmethodology import converge +import sys + +# names = ["Optsort", "Insertionsort", "Mergesort", "Quicksort"] +# names = ["CopyArray", "Quicksort", "Insertionsort", "Mergesort"] +names = ["Insertionsort"] + +# DENSITY = 4 +DENSITY = 12 +def bounds(name): + match name: + case "Insertionsort": + lo = 3 # 2**n ... + hi = 16 + case "Quicksort": + lo = 3 + hi = 22 + case "Mergesort": + # lo = 12 + lo = 3 + hi = 24 + case "Cilksort": + # lo = 12 + lo = 3 + hi = 16#24 + case "Optsort": + lo = 3 + hi = 16#24 + case _: + lo = 3 + hi = 20 + return lo, hi, (hi-lo)*DENSITY+1 + +def dotrial(name, size): + return converge([sys.argv[0], "benchrunner", name, "Seq", str(int(size))]) + +if __name__ == "__main__": + for name in names: + lo, hi, pts = bounds(name) + with open("%s_out3.csv" % name, "w") as f: + f.write("# size\tmean\tstddev\tgeomean\tgeostdev\n") + for i in np.unique(np.logspace(lo, hi, pts, base=2).astype(int)): + with open("%s_out3.csv" % name, "a") as f: + try: + f.write("%d" % int(i) + "\t%f\t%f\t%f\t%f\n" % dotrial(name, i)) + except: + pass + diff --git a/benchmarks/scripts/c-sorting-benchmarks/readme b/benchmarks/scripts/old-criterion/c-sorting-benchmarks/readme similarity index 100% rename from benchmarks/scripts/c-sorting-benchmarks/readme rename to benchmarks/scripts/old-criterion/c-sorting-benchmarks/readme diff --git a/benchmarks/scripts/c-sorting-benchmarks/sort_insertion_out.csv b/benchmarks/scripts/old-criterion/c-sorting-benchmarks/sort_insertion_out.csv similarity index 100% rename from benchmarks/scripts/c-sorting-benchmarks/sort_insertion_out.csv rename to benchmarks/scripts/old-criterion/c-sorting-benchmarks/sort_insertion_out.csv diff --git a/benchmarks/scripts/c-sorting-benchmarks/sort_merge_seq_out.csv b/benchmarks/scripts/old-criterion/c-sorting-benchmarks/sort_merge_seq_out.csv similarity index 100% rename from benchmarks/scripts/c-sorting-benchmarks/sort_merge_seq_out.csv rename to benchmarks/scripts/old-criterion/c-sorting-benchmarks/sort_merge_seq_out.csv diff --git a/benchmarks/scripts/c-sorting-benchmarks/sort_quick_out.csv b/benchmarks/scripts/old-criterion/c-sorting-benchmarks/sort_quick_out.csv similarity index 100% rename from benchmarks/scripts/c-sorting-benchmarks/sort_quick_out.csv rename to benchmarks/scripts/old-criterion/c-sorting-benchmarks/sort_quick_out.csv diff --git a/benchmarks/scripts/plot.py b/benchmarks/scripts/old-criterion/plot.py similarity index 100% rename from benchmarks/scripts/plot.py rename to benchmarks/scripts/old-criterion/plot.py diff --git a/benchmarks/scripts/plot_relative_speedup.py b/benchmarks/scripts/old-criterion/plot_relative_speedup.py similarity index 100% rename from benchmarks/scripts/plot_relative_speedup.py rename to benchmarks/scripts/old-criterion/plot_relative_speedup.py diff --git a/benchmarks/scripts/readme b/benchmarks/scripts/old-criterion/readme similarity index 100% rename from benchmarks/scripts/readme rename to benchmarks/scripts/old-criterion/readme diff --git a/benchmarks/scripts/sweep_seq.py b/benchmarks/scripts/old-criterion/sweep_seq.py similarity index 100% rename from benchmarks/scripts/sweep_seq.py rename to benchmarks/scripts/old-criterion/sweep_seq.py From fa44d8086163fc9d55b2a31c2bf4dce68091b2e4 Mon Sep 17 00:00:00 2001 From: jazullo Date: Mon, 24 Mar 2025 20:54:42 +0000 Subject: [PATCH 4/7] Use alloc proposal in parallel mergesort Also, remove typeclass constraint on `free` since it is unnecessary and enables typechecking --- src/Array.hs | 2 +- src/DpsMergeSort4Par.hs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Array.hs b/src/Array.hs index f758f30..945f611 100644 --- a/src/Array.hs +++ b/src/Array.hs @@ -99,7 +99,7 @@ makeArray = make #endif {-# INLINE free #-} -free :: HasPrim a => Array a -. () +free :: Array a -. () free = Unsafe.toLinear (\_ -> ()) {-# INLINE flattenCallback #-} diff --git a/src/DpsMergeSort4Par.hs b/src/DpsMergeSort4Par.hs index 862eccf..ed4a85b 100644 --- a/src/DpsMergeSort4Par.hs +++ b/src/DpsMergeSort4Par.hs @@ -107,8 +107,8 @@ msort' :: (Show a, HasPrimOrd a) => a -> A.Array a -. A.Array a msort' anyVal src = let !(Ur len, src') = A.size2 src - !(src'', _tmp) = msortInplace src' (A.make len anyVal) in - case A.free _tmp of !() -> src'' + !src'' = A.allocScratch len anyVal msortInplace src' in + src'' {-# INLINE msort' #-} -- finally, the top-level merge sort function From eafa2b115bf7093f27154a868422694cf536895c Mon Sep 17 00:00:00 2001 From: jazullo Date: Tue, 25 Mar 2025 06:01:42 +0000 Subject: [PATCH 5/7] Construct Cilksort with reasonable parameters and connect it to benchrunner --- benchrunner/Benchrunner.hs | 3 + benchrunner/Types.hs | 1 + lh-array-sort.cabal | 8 +- src/CilkSort.hs | 95 ++++++++++++++--------- src/QuickSortCilk.hs | 153 ++++++++++++++++++++----------------- 5 files changed, 148 insertions(+), 112 deletions(-) diff --git a/benchrunner/Benchrunner.hs b/benchrunner/Benchrunner.hs index adcccb0..df4a27d 100644 --- a/benchrunner/Benchrunner.hs +++ b/benchrunner/Benchrunner.hs @@ -19,6 +19,7 @@ import qualified Insertion as I import qualified QuickSort as Q import qualified DpsMergeSort4 as DMS import qualified DpsMergeSort4Par as DMSP +import qualified CilkSort as CSP import qualified PiecewiseFallbackSort as PFS import qualified PiecewiseFallbackSortPar as PFSP import qualified Microbench as MB @@ -41,6 +42,7 @@ getInput bench mb_size = case bench of Insertionsort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 100) Quicksort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 1000000) Mergesort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 8000000) + Cilksort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 8000000) Optsort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 8000000) _ -> error "getInput: Unexpected Input!" where @@ -103,6 +105,7 @@ sortFn bench parorseq = case (bench,parorseq) of (Mergesort, Par) -> DMSP.msort (Optsort, Seq) -> PFS.pfsort (Optsort, Par) -> PFSP.pfsort + (Cilksort, Par) -> CSP.cilkSort oth -> error $ "sortFn: unknown configuration: " ++ show oth vectorSortFn :: SortAlgo -> ParOrSeq -> VecSort diff --git a/benchrunner/Types.hs b/benchrunner/Types.hs index 42e9df6..2a1798d 100644 --- a/benchrunner/Types.hs +++ b/benchrunner/Types.hs @@ -11,6 +11,7 @@ data SortAlgo = Insertionsort | Mergesort | Quicksort + | Cilksort | Optsort -- piecewise fallback deriving (Eq, Show, Read) diff --git a/lh-array-sort.cabal b/lh-array-sort.cabal index 050b416..62df0f1 100644 --- a/lh-array-sort.cabal +++ b/lh-array-sort.cabal @@ -67,13 +67,9 @@ library PiecewiseFallbackSort PiecewiseFallbackSortPar - -- JZ: Add Parallel Cilksort - -- Current Cilksort is entirely sequential + QuickSortCilk + CilkSort --- remove until ready: --- QuickSortNew --- the last not quite ready yet? --- CilkSort Linear.Common other-modules: Array.List diff --git a/src/CilkSort.hs b/src/CilkSort.hs index bfd96c2..ed8e6f1 100644 --- a/src/CilkSort.hs +++ b/src/CilkSort.hs @@ -6,24 +6,26 @@ module CilkSort where import qualified Language.Haskell.Liquid.Bag as B import Language.Haskell.Liquid.ProofCombinators hiding ((?)) import ProofCombinators -import Array +import Array as A import ArrayOperations -import DpsMerge +import DpsMergePar +import qualified DpsMergeSort4 as Seq import Properties.Equivalence import Properties.Order -import Insertion import QuickSortCilk +import Par +import Linear.Common #ifdef MUTABLE_ARRAYS import Array.Mutable as A +import Control.DeepSeq ( NFData(..) ) #else import Array.List as A #endif #define KILO 1024 -#define MERGESIZE (2*KILO) -#define QUICKSIZE (2*KILO) -#define INSERTIONSIZE 20 +#define QUICKSIZE (8*KILO) +#define SEQSIZE (8*KILO) -- DPS mergesort -- unfold twice, merge twice {-@ cilkSortInplace :: xs:Array a @@ -35,51 +37,70 @@ import Array.List as A left zs == left xs && right zs == right xs && left ts == left ys && right ts == right ys }> / [A.size xs] @-} -cilkSortInplace :: (Show a, Ord a) => A.Array a -> A.Array a -> (A.Array a, A.Array a) +#ifdef MUTABLE_ARRAYS +cilkSortInplace :: (Show a, HasPrimOrd a, NFData a) => +#else +cilkSortInplace :: (Show a, HasPrimOrd a) => +#endif + A.Array a -. A.Array a -. (A.Array a, A.Array a) cilkSortInplace src tmp = - let (len, src') = A.size2 src in - if len <= QUICKSIZE - then let src'' = quickSort src' - in (src'', tmp) + let !(Ur len, src') = A.size2 src in + if len <= SEQSIZE + then + if len <= QUICKSIZE + then let src'' = quickSort src' + in (src'', tmp) + else Seq.msortInplace src' tmp else - let (srcA, srcB) = splitMid src' - (tmpA, tmpB) = splitMid tmp - (src1, src2) = splitMid srcA - (src3, src4) = splitMid srcB - (tmp1, tmp2) = splitMid tmpA - (tmp3, tmp4) = splitMid tmpB - (src1', tmp1') = cilkSortInplace src1 tmp1 - (src2', tmp2') = cilkSortInplace src2 tmp2 - (src3', tmp3') = cilkSortInplace src3 tmp3 - (src4', tmp4') = cilkSortInplace src4 tmp4 + let !(srcA, srcB) = splitMid src' + !(tmpA, tmpB) = splitMid tmp + !(src1, src2) = splitMid srcA + !(src3, src4) = splitMid srcB + !(tmp1, tmp2) = splitMid tmpA + !(tmp3, tmp4) = splitMid tmpB + !(((src1', tmp1'), (src2', tmp2')), ((src3', tmp3'), (src4', tmp4'))) + = (.||||.) (cilkSortInplace src1 tmp1) (cilkSortInplace src2 tmp2) + (cilkSortInplace src3 tmp3) (cilkSortInplace src4 tmp4) tmpA' = A.append tmp1' tmp2' tmpB' = A.append tmp3' tmp4' - (srcA'', tmpA'') = merge src1' src2' tmpA' - (srcB'', tmpB'') = merge src3' src4' tmpB' + !((srcA'', tmpA''), (srcB'', tmpB'')) + = merge_par src1' src2' tmpA' .||. merge_par src3' src4' tmpB' +-- = tuple2 (merge_par src1' src2') tmpA' (merge_par src3' src4') tmpB' src'' = A.append srcA'' srcB'' - (tmp''', src''') = merge tmpA'' tmpB'' src'' - in (src''', tmp''') ? lem_toBag_splitMid src + !(tmp''', src''') = merge_par tmpA'' tmpB'' src'' + in (src''', tmp''') ? lem_toBag_splitMid src ? lem_toBag_splitMid tmp ? lem_toBag_splitMid srcA ? lem_toBag_splitMid srcB ? lem_toBag_splitMid tmpA ? lem_toBag_splitMid tmpB -{-@ cilkSort' :: { xs:(Array a) | A.size xs > 0 && left xs == 0 && right xs == size xs } - -> { y:a | y == A.get xs 0 } - -> { zs:(Array a) | toBag xs == toBag zs && isSorted' zs && - A.size xs == A.size zs && token xs == token zs } @-} -cilkSort' :: (Show a, Ord a) => A.Array a -> a -> A.Array a -cilkSort' src anyVal = - let (len, src') = A.size2 src - (src'', _tmp) = cilkSortInplace src' (A.make len anyVal) in - _tmp `seq` src'' +{-@ cilkSort' :: y:a + -> { xs:(Array a) | A.size xs > 0 && left xs == 0 && right xs == size xs && y == A.get xs 0 } + -> { zs:(Array a) | toBag xs == toBag zs && isSorted' zs && + A.size xs == A.size zs && token xs == token zs } @-} +#ifdef MUTABLE_ARRAYS +cilkSort' :: (Show a, HasPrimOrd a, NFData a) => +#else +cilkSort' :: (Show a, HasPrimOrd a) => +#endif + a -> A.Array a -. A.Array a +cilkSort' anyVal src = + let !(Ur len, src') = A.size2 src + !src'' = A.allocScratch len anyVal cilkSortInplace src' in + src'' +-- finally, the top-level merge sort function {-@ cilkSort :: { xs:(A.Array a) | left xs == 0 && right xs == size xs } -> { ys:_ | toBag xs == toBag ys && isSorted' ys && A.size xs == A.size ys && token xs == token ys } @-} -cilkSort :: (Show a, Ord a) => A.Array a -> A.Array a +#ifdef MUTABLE_ARRAYS +cilkSort :: (Show a, HasPrimOrd a, NFData a) => +#else +cilkSort :: (Show a, HasPrimOrd a) => +#endif + A.Array a -. A.Array a cilkSort src = - let (len, src') = A.size2 src in + let !(Ur len, src') = A.size2 src in if len == 0 then src' - else let (x0, src'') = A.get2 src' 0 in cilkSort' src'' x0 + else let !(Ur x0, src'') = A.get2 0 src' in cilkSort' x0 src'' diff --git a/src/QuickSortCilk.hs b/src/QuickSortCilk.hs index bffa939..b580d16 100644 --- a/src/QuickSortCilk.hs +++ b/src/QuickSortCilk.hs @@ -20,6 +20,7 @@ import Properties.RangeProperties import Insertion +import Linear.Common #ifdef MUTABLE_ARRAYS import qualified Unsafe.Linear as Unsafe import Array.Mutable as A @@ -41,27 +42,33 @@ import qualified Array as A {-@ quickSort :: xs:(Array a) -> { ys:(Array a) | isSorted' ys && A.size xs == A.size ys && toBag xs == toBag ys } @-} -- quickSort :: (Ord a, Show a) => Array a -> Array a -quickSort :: (HasPrimOrd a, Show a) => Array a -> Array a +quickSort :: (HasPrimOrd a, Show a) => Array a -. Array a quickSort xs = - let (n, xs1) = A.size2 xs in + let (Ur n, xs1) = A.size2 xs in if n == 0 then xs1 - else let (hd, xs2) = A.get2 xs1 0 - {-@ promise :: { tmp:(Array a) | size tmp == n } - -> { out:(Ur (Array a)) | size (unur out) == n && - toSlice (unur out) 0 n == toSlice xs2 0 n} @-} - promise tmp = Ur (A.copy xs2 0 tmp 0 n) - ? lem_copy_equal_slice xs2 0 tmp 0 n - {- @ cpy :: { ys:(Array a) | size ys == n && toSlice ys 0 n == toSlice xs2 0 n } @-} - Ur cpy = A.alloc n hd (Unsafe.toLinear promise) - in quickSortBtw (cpy ? lem_equal_slice_bag xs2 cpy 0 n) 0 n + else let (Ur hd, xs2) = A.get2 0 xs1 + tmp = makeArray n hd in + A.copy2 0 0 n xs2 tmp ? lem_copy_equal_slice xs2 0 tmp 0 n & \(xs2', cpy0) -> + (A.free(xs2'), cpy0) & \((), cpy) -> + quickSortBtw 0 n (cpy ? lem_equal_slice_bag xs2 cpy 0 n) +{-# INLINABLE quickSort #-} + +{-@ quickSort' :: xs:(Array a) -> { ys:(Array a) | isSorted' ys && A.size xs == A.size ys && + toBag xs == toBag ys } @-} +quickSort' :: (HasPrimOrd a, Show a) => Array a -. Array a +quickSort' xs = + let (Ur n, xs1) = A.size2 xs in + if n == 0 then xs1 + else quickSortBtw 0 n xs1 +{-# INLINABLE quickSort' #-} {-@ quickSortBtw :: xs:(Array a) -> { i:Int | 0 <= i } -> { j:Int | i <= j && j <= A.size xs } -> { ys:(Array a) | isSortedBtw ys i j && A.size xs == A.size ys && toSlice xs 0 i == toSlice ys 0 i && toSlice xs j (A.size xs) == toSlice ys j (A.size xs) && toBagBtw xs i j == toBagBtw ys i j } / [j - i] @-} -quickSortBtw :: HasPrimOrd a => Array a -> Int -> Int -> Array a -quickSortBtw xs i j = +quickSortBtw :: HasPrimOrd a => Int -> Int -> (Array a -. Array a) +quickSortBtw i j xs = if j - i < 2 then xs else if j - i <= INSERTIONSIZE then let (xs_l, xs_cr) = A.splitAt i xs @@ -101,9 +108,9 @@ quickSortBtw xs i j = ? lem_toSlice_slice xs i (A.size xs) (j-i) (A.size xs - i) === toSlice xs j (A.size xs) ) - else let (xs', i_piv) = shuffleBtw xs i j -- isPartitionedBtw xs' i i_piv j - xs'' = quickSortBtw xs' i i_piv - xs''' = quickSortBtw xs'' (i_piv + 1) j + else let (xs', Ur i_piv) = shuffleBtw i j xs + xs'' = quickSortBtw i i_piv xs' + xs''' = quickSortBtw (i_piv + 1) j xs'' ? lem_qs_pres_partition_left xs' xs'' i i_piv j in xs''' ? lem_sorted_partitions xs''' i i_piv (j ? lem_equal_slice_sorted xs'' xs''' 0 i i_piv (i_piv+1) @@ -116,66 +123,74 @@ quickSortBtw xs i j = ? lem_equal_slice_bag xs'' xs''' i (i_piv+1) ? lem_toBagBtw_compose xs' xs'' i i_piv j ? lem_toBagBtw_compose xs'' xs''' i (i_piv+1) j +{-# INLINE quickSortBtw #-} -{-@ shuffleBtw :: xs:(Array a) -> { i:Int | 0 <= i } -> { j:Int | i + 1 < j && j <= A.size xs } - -> (Array a, Int)<{\ys ip -> isPartitionedBtw ys i ip j && +{-@ shuffleBtw :: { i:Int | 0 <= i } -> j:Int -> { xs:(Array a) | i + 1 < j && j <= A.size xs } + -> (Array a, Ur Int)<{\ys ip -> isPartitionedBtw ys i (unur ip) j && toSlice xs 0 i == toSlice ys 0 i && toSlice xs j (A.size xs) == toSlice ys j (A.size xs) && toBagBtw xs i j == toBagBtw ys i j && - i <= ip && ip < j && A.size ys == A.size xs }> @-} -shuffleBtw :: HasPrimOrd a => Array a -> Int -> Int -> (Array a, Int) -shuffleBtw xs i j = + i <= unur ip && unur ip < j && A.size ys == A.size xs }> @-} + +shuffleBtw :: forall a. HasPrimOrd a => Int -> Int -> (Array a -. (Array a, Ur Int)) +shuffleBtw i j xs = xs & A.get2 (j-1) {- fix (j-1)^th element as the pivot -} & \(Ur piv, xs1) -> let - (piv, xs1) = A.get2 xs (j-1) -- fix xs[j-1] as the pivot - {-@ goShuffle :: { zs:(Array a) | get zs (j-1) == piv && A.size zs == A.size xs && - toBagBtw zs i j == toBagBtw xs i j && - toSlice xs 0 i == toSlice zs 0 i && - toSlice xs j (A.size zs) == toSlice zs j (A.size zs)} - -> { jl:Int | i <= jl && rangeUpperBound zs i jl piv } - -> { jr:Int | jl <= jr+1 && jr < j-1 && rangeLowerBound zs (jr+1) (j-1) piv } - -> (Array a, Int)<{\ws ip -> rangeUpperBound ws i ip piv && - rangeLowerBound ws ip (j-1) piv && - A.size ws == A.size zs && - get ws (j-1) == get zs (j-1) && - toBagBtw zs i j == toBagBtw ws i j && - toBagBtw xs i j == toBagBtw ws i j && - toSlice zs 0 i == toSlice ws 0 i && - toSlice zs j (A.size zs) == toSlice ws j (A.size zs) && - i <= ip && ip < j }> / [jr - jl + 1] @-} - -- at return, all of ws[i:ip] <= ws[j-1] and all of ws[ip:j-1] > ws[j-1]. - goShuffle zs jl jr = -- BOTH bounds inclusive here - if jl > jr - then (zs, jl) - else let (vl, zs') = A.get2 zs jl in - if vl <= piv - then goShuffle zs' (jl+1 ? lem_rangeProperty_build_right zs (belowPivot (get zs (j-1))) - i (jl ? toProof (belowPivot (get zs (j-1)) (get zs jl)))) - jr - else let (vr, zs'') = A.get2 zs' jr in - if vr > piv - then goShuffle zs'' jl (jr-1) - else let zs''' = swap zs'' jl jr - ? lem_range_outside_swap zs i jl jr (j-1) piv - ? lma_swap zs jl jr - ? lma_swap_eql zs jl jr (j-1) - ? lem_bagBtw_swap zs i jl jr j - ? lem_toSlice_swap zs i jl jr j - in goShuffle zs''' jl (jr-1) + {-@ goShuffle :: + { jl:Int | i <= jl } + -> { jr:Int | jl <= jr+1 } + -> { zs:(Array a) | get zs (j-1) == piv && A.size zs == A.size xs && + toBagBtw zs i j == toBagBtw xs i j && + toSlice xs 0 i == toSlice zs 0 i && + toSlice xs j (A.size zs) == toSlice zs j (A.size zs) - (xs', ip) = goShuffle xs1 i (j-2) -- BOTH bounds inclusive - {- @ xs'' :: { vs:(Array a) | isPartitionedBtw vs i ip j && - toSlice xs' 0 i == toSlice vs 0 i && - toSlice xs' j (A.size xs') == toSlice vs j (A.size xs') && - A.size xs' == A.size vs && - toBagBtw xs i j == toBagBtw vs i j } @-} - xs'' = if ip < j-1 - then swap2 xs' ip (j-1) ? lma_swap xs' ip (j-1) - ? lem_bagBtw_swap xs' i ip (j-1) j - ? lem_range_inside_swap xs' ip (j-1) - ? lem_range_outside_swap xs' i ip (j-1) j (get xs' (j-1)) - ? lem_toSlice_swap xs' i ip (j-1) j + && rangeUpperBound zs i jl piv + && jr < j-1 && rangeLowerBound zs (jr+1) (j-1) piv } + -> (Array a, Ur Int)<{\ws ip -> rangeUpperBound ws i (unur ip) piv && + rangeLowerBound ws (unur ip) (j-1) piv && + A.size ws == A.size zs && + get ws (j-1) == get zs (j-1) && + toBagBtw zs i j == toBagBtw ws i j && + toBagBtw xs i j == toBagBtw ws i j && + toSlice zs 0 i == toSlice ws 0 i && + toSlice zs j (A.size zs) == toSlice ws j (A.size zs) && + i <= unur ip && unur ip < j }> / [jr - jl + 1] @-} + -- at return, all of ws[i:ip] <= ws[j-1] and all of ws[ip:j-1] > ws[j-1]. + goShuffle :: HasPrimOrd a => Int -> Int -> (Array a -. (Array a, Ur Int)) + goShuffle jl jr zs = -- BOTH bounds inclusive here + if jl > jr + then (zs, Ur jl) + else A.get2 jl zs & \(Ur vl, zs') -> + if vl <= piv + then goShuffle (jl+1 ? lem_rangeProperty_build_right zs (belowPivot (get zs (j-1))) + i (jl ? toProof (belowPivot (get zs (j-1)) (get zs jl)))) + jr zs' + else A.get2 jr zs' & \(Ur vr, zs'') -> + if vr > piv + then goShuffle jl (jr-1) zs'' + else let zs''' = swap2 jl jr zs'' + ? lem_range_outside_swap zs i jl jr (j-1) piv + ? lma_swap zs jl jr + ? lma_swap_eql zs jl jr (j-1) + ? lem_bagBtw_swap zs i jl jr j + ? lem_toSlice_swap zs i jl jr j + in goShuffle jl (jr-1) zs''' in + + goShuffle i (j-2) xs1 & \(xs', Ur ip) -> -- BOTH bounds inclusive + let + {- @ xs'' :: { vs:(Array a) | isPartitionedBtw vs i ip j && + toSlice xs' 0 i == toSlice vs 0 i && + toSlice xs' j (A.size xs') == toSlice vs j (A.size xs') && + A.size xs' == A.size vs && + toBagBtw xs i j == toBagBtw vs i j } @-} + xs'' = if ip < j-1 + then swap2 ip (j-1) xs' ? lma_swap xs' ip (j-1) + ? lem_bagBtw_swap xs' i ip (j-1) j + ? lem_range_inside_swap xs' ip (j-1) + ? lem_range_outside_swap xs' i ip (j-1) j (get xs' (j-1)) + ? lem_toSlice_swap xs' i ip (j-1) j else xs' - in (xs'', ip) + in (xs'', Ur ip) +{-# INLINE shuffleBtw #-} -- where -- | This belongs in Equivalence.hs but causes a Fixpoint panic if it's there From f49363b37004da03665afde83434351c7af1f2d7 Mon Sep 17 00:00:00 2001 From: Artem Pelenitsyn Date: Mon, 7 Apr 2025 10:43:29 -0400 Subject: [PATCH 6/7] add simple Liquid annotation for `flattenCallback`, which makes Liquid crash --- src/Array.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Array.hs b/src/Array.hs index 945f611..586fc1e 100644 --- a/src/Array.hs +++ b/src/Array.hs @@ -103,6 +103,8 @@ free :: Array a -. () free = Unsafe.toLinear (\_ -> ()) {-# INLINE flattenCallback #-} +{-@ flattenCallback :: f:_ -> xs:_ -> + ret:{ ys:(Array a) } @-} flattenCallback :: (forall c. (Array b -. Ur c) -. Array a -. Ur c) -. Array a -. Array b flattenCallback f arr = unur (f ur arr) From 04758eafc857c42eac14614507a27a943eba7fa1 Mon Sep 17 00:00:00 2001 From: Michael Borkowski Date: Tue, 23 Sep 2025 10:42:20 -0400 Subject: [PATCH 7/7] fix cilksort segfault/slowness --- src/CilkSort.hs | 83 +++++++++++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 31 deletions(-) diff --git a/src/CilkSort.hs b/src/CilkSort.hs index ed8e6f1..f4a3c8f 100644 --- a/src/CilkSort.hs +++ b/src/CilkSort.hs @@ -43,37 +43,56 @@ cilkSortInplace :: (Show a, HasPrimOrd a, NFData a) => cilkSortInplace :: (Show a, HasPrimOrd a) => #endif A.Array a -. A.Array a -. (A.Array a, A.Array a) -cilkSortInplace src tmp = - let !(Ur len, src') = A.size2 src in - if len <= SEQSIZE - then - if len <= QUICKSIZE - then let src'' = quickSort src' - in (src'', tmp) - else Seq.msortInplace src' tmp - else - let !(srcA, srcB) = splitMid src' - !(tmpA, tmpB) = splitMid tmp - !(src1, src2) = splitMid srcA - !(src3, src4) = splitMid srcB - !(tmp1, tmp2) = splitMid tmpA - !(tmp3, tmp4) = splitMid tmpB - !(((src1', tmp1'), (src2', tmp2')), ((src3', tmp3'), (src4', tmp4'))) - = (.||||.) (cilkSortInplace src1 tmp1) (cilkSortInplace src2 tmp2) - (cilkSortInplace src3 tmp3) (cilkSortInplace src4 tmp4) - tmpA' = A.append tmp1' tmp2' - tmpB' = A.append tmp3' tmp4' - !((srcA'', tmpA''), (srcB'', tmpB'')) - = merge_par src1' src2' tmpA' .||. merge_par src3' src4' tmpB' --- = tuple2 (merge_par src1' src2') tmpA' (merge_par src3' src4') tmpB' - src'' = A.append srcA'' srcB'' - !(tmp''', src''') = merge_par tmpA'' tmpB'' src'' - in (src''', tmp''') ? lem_toBag_splitMid src - ? lem_toBag_splitMid tmp - ? lem_toBag_splitMid srcA - ? lem_toBag_splitMid srcB - ? lem_toBag_splitMid tmpA - ? lem_toBag_splitMid tmpB +cilkSortInplace src tmp = go src tmp where + {-@ go ::xs:Array a + -> { ys:(Array a ) | A.size ys == A.size xs && left xs == left ys && + right xs == right ys } + -> (Array a, Array a)<{\zs ts -> toBag xs == toBag zs && isSorted' zs && + token xs == token zs && token ys == token ts && + A.size xs == A.size zs && A.size ys == A.size ts && + left zs == left xs && right zs == right xs && + left ts == left ys && right ts == right ys }> + / [A.size xs] @-} +#ifdef MUTABLE_ARRAYS + go :: (Show a, HasPrimOrd a, NFData a) => +#else + go :: (Show a, HasPrimOrd a) => +#endif + A.Array a -. A.Array a -. (A.Array a, A.Array a) + go src tmp = + let !(Ur len, src') = A.size2 src in + if len <= SEQSIZE + then + if len <= QUICKSIZE + then let src'' = quickSort' src' + in (src'', tmp) + else Seq.msortInplace src' tmp + else + let !(srcA, srcB) = splitMid src' + !(tmpA, tmpB) = splitMid tmp + !(src1, src2) = splitMid srcA + !(src3, src4) = splitMid srcB + !(tmp1, tmp2) = splitMid tmpA + !(tmp3, tmp4) = splitMid tmpB + !(((src1', tmp1'), (src2', tmp2')), ((src3', tmp3'), (src4', tmp4'))) + = (go src1 tmp1 .||. go src2 tmp2) .||. + (go src3 tmp3 .||. go src4 tmp4) + -- = (.||||.) (go src1 tmp1) (go src2 tmp2) + -- (go src3 tmp3) (go src4 tmp4) + tmpA' = A.append tmp1' tmp2' + tmpB' = A.append tmp3' tmp4' + !((srcA'', tmpA''), (srcB'', tmpB'')) + = merge_par src1' src2' tmpA' .||. merge_par src3' src4' tmpB' + -- = tuple2 (merge_par src1' src2') tmpA' (merge_par src3' src4') tmpB' + src'' = A.append srcA'' srcB'' + !(tmp''', src''') = merge_par tmpA'' tmpB'' src'' + in (src''', tmp''') ? lem_toBag_splitMid src + ? lem_toBag_splitMid tmp + ? lem_toBag_splitMid srcA + ? lem_toBag_splitMid srcB + ? lem_toBag_splitMid tmpA + ? lem_toBag_splitMid tmpB +{-# INLINE cilkSortInplace #-} {-@ cilkSort' :: y:a -> { xs:(Array a) | A.size xs > 0 && left xs == 0 && right xs == size xs && y == A.get xs 0 } @@ -89,6 +108,7 @@ cilkSort' anyVal src = let !(Ur len, src') = A.size2 src !src'' = A.allocScratch len anyVal cilkSortInplace src' in src'' +{-# INLINE cilkSort' #-} -- finally, the top-level merge sort function {-@ cilkSort :: { xs:(A.Array a) | left xs == 0 && right xs == size xs } @@ -104,3 +124,4 @@ cilkSort src = let !(Ur len, src') = A.size2 src in if len == 0 then src' else let !(Ur x0, src'') = A.get2 0 src' in cilkSort' x0 src'' +{-# INLINABLE cilkSort #-} \ No newline at end of file