Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
import numpy as np
from sys import argv
import subprocess
from time import time
import math

from matplotlib import pyplot as plt

# When True, converge() additionally saves a plot of the samples and both
# fitted curves to "plot.png".
MAKE_PLOT = False

def linear_regression_with_std(x, y):
    """Fit ``y = slope * x + intercept`` by ordinary least squares.

    Args:
        x: Sequence of sample positions.
        y: Sequence of observed values, same length as ``x``.

    Returns:
        A ``(slope, intercept, std_dev)`` tuple, where ``std_dev`` is the
        population standard deviation (``np.std``) of the fit residuals.

    Raises:
        ValueError: If the inputs are empty, differ in shape, or every ``x``
            is identical (the slope is undefined in that case).
    """
    # NOTE(review): a reviewer remarked that Python likely already offers this
    # in a library (should be easy to look up); candidates worth checking are
    # numpy.polyfit and scipy.stats.linregress.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.size == 0 or x.shape != y.shape:
        raise ValueError("x and y must be non-empty and of equal length")

    x_mean = np.mean(x)
    y_mean = np.mean(y)
    x_centered = x - x_mean
    denominator = np.sum(x_centered ** 2)
    if denominator == 0:
        # Without this guard the division below silently yields NaN.
        raise ValueError("x values must not all be identical")

    slope = np.sum(x_centered * (y - y_mean)) / denominator
    intercept = y_mean - slope * x_mean
    residuals = y - (slope * x + intercept)
    std_dev = np.std(residuals)
    return slope, intercept, std_dev

def do_bench(cliargs, iters):
    """Run the benchmark program once and return the batch time it reports.

    The executed command is ``cliargs[1] <iters> *cliargs[2:]``. Its standard
    output must contain a ``BATCHTIME`` label and a ``SELFTIMED`` label, each
    followed by a number.

    Args:
        cliargs: Argument vector; ``cliargs[0]`` is ignored, ``cliargs[1]`` is
            the program to run and the rest are passed through after ``iters``.
        iters: Iteration count handed to the program as its first argument.

    Returns:
        The BATCHTIME value as a float.

    Raises:
        subprocess.CalledProcessError: If the program exits with a non-zero
            status.
        ValueError: If either label (with a numeric value) is missing from the
            program's output.
    """
    import re  # local import: only this function needs it

    command = [cliargs[1], str(iters)] + cliargs[2:]
    print(command)
    # Decode stdout to real text. Wrapping the bytes in str() yields the
    # "b'...'" repr with escaped newlines, so searching it for "\n" never
    # matches and fixed-offset slicing cuts the numbers at arbitrary places.
    out = subprocess.check_output(command, text=True)

    number = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    def _field(label):
        # Extract the numeric value that follows `label` in the output.
        found = re.search(label + r"\s*[:=]?\s*(" + number + ")", out)
        if found is None:
            raise ValueError(f"no {label} value in benchmark output: {out!r}")
        return float(found.group(1))

    batchtime = _field("BATCHTIME")
    selftimed = _field("SELFTIMED")

    print(f"ITERS: {iters}, BATCHTIME: {batchtime}, SELFTIMED: {selftimed}")
    return batchtime

def converge(cliargs):
    """Sweep the iteration count and regress batch time against it.

    The benchmark is first run with a doubling iteration count until the
    warm-up test passes; that run is the first sample. Two more samples are
    always taken, and sampling then continues until 3.5 seconds have elapsed
    since the start. A linear fit and a log-log fit are computed over all
    samples and printed.

    Args:
        cliargs: Argument vector forwarded unchanged to ``do_bench``.

    Returns:
        ``(m, sigma, c, gsd)``: slope and residual standard deviation of the
        linear fit, followed by the exponentiated intercept and exponentiated
        residual standard deviation of the log-log fit.
    """
    budget_seconds = 3.5
    sample_iters = []
    sample_times = []
    iters = 1
    started = time()

    def next_iters(current):
        # Grow by roughly 20% while inside the time budget, else by about 5%.
        if time() - started < budget_seconds:
            return int(math.trunc(float(current) * 1.2) + 1)
        return current + 1 + current // 20

    def record(count):
        # Run one benchmark batch and store it as a sample.
        elapsed = do_bench(cliargs, count)
        sample_iters.append(count)
        sample_times.append(elapsed)

    # Warm-up: keep doubling until the threshold test passes.
    # NOTE(review): do_bench returns the batch time, so this compares
    # batchtime * iters against 0.65 - confirm that is the intended quantity.
    while not sample_iters:
        batch = do_bench(cliargs, iters)
        if batch * iters < 0.65:
            iters *= 2
        else:
            sample_iters.append(iters)
            sample_times.append(batch)

    # Guarantee at least three samples regardless of elapsed time.
    for _ in range(2):
        iters = next_iters(iters)
        record(iters)

    # Use up whatever remains of the time budget.
    while time() - started < budget_seconds:
        iters = next_iters(iters)
        record(iters)

    m, b, sigma = linear_regression_with_std(sample_iters, sample_times)
    print(f"Slope (Mean): {m}, Intercept (Overhead): {b}, Stdev: {sigma}")

    log_iters = [math.log(value) for value in sample_iters]
    log_times = [math.log(value) for value in sample_times]
    p, lnc, lngsd = linear_regression_with_std(log_iters, log_times)
    c = math.exp(lnc)
    gsd = math.exp(lngsd)
    print(f"Power (Distortion): {p}, Factor (Geomean) {c}, GeoStdev {gsd}")

    if MAKE_PLOT:
        first, last = sample_iters[0], sample_iters[-1]
        plt.plot(sample_iters, sample_times, 'rx')
        plt.plot([first, last], [m*first+b, m*last+b], color="blue")
        plt.plot(sample_iters, [c*x**p for x in sample_iters], color="green")
        plt.savefig("plot.png")
    return m, sigma, c, gsd

if __name__ == "__main__":
    # The benchmark program is taken from argv[1]; fail with a usage message
    # instead of an IndexError raised from inside do_bench().
    if len(argv) < 2:
        raise SystemExit(f"usage: {argv[0]} PROGRAM [ARGS...]")
    print(converge(argv))
5 changes: 5 additions & 0 deletions benchmarks/scripts/criterion-drop-in-replacement/readme
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
The script `criterionmethodology.py` is my implementation of a benchrunner-runner that uses the criterion methodology. We take as input some program which takes `iters` as a command-line argument, times a function of interest in a tight loop which repeats `iters` many times, and then prints to stdout the batchtime (total loop time) and selftimed (total loop time divided by iters). The essence of criterion is then to sweep `iters` and perform a linear regression of batchtime against `iters`. The slope is the mean and the y-intercept represents some notion of shared overhead, insensitive to `iters`. Ultimately, criterion serves as a way to benchmark tasks with very short execution times, as startup overhead can be ignored.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's very text-heavy. What would help with it is examples of how you run it and what outputs you expect.


Since we have relatively precise timing over loops, I also implemented the criterion methodology *geometrically*. I take the logarithm of all the x and y values, compute the linear regression over that, then exponentiate the y-intercept - this represents the geomean. The other dependent portion, which is the slope, becomes a power (the equation is y = e^b x^m), which represents *geometric overhead*, i.e. how much overhead is being added per iteration. This may do well to model any slowdowns arising from pre-allocating arrays. Additionally, since performance data is non-negative and judged multiplicatively (twice as good means numbers are halved, twice as bad means numbers are doubled; these are all *factors*), the geomean and geo-standard-deviation may make more sense theoretically. However, from my testing, the geomean seems to vary wildly for programs with fleeting execution times, even between repeat runs with the same parameters.

The scripts `criterionmethodology.py` and `sweep_seq.py` can both be run directly. The first takes command-line arguments, e.g. `criterionmethodology benchrunner Quicksort Seq 2000` will call `benchrunner iters Quicksort Seq 2000` for various `iters`. `sweep_seq` performs a logarithmic sweep over different array sizes, invoking the criterion methodology at each point.
51 changes: 51 additions & 0 deletions benchmarks/scripts/criterion-drop-in-replacement/sweep_seq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
import os
import numpy as np
from criterionmethodology import converge
import sys

# Benchmarks swept by the main loop below; each name is handed to dotrial().
# The commented-out lists are alternative selections.
# names = ["Optsort", "Insertionsort", "Mergesort", "Quicksort"]
# names = ["CopyArray", "Quicksort", "Insertionsort", "Mergesort"]
names = ["Insertionsort"]

# DENSITY = 4
DENSITY = 12  # sample points per unit of the base-2 exponent


def bounds(name):
    """Return the sweep range for benchmark *name*.

    Returns:
        ``(lo, hi, pts)`` where ``lo`` and ``hi`` are base-2 exponents of the
        smallest and largest input size, and ``pts`` is the number of sample
        points, ``(hi - lo) * DENSITY + 1``.
    """
    fallback = (3, 20)
    exponent_ranges = {
        "Insertionsort": (3, 16),  # sizes 2**3 .. 2**16
        "Quicksort": (3, 22),
        "Mergesort": (3, 24),  # lo = 12 was tried previously
        "Cilksort": (3, 16),  # lo = 12 / hi = 24 were tried previously
        "Optsort": (3, 16),  # hi = 24 was tried previously
    }
    lo, hi = exponent_ranges.get(name, fallback)
    return lo, hi, (hi - lo) * DENSITY + 1

def dotrial(name, size):
    """Measure benchmark *name* in its "Seq" variant at input size *size*.

    Builds the argument vector ``[sys.argv[0], "benchrunner", name, "Seq",
    <size as int>]`` and returns whatever ``converge`` returns for it.
    """
    benchrunner_args = ["benchrunner", name, "Seq", str(int(size))]
    return converge([sys.argv[0], *benchrunner_args])

if __name__ == "__main__":
    for name in names:
        lo, hi, pts = bounds(name)
        out_path = "%s_out3.csv" % name
        # Start each benchmark's file afresh with a header row.
        with open(out_path, "w") as f:
            f.write("# size\tmean\tstddev\tgeomean\tgeostdev\n")
        for i in np.unique(np.logspace(lo, hi, pts, base=2).astype(int)):
            try:
                row = "%d" % int(i) + "\t%f\t%f\t%f\t%f\n" % dotrial(name, i)
            except Exception as exc:
                # Best effort: skip the failed size but say so. Catching
                # Exception rather than using a bare except keeps Ctrl-C and
                # SystemExit working.
                print("skipping %s size %d: %r" % (name, int(i), exc),
                      file=sys.stderr)
                continue
            # Re-open in append mode per row so finished rows reach the file
            # even if a later trial is interrupted.
            with open(out_path, "a") as f:
                f.write(row)

File renamed without changes.
File renamed without changes.
3 changes: 3 additions & 0 deletions benchrunner/Benchrunner.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import qualified Insertion as I
import qualified QuickSort as Q
import qualified DpsMergeSort4 as DMS
import qualified DpsMergeSort4Par as DMSP
import qualified CilkSort as CSP
import qualified PiecewiseFallbackSort as PFS
import qualified PiecewiseFallbackSortPar as PFSP
import qualified Microbench as MB
Expand All @@ -41,6 +42,7 @@ getInput bench mb_size = case bench of
Insertionsort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 100)
Quicksort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 1000000)
Mergesort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 8000000)
Cilksort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 8000000)
Optsort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 8000000)
_ -> error "getInput: Unexpected Input!"
where
Expand Down Expand Up @@ -103,6 +105,7 @@ sortFn bench parorseq = case (bench,parorseq) of
(Mergesort, Par) -> DMSP.msort
(Optsort, Seq) -> PFS.pfsort
(Optsort, Par) -> PFSP.pfsort
(Cilksort, Par) -> CSP.cilkSort
oth -> error $ "sortFn: unknown configuration: " ++ show oth

vectorSortFn :: SortAlgo -> ParOrSeq -> VecSort
Expand Down
1 change: 1 addition & 0 deletions benchrunner/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ data SortAlgo
= Insertionsort
| Mergesort
| Quicksort
| Cilksort
| Optsort -- piecewise fallback
deriving (Eq, Show, Read)

Expand Down
8 changes: 2 additions & 6 deletions lh-array-sort.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,9 @@ library
PiecewiseFallbackSort
PiecewiseFallbackSortPar

-- JZ: Add Parallel Cilksort
-- Current Cilksort is entirely sequential
QuickSortCilk
CilkSort

-- remove until ready:
-- QuickSortNew
-- the last not quite ready yet?
-- CilkSort
Linear.Common
other-modules:
Array.List
Expand Down
44 changes: 43 additions & 1 deletion src/Array.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE LiberalTypeSynonyms #-}

-- {-# LANGUAGE Strict #-}

Expand All @@ -15,6 +16,9 @@ module Array

-- * Construction and querying
, alloc, make, generate, generate_par, generate_par_m, makeArray
, flattenCallback, makeCallback, biJoinAllocAffine, allocScratchAffine
, biJoinAlloc, allocScratch

, copy, copy_par, copy_par_m
, size, get, set, slice, append
, splitAt
Expand Down Expand Up @@ -95,9 +99,47 @@ makeArray = make
#endif

{-# INLINE free #-}
free :: HasPrim a => Array a -. ()
free :: Array a -. ()
free = Unsafe.toLinear (\_ -> ())

-- | Turn a continuation-passing array transformer back into a direct one:
-- runs @f@ with 'ur' as the continuation and unwraps the result with 'unur'.
{-# INLINE flattenCallback #-}
flattenCallback :: (forall c. (Array b -. Ur c) -. Array a -. Ur c) -. Array a -. Array b
flattenCallback f arr = unur (f ur arr)

-- | Put a direct array function into continuation-passing form: applies
-- @direct@ to the input array and hands the result to the continuation @k@.
{-# INLINE makeCallback #-}
makeCallback :: (Array b -. Array a) -. (Array a -. Ur c) -. Array b -. Ur c
makeCallback direct k arr = k (direct arr)

-- | Run @f@ on a temporary array obtained from @alloc i a@ together with the
-- source array, returning the destination array @f@ produces. The temporary
-- is handed to @f@ first and the source second; it is not passed to 'free'
-- here.
-- NOTE(review): presumably @alloc i a@ provides an array of size @i@ filled
-- with @a@ -- confirm against the definition of 'alloc'.
{-# INLINE biJoinAllocAffine #-}
biJoinAllocAffine :: HasPrim tmps => Int -> tmps -> (Array tmps -. Array srcs -. Array dsts) -> Array srcs -. Array dsts
biJoinAllocAffine i a f = flattenCallback (\cont src -> alloc i a (\tmp -> makeCallback (f tmp) cont src))

-- efficient implementation of above
-- | Calls @f@ directly on the source array and a scratch array built with
-- @makeArray i a@. Note the callback's argument order: source first, scratch
-- second (the reverse of 'biJoinAllocAffine').
{-# INLINE allocScratchAffine #-}
allocScratchAffine :: HasPrim tmps => Int -> tmps -> (Array srcs -. Array tmps -. Array dsts) -> Array srcs -. Array dsts
allocScratchAffine i a f arr = f arr (makeArray i a)

-- | Run @f@ on a temporary array obtained from @alloc i a@ and the source
-- array. @f@ returns a pair of destination and leftover temporary; the
-- leftover is passed to 'free' and only the destination is returned.
{-# INLINE biJoinAlloc #-}
biJoinAlloc :: HasPrim tmps => Int -> tmps -> (Array tmps -. Array srcs -. (Array dsts, Array tmpdsts)) -> Array srcs -. Array dsts
biJoinAlloc i a f =
let
-- g wraps f so that it yields just the destination after freeing tmp'.
g tmp src =
let
!(dst, tmp') = f tmp src
in
case free tmp' of !() -> dst
in
flattenCallback (\cont src -> alloc i a (\tmp -> makeCallback (g tmp) cont src))

-- efficient implementation of above
-- | Calls @f@ directly on the source array and a scratch array built with
-- @makeArray i a@. @f@ returns a pair of destination and leftover scratch;
-- the leftover is passed to 'free' and only the destination is returned.
-- The callback takes the source first and the scratch array second.
{-# INLINE allocScratch #-}
allocScratch :: HasPrim tmps => Int -> tmps -> (Array srcs -. Array tmps -. (Array dsts, Array tmpdsts)) -> Array srcs -. Array dsts
allocScratch i a f arr =
let
!(dst, tmp) = f arr (makeArray i a)
in case free tmp of !() -> dst


--------------------------------------------------------------------------------
-- Parallel operations
--------------------------------------------------------------------------------
Expand Down
95 changes: 58 additions & 37 deletions src/CilkSort.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,26 @@ module CilkSort where
import qualified Language.Haskell.Liquid.Bag as B
import Language.Haskell.Liquid.ProofCombinators hiding ((?))
import ProofCombinators
import Array
import Array as A
import ArrayOperations
import DpsMerge
import DpsMergePar
import qualified DpsMergeSort4 as Seq
import Properties.Equivalence
import Properties.Order
import Insertion
import QuickSortCilk
import Par

import Linear.Common
#ifdef MUTABLE_ARRAYS
import Array.Mutable as A
import Control.DeepSeq ( NFData(..) )
#else
import Array.List as A
#endif

#define KILO 1024
#define MERGESIZE (2*KILO)
#define QUICKSIZE (2*KILO)
#define INSERTIONSIZE 20
#define QUICKSIZE (8*KILO)
#define SEQSIZE (8*KILO)

-- DPS mergesort -- unfold twice, merge twice
{-@ cilkSortInplace :: xs:Array a
Expand All @@ -35,51 +37,70 @@ import Array.List as A
left zs == left xs && right zs == right xs &&
left ts == left ys && right ts == right ys }>
/ [A.size xs] @-}
cilkSortInplace :: (Show a, Ord a) => A.Array a -> A.Array a -> (A.Array a, A.Array a)
#ifdef MUTABLE_ARRAYS
cilkSortInplace :: (Show a, HasPrimOrd a, NFData a) =>
#else
cilkSortInplace :: (Show a, HasPrimOrd a) =>
#endif
A.Array a -. A.Array a -. (A.Array a, A.Array a)
cilkSortInplace src tmp =
let (len, src') = A.size2 src in
if len <= QUICKSIZE
then let src'' = quickSort src'
in (src'', tmp)
let !(Ur len, src') = A.size2 src in
if len <= SEQSIZE
then
if len <= QUICKSIZE
then let src'' = quickSort src'
in (src'', tmp)
else Seq.msortInplace src' tmp
else
let (srcA, srcB) = splitMid src'
(tmpA, tmpB) = splitMid tmp
(src1, src2) = splitMid srcA
(src3, src4) = splitMid srcB
(tmp1, tmp2) = splitMid tmpA
(tmp3, tmp4) = splitMid tmpB
(src1', tmp1') = cilkSortInplace src1 tmp1
(src2', tmp2') = cilkSortInplace src2 tmp2
(src3', tmp3') = cilkSortInplace src3 tmp3
(src4', tmp4') = cilkSortInplace src4 tmp4
let !(srcA, srcB) = splitMid src'
!(tmpA, tmpB) = splitMid tmp
!(src1, src2) = splitMid srcA
!(src3, src4) = splitMid srcB
!(tmp1, tmp2) = splitMid tmpA
!(tmp3, tmp4) = splitMid tmpB
!(((src1', tmp1'), (src2', tmp2')), ((src3', tmp3'), (src4', tmp4')))
= (.||||.) (cilkSortInplace src1 tmp1) (cilkSortInplace src2 tmp2)
(cilkSortInplace src3 tmp3) (cilkSortInplace src4 tmp4)
tmpA' = A.append tmp1' tmp2'
tmpB' = A.append tmp3' tmp4'
(srcA'', tmpA'') = merge src1' src2' tmpA'
(srcB'', tmpB'') = merge src3' src4' tmpB'
!((srcA'', tmpA''), (srcB'', tmpB''))
= merge_par src1' src2' tmpA' .||. merge_par src3' src4' tmpB'
-- = tuple2 (merge_par src1' src2') tmpA' (merge_par src3' src4') tmpB'
src'' = A.append srcA'' srcB''
(tmp''', src''') = merge tmpA'' tmpB'' src''
in (src''', tmp''') ? lem_toBag_splitMid src
!(tmp''', src''') = merge_par tmpA'' tmpB'' src''
in (src''', tmp''') ? lem_toBag_splitMid src
? lem_toBag_splitMid tmp
? lem_toBag_splitMid srcA
? lem_toBag_splitMid srcB
? lem_toBag_splitMid tmpA
? lem_toBag_splitMid tmpB

{-@ cilkSort' :: { xs:(Array a) | A.size xs > 0 && left xs == 0 && right xs == size xs }
-> { y:a | y == A.get xs 0 }
-> { zs:(Array a) | toBag xs == toBag zs && isSorted' zs &&
A.size xs == A.size zs && token xs == token zs } @-}
cilkSort' :: (Show a, Ord a) => A.Array a -> a -> A.Array a
cilkSort' src anyVal =
let (len, src') = A.size2 src
(src'', _tmp) = cilkSortInplace src' (A.make len anyVal) in
_tmp `seq` src''
{-@ cilkSort' :: y:a
-> { xs:(Array a) | A.size xs > 0 && left xs == 0 && right xs == size xs && y == A.get xs 0 }
-> { zs:(Array a) | toBag xs == toBag zs && isSorted' zs &&
A.size xs == A.size zs && token xs == token zs } @-}
#ifdef MUTABLE_ARRAYS
cilkSort' :: (Show a, HasPrimOrd a, NFData a) =>
#else
cilkSort' :: (Show a, HasPrimOrd a) =>
#endif
a -> A.Array a -. A.Array a
cilkSort' anyVal src =
let !(Ur len, src') = A.size2 src
!src'' = A.allocScratch len anyVal cilkSortInplace src' in
src''

-- finally, the top-level merge sort function
{-@ cilkSort :: { xs:(A.Array a) | left xs == 0 && right xs == size xs }
-> { ys:_ | toBag xs == toBag ys && isSorted' ys &&
A.size xs == A.size ys && token xs == token ys } @-}
cilkSort :: (Show a, Ord a) => A.Array a -> A.Array a
#ifdef MUTABLE_ARRAYS
cilkSort :: (Show a, HasPrimOrd a, NFData a) =>
#else
cilkSort :: (Show a, HasPrimOrd a) =>
#endif
A.Array a -. A.Array a
cilkSort src =
let (len, src') = A.size2 src in
let !(Ur len, src') = A.size2 src in
if len == 0 then src'
else let (x0, src'') = A.get2 src' 0 in cilkSort' src'' x0
else let !(Ur x0, src'') = A.get2 0 src' in cilkSort' x0 src''
Loading
Loading