@@ -0,0 +1,81 @@
#!/usr/bin/env python3
import numpy as np
from sys import argv
import subprocess
from time import time
import math

from matplotlib import pyplot as plt

MAKE_PLOT = False

def linear_regression_with_std(x, y):
    x = np.array(x)
    y = np.array(y)
    x_mean = np.mean(x)
    y_mean = np.mean(y)
    numerator = np.sum((x - x_mean) * (y - y_mean))
    denominator = np.sum((x - x_mean) ** 2)
    slope = numerator / denominator
    intercept = y_mean - slope * x_mean
    y_pred = slope * x + intercept
    residuals = y - y_pred
    std_dev = np.std(residuals)
    return slope, intercept, std_dev

def do_bench(cliargs, iters):
    # Run the benchmark binary once with the given iteration count and parse
    # the BATCHTIME/SELFTIMED figures it prints; `out` is the repr of the
    # captured bytes, hence the offset arithmetic below.
    print([cliargs[1], str(iters)] + cliargs[2:])
    out = str(subprocess.check_output([cliargs[1], str(iters)] + cliargs[2:]))
    s1 = out[out.find("SELFTIMED")+11:]
    s2 = float(s1[:s1.find("\n")-4])
    selftimed = s2

    b1 = out[out.find("BATCHTIME")+11:]
    b2 = float(b1[:b1.find("SELFTIMED")-2])
    batchtime = b2

    print(f"ITERS: {iters}, BATCHTIME: {batchtime}, SELFTIMED: {selftimed}")
    return batchtime

def converge(cliargs):
    xs = []
    ys = []
    iters = 1
    t = time()
    # Double iters until the first measurement clears the 0.65 threshold,
    # then keep that point as the first sample.
    while len(xs) == 0:
        st = do_bench(cliargs, iters)
        if st * iters < 0.65:
            iters *= 2
            continue
        xs.append(iters)
        ys.append(st)
    for _ in range(2):
        if time() - t < 3.5:
            iters = int(math.trunc(float(iters) * 1.2) + 1)
        else:
            iters += 1 + iters // 20
        st = do_bench(cliargs, iters)
        xs.append(iters)
        ys.append(st)
    # Keep collecting (iters, batchtime) samples until the ~3.5 s budget runs out.
    while time() - t < 3.5:
        if time() - t < 3.5:
            iters = int(math.trunc(float(iters) * 1.2) + 1)
        else:
            iters += 1 + iters // 20
        st = do_bench(cliargs, iters)
        xs.append(iters)
        ys.append(st)
    # Arithmetic fit: slope = mean time per iteration, intercept = fixed overhead.
    m, b, sigma = linear_regression_with_std(xs, ys)
    print(f"Slope (Mean): {m}, Intercept (Overhead): {b}, Stdev: {sigma}")
    # Geometric fit in log-log space: slope = power, exp(intercept) = factor.
    p, lnc, lngsd = linear_regression_with_std([math.log(x) for x in xs], [math.log(y) for y in ys])
    c, gsd = math.exp(lnc), math.exp(lngsd)
    print(f"Power (Distortion): {p}, Factor (Geomean) {c}, GeoStdev {gsd}")
    if MAKE_PLOT:
        plt.plot(xs, ys, 'rx')
        plt.plot([xs[0], xs[-1]], [m*xs[0]+b, m*xs[-1]+b], color="blue")
        plt.plot(xs, [c*x**p for x in xs], color="green")
        plt.savefig("plot.png")
    return m, sigma, c, gsd

if __name__ == "__main__":
    print(converge(argv))
5 changes: 5 additions & 0 deletions benchmarks/scripts/criterion-drop-in-replacement/readme
@@ -0,0 +1,5 @@
The script `criterionmethodology.py` is my implementation of a benchrunner-runner that uses the criterion methodology. We take as input some program which takes `iters` as a command-line argument, times a function of interest in a tight loop that repeats `iters` times, and then prints to stdout the batchtime (total loop time) and selftimed (total loop time divided by `iters`). The essence of criterion is then to sweep `iters` and perform a linear regression of batchtime against `iters`. The slope is the mean per-iteration time and the y-intercept represents shared overhead that is insensitive to `iters`. Ultimately, criterion serves as a way to benchmark tasks with very short execution times, since constant startup overhead is separated out by the regression rather than contaminating the mean.
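
As a minimal sketch of that fit (separate from the script itself, using NumPy's polyfit on hypothetical numbers: a 2 ms per-iteration cost plus 50 ms of fixed overhead):

import numpy as np

iters = np.array([1, 2, 4, 8, 16, 32])
batchtime = 0.002 * iters + 0.050   # hypothetical total loop times, in seconds

slope, intercept = np.polyfit(iters, batchtime, 1)
print(slope)      # ~0.002 -> mean time per iteration
print(intercept)  # ~0.050 -> shared overhead, insensitive to iters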

Since we have relatively precise timing over loops, I also implemented the criterion methodology *geometrically*. I take the logarithm of all the x and y values, compute the linear regression over that, then exponentiate the y-intercept; this represents the geomean. The other dependent portion, the slope, becomes a power (the equation is y = e^b x^m), which represents *geometric overhead*, i.e. how much overhead is added per iteration. This may do well to model slowdowns arising from pre-allocating arrays. Additionally, since performance data is non-negative and judged multiplicatively (twice as good means the numbers are halved, twice as bad means they are doubled; these are all *factors*), the geomean and geo-standard-deviation may make more sense theoretically. However, from my testing, the geomean seems to vary wildly for programs with fleeting execution times, even between repeat runs with the same parameters.
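
A corresponding sketch of the log-log fit, again on hypothetical numbers generated from y = c * x^p with factor c = 0.002 and power p = 1.1 (a slight per-iteration distortion):

import numpy as np

iters = np.array([1, 2, 4, 8, 16, 32], dtype=float)
batchtime = 0.002 * iters ** 1.1    # hypothetical measurements

p, ln_c = np.polyfit(np.log(iters), np.log(batchtime), 1)
print(p)             # ~1.1   -> power, i.e. geometric overhead per iteration
print(np.exp(ln_c))  # ~0.002 -> factor, recovered by exponentiating the intercept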

The scripts `criterionmethodology.py` and `sweep_seq.py` can both be run directly. The first takes command-line arguments, e.g. `criterionmethodology benchrunner Quicksort Seq 2000` will call `benchrunner iters Quicksort Seq 2000` for various `iters`. `sweep_seq` performs a logarithmic sweep over different array sizes, invoking the criterion methodology at each point.
51 changes: 51 additions & 0 deletions benchmarks/scripts/criterion-drop-in-replacement/sweep_seq.py
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
import os
import numpy as np
from criterionmethodology import converge
import sys

# names = ["Optsort", "Insertionsort", "Mergesort", "Quicksort"]
# names = ["CopyArray", "Quicksort", "Insertionsort", "Mergesort"]
names = ["Insertionsort"]

# DENSITY = 4
DENSITY = 12
def bounds(name):
    match name:
        case "Insertionsort":
            lo = 3  # 2**n ...
            hi = 16
        case "Quicksort":
            lo = 3
            hi = 22
        case "Mergesort":
            # lo = 12
            lo = 3
            hi = 24
        case "Cilksort":
            # lo = 12
            lo = 3
            hi = 16  # 24
        case "Optsort":
            lo = 3
            hi = 16  # 24
        case _:
            lo = 3
            hi = 20
    return lo, hi, (hi-lo)*DENSITY+1

def dotrial(name, size):
    return converge([sys.argv[0], "benchrunner", name, "Seq", str(int(size))])

if __name__ == "__main__":
    for name in names:
        lo, hi, pts = bounds(name)
        with open("%s_out3.csv" % name, "w") as f:
            f.write("# size\tmean\tstddev\tgeomean\tgeostdev\n")
        for i in np.unique(np.logspace(lo, hi, pts, base=2).astype(int)):
            with open("%s_out3.csv" % name, "a") as f:
                try:
                    f.write("%d" % int(i) + "\t%f\t%f\t%f\t%f\n" % dotrial(name, i))
                except Exception:
                    # Skip sizes where the benchmark run fails.
                    pass

File renamed without changes.
File renamed without changes.
3 changes: 3 additions & 0 deletions benchrunner/Benchrunner.hs
@@ -19,6 +19,7 @@ import qualified Insertion as I
import qualified QuickSort as Q
import qualified DpsMergeSort4 as DMS
import qualified DpsMergeSort4Par as DMSP
import qualified CilkSort as CSP
import qualified PiecewiseFallbackSort as PFS
import qualified PiecewiseFallbackSortPar as PFSP
import qualified Microbench as MB
@@ -41,6 +42,7 @@ getInput bench mb_size = case bench of
  Insertionsort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 100)
  Quicksort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 1000000)
  Mergesort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 8000000)
  Cilksort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 8000000)
  Optsort -> ArrayIn <$> randArray (Proxy :: Proxy Int64) (mb 8000000)
  _ -> error "getInput: Unexpected Input!"
  where
@@ -103,6 +105,7 @@ sortFn bench parorseq = case (bench,parorseq) of
  (Mergesort, Par) -> DMSP.msort
  (Optsort, Seq) -> PFS.pfsort
  (Optsort, Par) -> PFSP.pfsort
  (Cilksort, Par) -> CSP.cilkSort
  oth -> error $ "sortFn: unknown configuration: " ++ show oth

vectorSortFn :: SortAlgo -> ParOrSeq -> VecSort
1 change: 1 addition & 0 deletions benchrunner/Types.hs
@@ -11,6 +11,7 @@ data SortAlgo
  = Insertionsort
  | Mergesort
  | Quicksort
  | Cilksort
  | Optsort -- piecewise fallback
  deriving (Eq, Show, Read)

8 changes: 2 additions & 6 deletions lh-array-sort.cabal
@@ -67,13 +67,9 @@ library
    PiecewiseFallbackSort
    PiecewiseFallbackSortPar

    -- JZ: Add Parallel Cilksort
    -- Current Cilksort is entirely sequential
    QuickSortCilk
    CilkSort

    -- remove until ready:
    -- QuickSortNew
    -- the last not quite ready yet?
    -- CilkSort
    Linear.Common
  other-modules:
    Array.List
46 changes: 45 additions & 1 deletion src/Array.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE LiberalTypeSynonyms #-}

-- {-# LANGUAGE Strict #-}

@@ -15,6 +16,9 @@ module Array

  -- * Construction and querying
  , alloc, make, generate, generate_par, generate_par_m, makeArray
  , flattenCallback, makeCallback, biJoinAllocAffine, allocScratchAffine
  , biJoinAlloc, allocScratch

  , copy, copy_par, copy_par_m
  , size, get, set, slice, append
  , splitAt
@@ -95,9 +99,49 @@ makeArray = make
#endif

{-# INLINE free #-}
free :: HasPrim a => Array a -. ()
free :: Array a -. ()
free = Unsafe.toLinear (\_ -> ())

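-- The helpers below run an array computation against a freshly allocated
-- scratch array without the caller having to thread that scratch array
-- through its own types: `flattenCallback` turns a CPS-style transformer
-- into a direct `Array a -. Array b`, `makeCallback` converts a direct
-- transformer back into callback style, and the `biJoinAlloc*` /
-- `allocScratch*` variants allocate an `i`-element scratch array filled
-- with `a`, pass it together with the source array to `f`, and (in the
-- non-Affine versions) free whatever remains of the scratch array.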
{-# INLINE flattenCallback #-}
{-@ flattenCallback :: f:_ -> xs:_ ->
      ret:{ ys:(Array a) } @-}
flattenCallback :: (forall c. (Array b -. Ur c) -. Array a -. Ur c) -. Array a -. Array b
flattenCallback f arr = unur (f ur arr)

{-# INLINE makeCallback #-}
makeCallback :: (Array b -. Array a) -. (Array a -. Ur c) -. Array b -. Ur c
makeCallback direct k arr = k (direct arr)

{-# INLINE biJoinAllocAffine #-}
biJoinAllocAffine :: HasPrim tmps => Int -> tmps -> (Array tmps -. Array srcs -. Array dsts) -> Array srcs -. Array dsts
biJoinAllocAffine i a f = flattenCallback (\cont src -> alloc i a (\tmp -> makeCallback (f tmp) cont src))

-- efficient implementation of above
{-# INLINE allocScratchAffine #-}
allocScratchAffine :: HasPrim tmps => Int -> tmps -> (Array srcs -. Array tmps -. Array dsts) -> Array srcs -. Array dsts
allocScratchAffine i a f arr = f arr (makeArray i a)

{-# INLINE biJoinAlloc #-}
biJoinAlloc :: HasPrim tmps => Int -> tmps -> (Array tmps -. Array srcs -. (Array dsts, Array tmpdsts)) -> Array srcs -. Array dsts
biJoinAlloc i a f =
  let
    g tmp src =
      let
        !(dst, tmp') = f tmp src
      in
        case free tmp' of !() -> dst
  in
    flattenCallback (\cont src -> alloc i a (\tmp -> makeCallback (g tmp) cont src))

-- efficient implementation of above
{-# INLINE allocScratch #-}
allocScratch :: HasPrim tmps => Int -> tmps -> (Array srcs -. Array tmps -. (Array dsts, Array tmpdsts)) -> Array srcs -. Array dsts
allocScratch i a f arr =
  let
    !(dst, tmp) = f arr (makeArray i a)
  in case free tmp of !() -> dst


--------------------------------------------------------------------------------
-- Parallel operations
--------------------------------------------------------------------------------