Skip to content

Commit 979bc29

Browse files
jazulloulysses4ever
authored andcommitted
Add new plotting scripts and an explainer
1 parent 337d733 commit 979bc29

File tree

11 files changed

+147
-0
lines changed

11 files changed

+147
-0
lines changed
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env python3
2+
import numpy as np
3+
from sys import argv
4+
import subprocess
5+
from time import time
6+
import math
7+
8+
from matplotlib import pyplot as plt
9+
10+
# When True, converge() also draws the collected samples together with the
# linear fit and the power-law fit and saves the figure as "plot.png".
MAKE_PLOT = False
11+
12+
def linear_regression_with_std(x, y):
    """Fit a straight line through the points by ordinary least squares.

    Args:
        x: Sequence of independent-variable values.
        y: Sequence of dependent-variable values, same length as ``x``.

    Returns:
        A ``(slope, intercept, std_dev)`` tuple, where ``std_dev`` is the
        standard deviation of the residuals ``y - (slope * x + intercept)``
        as computed by ``np.std`` with its default arguments.
    """
    xs = np.array(x)
    ys = np.array(y)

    x_center = xs.mean()
    y_center = ys.mean()

    # Closed-form least squares: covariance of (x, y) over variance of x.
    dx = xs - x_center
    slope = (dx * (ys - y_center)).sum() / (dx ** 2).sum()
    intercept = y_center - slope * x_center

    spread = np.std(ys - (slope * xs + intercept))
    return slope, intercept, spread
25+
26+
def do_bench(cliargs, iters):
    """Run the benchmark program once and return the batch time it reports.

    The command executed is ``[cliargs[1], str(iters)] + cliargs[2:]``. Its
    standard output must contain the labels ``BATCHTIME`` and ``SELFTIMED``,
    each followed (optionally after a colon and whitespace) by a number.

    Args:
        cliargs: argv-style list; element 0 is ignored, element 1 is the
            program to run, and the remaining elements are passed through
            after the iteration count.
        iters: Iteration count handed to the program as its first argument.

    Returns:
        The BATCHTIME value as a float.

    Raises:
        subprocess.CalledProcessError: The program exited with non-zero status.
        ValueError: A label or the number following it is missing from stdout.
    """
    import re  # local import: the file's import block does not provide re

    command = [cliargs[1], str(iters)] + cliargs[2:]
    print(command)
    # Decode to real text. Parsing str(bytes) would operate on the b'...'
    # repr, where newlines appear as a literal backslash followed by "n".
    out = subprocess.check_output(command, text=True)

    def reported(label):
        # First occurrence of the label, then the number that follows it.
        found = re.search(
            label + r"\s*:?\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)",
            out,
        )
        if found is None:
            raise ValueError(f"no {label} value in benchmark output: {out!r}")
        return float(found.group(1))

    selftimed = reported("SELFTIMED")
    batchtime = reported("BATCHTIME")

    print(f"ITERS: {iters}, BATCHTIME: {batchtime}, SELFTIMED: {selftimed}")
    return batchtime
39+
40+
def _next_iters(iters, elapsed):
    """Return the next, strictly larger iteration count for the sweep.

    Grows geometrically (about 20%) while ``elapsed`` is under 3.5 seconds,
    and by about 5% once that budget has been used up.
    """
    if elapsed < 3.5:
        return int(math.trunc(float(iters) * 1.2) + 1)
    return iters + 1 + iters // 20


def converge(cliargs):
    """Sweep the iteration count of a benchmark and fit its batch times.

    First doubles ``iters`` (starting at 1) until the calibration test below
    passes. It then records that sample, takes at least two more samples at
    increasing ``iters``, and keeps sampling until 3.5 seconds have passed
    since the function started. The (iters, batchtime) samples are fitted
    twice: linearly, and linearly in log-log space (a power law).

    Args:
        cliargs: argv-style list forwarded unchanged to ``do_bench``.

    Returns:
        ``(m, sigma, c, gsd)``: slope and residual standard deviation of the
        linear fit, followed by the exponentiated intercept and exponentiated
        residual standard deviation of the log-log fit.

    Raises:
        ValueError: From ``math.log`` if any recorded batch time is not
            positive.
    """
    xs = []
    ys = []
    iters = 1
    start = time()

    # Calibration: double iters until one batch is considered long enough.
    # NOTE(review): st is the batch time returned by do_bench, so the product
    # st * iters looks like a leftover from a per-iteration time; confirm the
    # intended threshold before changing it.
    while not xs:
        st = do_bench(cliargs, iters)
        if st * iters < 0.65:
            iters *= 2
            continue
        xs.append(iters)
        ys.append(st)

    # Always take two further samples, then continue until the time budget
    # is used up.
    taken = 0
    while taken < 2 or time() - start < 3.5:
        iters = _next_iters(iters, time() - start)
        st = do_bench(cliargs, iters)
        xs.append(iters)
        ys.append(st)
        taken += 1

    m, b, sigma = linear_regression_with_std(xs, ys)
    print(f"Slope (Mean): {m}, Intercept (Overhead): {b}, Stdev: {sigma}")
    p, lnc, lngsd = linear_regression_with_std(
        [math.log(x) for x in xs], [math.log(y) for y in ys]
    )
    c, gsd = math.exp(lnc), math.exp(lngsd)
    print(f"Power (Distortion): {p}, Factor (Geomean) {c}, GeoStdev {gsd}")

    if MAKE_PLOT:
        # Use a dedicated figure and close it afterwards so that repeated
        # calls do not pile earlier trials onto the same axes.
        fig, ax = plt.subplots()
        ax.plot(xs, ys, 'rx')
        ax.plot([xs[0], xs[-1]], [m*xs[0]+b, m*xs[-1]+b], color="blue")
        ax.plot(xs, [c*x**p for x in xs], color="green")
        fig.savefig("plot.png")
        plt.close(fig)

    return m, sigma, c, gsd
79+
80+
if __name__ == "__main__":
    # do_bench() runs argv[1] as the benchmark program, so at least one
    # argument is required; fail with a usage message instead of an
    # IndexError deep inside the sweep.
    if len(argv) < 2:
        raise SystemExit(f"usage: {argv[0]} PROGRAM [ARGS...]")
    print(converge(argv))
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
The script `criterionmethodology.py` is my implementation of a benchrunner-runner that uses the criterion methodology.
2+
We take as input some program which takes `iters` as a command-line argument, times a function of interest in a tight loop which repeats `iters` many times, and then prints to stdout the batchtime (total loop time) and selftimed (total loop time divided by iters).
3+
The essence of criterion is then to sweep `iters` and perform a linear regression of batchtime against `iters`.
4+
The slope is the mean and the y-intercept represents some notion of shared overhead, insensitive to `iters`.
5+
Ultimately, criterion serves as a way to benchmark tasks with very short execution times, as startup overhead can be ignored.
6+
7+
Since we have relatively precise timing over loops, I also implemented the criterion methodology *geometrically*.
8+
I take the logarithm of all the x and y values, compute the linear regression over that, then exponentiate the y-intercept - this represents the geomean.
9+
The other dependent portion, which is the slope, becomes a power (the equation is y = e^b x^m), which represents *geometric overhead*, e.g. how much overhead is being added per iteration.
10+
This may do well to model any slowdowns arising from pre-allocating arrays.
11+
Additionally, since performance data is non-negative and judged multiplicatively (twice as good means numbers are halved, twice as bad means numbers are doubled; these are all *factors*), the geomean and geo-standard-deviation may make more sense theoretically.
12+
However, from my testing, the geomean seems to vary wildly for programs with fleeting execution times, even between repeat runs with the same parameters.
13+
14+
The scripts `criterionmethodology.py` and `sweep_seq.py` can both be run directly.
15+
The first takes command-line arguments, e.g. `criterionmethodology benchrunner Quicksort Seq 2000` will call `benchrunner iters Quicksort Seq 2000` for various `iters`.
16+
`sweep_seq` performs a logarithmic sweep over different array sizes, invoking the criterion methodology at each point.
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import numpy as np
4+
from criterionmethodology import converge
5+
import sys
6+
7+
# names = ["Optsort", "Insertionsort", "Mergesort", "Quicksort"]
8+
# names = ["CopyArray", "Quicksort", "Insertionsort", "Mergesort"]
9+
# Benchmarks to sweep. Each name is passed to benchrunner through dotrial()
# and gets its own "<name>_out3.csv" results file.
names = ["Insertionsort"]
10+
11+
# DENSITY = 4
12+
# Number of sample points per power of two in the size sweep; bounds() turns
# this into the total point count (hi - lo) * DENSITY + 1.
DENSITY = 12
13+
def bounds(name):
    """Return the sweep range for a benchmark.

    Args:
        name: Benchmark name, e.g. ``"Quicksort"``.

    Returns:
        ``(lo, hi, points)``: the sweep covers sizes from ``2**lo`` to
        ``2**hi`` and ``points`` is ``(hi - lo) * DENSITY + 1``.
    """
    # Every benchmark starts at 2**3; only the upper exponent differs.
    lo = 3
    if name in ("Insertionsort", "Cilksort", "Optsort"):
        hi = 16
    elif name == "Quicksort":
        hi = 22
    elif name == "Mergesort":
        hi = 24
    else:
        hi = 20
    points = (hi - lo) * DENSITY + 1
    return lo, hi, points
36+
37+
def dotrial(name, size):
    """Run one converge() trial of benchmark ``name`` at input size ``size``.

    Builds an argv-style list (this script's own argv[0], the program
    "benchrunner", the benchmark name, the mode "Seq" and the size as an
    integer string) and returns whatever converge() returns for it.
    """
    program = sys.argv[0]
    size_arg = str(int(size))
    return converge([program, "benchrunner", name, "Seq", size_arg])
39+
40+
if __name__ == "__main__":
    # For each benchmark, sweep logarithmically spaced input sizes and write
    # one tab-separated row per size to "<name>_out3.csv".
    for name in names:
        lo, hi, pts = bounds(name)
        out_path = "%s_out3.csv" % name
        with open(out_path, "w") as f:
            f.write("# size\tmean\tstddev\tgeomean\tgeostdev\n")
        for i in np.unique(np.logspace(lo, hi, pts, base=2).astype(int)):
            try:
                row = "%d" % int(i) + "\t%f\t%f\t%f\t%f\n" % dotrial(name, i)
            except Exception as err:
                # Best effort: report the failed size and keep sweeping.
                # Ctrl-C is not caught here, so it still aborts the run.
                print(
                    f"{name} size={int(i)}: trial failed: {err!r}",
                    file=sys.stderr,
                )
                continue
            # Re-open in append mode per row so finished trials are already
            # on disk if a later one is interrupted.
            with open(out_path, "a") as f:
                f.write(row)
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)