Skip to content

Commit ce3065f

Browse files
committed
cooking the Python: it's getting there
1 parent abaf99b commit ce3065f

File tree

2 files changed

+93
-35
lines changed

2 files changed

+93
-35
lines changed
Lines changed: 86 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,32 @@
11
#!/usr/bin/env python
2+
3+
#
4+
# The script determines the cost of one iteration of a function (in seconds) using an executable that
5+
#
6+
# - runs `iters` iterations of that function in a tight loop and
7+
# - prints out the time it took to run them.
8+
#
9+
# Example call:
10+
#
11+
# ./criterionmethodology.py $(cabal list-bin benchrunner) Quicksort Seq 2000
12+
#
13+
# In particular, we
14+
#
15+
# - run given executable (the first and only relevant argument) with 'iters' argument varied from 1 to N;
16+
# N and the step size are dynamically determined based on the time it takes to run the binary;
17+
# - fetch timing results from binary's stdout and do linear regression over them;
18+
# - plot the regression (see the `plot` function) in `plot.png`.
19+
#
20+
# Growing the `iters` parameter is the main ingenuity of the script. It follows the Criterion methodology:
21+
# running the given binary for small number of iterations doubling them every time, and upon reaching
22+
# a certain threshold (FIRST_ITER_THRESHOLD), increasing them linearly until the overall execution time
23+
# reaches another threshold (TOTAL_TIME_THRESHOLD, in seconds).
24+
#
25+
# - The `converge` function runs the whole process, starting with a small number of iterations.
26+
# - The `iter` function encodes the methodology for increasing 'iters'.
27+
# - The `do_bench` function runs the binary and scrapes the output, so the expected binary's interface is encoded in it.
28+
#
29+
230
import numpy as np
331
from sys import argv
432
import subprocess
@@ -7,7 +35,16 @@
735

836
from matplotlib import pyplot as plt
937

38+
LOG=True
1039
MAKE_PLOT = False
40+
FIRST_ITER_THRESHOLD = 3e-6 # 0.65
41+
TOTAL_TIME_THRESHOLD = 1 # 3.5
42+
# ^^ Joseph's original values, but they are too high for my machine.
43+
44+
# Poor man's logging
45+
def log(format, **xs):
46+
if LOG:
47+
print(format, **xs)
1148

1249
def linear_regression_with_std(x, y):
1350
x = np.array(x)
@@ -23,10 +60,9 @@ def linear_regression_with_std(x, y):
2360
std_dev = np.std(residuals)
2461
return slope, intercept, std_dev
2562

63+
# Do one trial: run the binary with given arguments, including the given `iters`, and return the batch time.
2664
def do_bench(cliargs, iters):
27-
bin = cliargs[1].rsplit('/', 1)[-1]
28-
print([bin] + cliargs[2:] + [str(iters)])
29-
out = str(subprocess.check_output([cliargs[1], str(iters)] + cliargs[2:]))
65+
out = str(subprocess.check_output([cliargs[0], str(iters)] + cliargs[1:]))
3066
s1 = out[out.find("SELFTIMED")+11:]
3167
s2 = float(s1[:s1.find("\n")-4])
3268
selftimed = s2
@@ -35,51 +71,74 @@ def do_bench(cliargs, iters):
3571
b2 = float(b1[:b1.find("SELFTIMED")-2])
3672
batchtime = b2
3773

38-
#print(f"ITERS: {iters}, BATCHTIME: {batchtime}, SELFTIMED: {selftimed}")
74+
#log(f"ITERS: {iters}, BATCHTIME: {batchtime}, SELFTIMED: {selftimed}")
3975
return batchtime
4076

77+
# Increase 'iters' and do one trial with that. Store results in xs and ys. Return new iters.
78+
def iter(iters, cliargs, start_time, xs, ys):
79+
if time() - start_time < TOTAL_TIME_THRESHOLD:
80+
iters = int(math.trunc(float(iters) * 1.2) + 1)
81+
else:
82+
iters += 1 + iters // 20
83+
log(str(iters) + " ", end="", flush=True)
84+
st = do_bench(cliargs, iters)
85+
xs.append(iters)
86+
ys.append(st)
87+
return iters
88+
89+
def plot(xs, ys, b, c, m, p):
90+
plotfile = "plot.png"
91+
os.remove(plotfile) if os.path.exists(plotfile) else None
92+
plt.plot(xs, ys, 'rx')
93+
plt.plot([xs[0], xs[-1]], [m*xs[0]+b, m*xs[-1]+b], color="blue")
94+
plt.plot(xs, [c*x**p for x in xs], color="green")
95+
plt.savefig(plotfile)
96+
97+
# Main function to run the iteration experiment.
98+
# - cliargs is a list of command line arguments WITHOUT the current script's name (argv[0]), in particular:
99+
# - the first argument is the path to the binary, and
100+
# - the rest is simply the arguments to pass to the binary.
41101
def converge(cliargs):
102+
bin = cliargs[0].rsplit('/', 1)[-1] # Get the binary name from the path
103+
log("Converge on: " + str([bin] + cliargs[1:]))
104+
log("iters: ", end="")
42105
xs = []
43106
ys = []
44107
iters = 1
45108
t = time()
109+
110+
# First find a starting point for `iters` where the time is at least FIRST_ITER_THRESHOLD seconds
46111
while len(xs) == 0:
112+
log(str(iters) + " ", end="", flush=True)
47113
st = do_bench(cliargs, iters)
48-
if st * iters < 0.65:
114+
if st < FIRST_ITER_THRESHOLD: # Artem: Joseph had `st * iters < ...` here but I think it's a typo
49115
iters *= 2
50116
continue
51117
xs.append(iters)
52118
ys.append(st)
119+
120+
log(" | ", end="", flush=True)
121+
# Do two more trials increasing iters regardless of time
53122
for _ in range(2):
54-
if time() - t < 3.5:
55-
iters = int(math.trunc(float(iters) * 1.2) + 1)
56-
else:
57-
iters += 1 + iters // 20
58-
st = do_bench(cliargs, iters)
59-
xs.append(iters)
60-
ys.append(st)
61-
while time() - t < 3.5:
62-
if time() - t < 3.5:
63-
iters = int(math.trunc(float(iters) * 1.2) + 1)
64-
else:
65-
iters += 1 + iters // 20
66-
st = do_bench(cliargs, iters)
67-
xs.append(iters)
68-
ys.append(st)
123+
iters = iter(iters, cliargs, t, xs, ys)
124+
125+
log(" | ", end="", flush=True)
126+
# Keep increasing iters until we reach TOTAL_TIME_THRESHOLD seconds of execution in total
127+
while time() - t < TOTAL_TIME_THRESHOLD:
128+
iters = iter(iters, cliargs, t, xs, ys)
129+
log("done!")
69130

70131
m, b, sig = linear_regression_with_std(xs, ys)
71132
p, lnc, lngsd = linear_regression_with_std([math.log(x) for x in xs], [math.log(y) for y in ys])
72133
c, gsd = math.exp(lnc), math.exp(lngsd)
73134

74-
print(f"Slope (Mean): {m:.2e}, Stdev: {sig:.2e}, Intercept (Overhead): {b:.2e}")
75-
print(f"Factor (Geomean): {c:.2e}, GeoStdev: {gsd:.2e}, Power (Distortion): {p:.2e}")
135+
log(f"Slope (Mean): {m:.2e}, Stdev: {sig:.2e}, Intercept (Overhead): {b:.2e}")
136+
log(f"Factor (Geomean): {c:.2e}, GeoStdev: {gsd:.2e}, Power (Distortion): {p:.2e}")
76137

77138
if MAKE_PLOT:
78-
plt.plot(xs, ys, 'rx')
79-
plt.plot([xs[0], xs[-1]], [m*xs[0]+b, m*xs[-1]+b], color="blue")
80-
plt.plot(xs, [c*x**p for x in xs], color="green")
81-
plt.savefig("plot.png")
139+
plot(xs, ys, b, c, m, p)
140+
82141
return m, sig, c, gsd
83142

84143
if __name__ == "__main__":
85-
converge(argv)
144+
converge(argv[1:])

benchmarks/scripts/criterion-drop-in-replacement/sweep_seq.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def bounds(name):
1414
match name:
1515
case "Insertionsort":
1616
lo = 3 # 2**n ...
17-
hi = 16
17+
hi = 12 # for local testing; initially: 16
1818
case "Quicksort":
1919
lo = 3
2020
hi = 22
@@ -34,17 +34,16 @@ def bounds(name):
3434
hi = 20
3535
return lo, hi, (hi-lo)*DENSITY+1
3636

37-
def dotrial(name, size):
38-
return converge([sys.argv[0], "benchrunner", name, "Seq", str(int(size))])
37+
def dotrial(exe, name, size):
38+
return converge([exe, name, "Seq", str(int(size))])
3939

4040
if __name__ == "__main__":
41+
exe = sys.argv[1]
42+
print("Running with executable:", exe)
4143
for name in names:
4244
lo, hi, pts = bounds(name)
4345
with open("%s_out3.csv" % name, "w") as f:
4446
f.write("# size\tmean\tstddev\tgeomean\tgeostdev\n")
45-
for i in np.unique(np.logspace(lo, hi, pts, base=2).astype(int)):
47+
for i in np.unique(np.logspace(lo, hi, pts, base=2).astype(int)): # Artem: I don't understand this and I must
4648
with open("%s_out3.csv" % name, "a") as f:
47-
try:
48-
f.write("%d" % int(i) + "\t%f\t%f\t%f\t%f\n" % dotrial(name, i))
49-
except:
50-
pass
49+
f.write("%d" % int(i) + "\t%f\t%f\t%f\t%f\n" % dotrial(exe, name, i))

0 commit comments

Comments
 (0)