11#!/usr/bin/env python
2+
3+ #
4+ # The script determines the cost of one iteration of a function (in seconds) using an executable that
5+ #
6+ # - runs `iters` iterations of that function in a tight loop and
7+ # - prints out the time it took to run them.
8+ #
9+ # Example call:
10+ #
11+ # ./criterionmethodology.py $(cabal list-bin benchrunner) Quicksort Seq 2000
12+ #
13+ # In particular, we
14+ #
15+ # - run given executable (the first and only relevant argument) with 'iters' argument varied from 1 to N;
16+ # N and the step size are dynamically determined based on the time it takes to run the binary;
17+ # - fetch timing results from binary's stdout and do linear regression over them;
18+ # - plot the regression (see the `plot` function) in `plot.png`.
19+ #
20+ # Growing the `iters` parameter is the main ingenuity of the script. It follows the Criterion methodology:
21+ # running the given binary for small number of iterations doubling them every time, and upon reaching
22+ # a certain threshold (FIRST_ITER_THRESHOLD), increasing them linearly until the overall execution time
23+ # reaches another threshold (TOTAL_TIME_THRESHOLD) seconds.
24+ #
25+ # - The `converge` function runs the whole process, starting with a small number of iterations.
26+ # - The `iter` function encodes the methodology for increasing 'iters'.
27+ # - The `do_bench` function runs the binary and scrapes the output, so the expected binary's interface is encoded in it.
28+ #
29+
230import numpy as np
331from sys import argv
432import subprocess
735
836from matplotlib import pyplot as plt
937
# When true, log() prints progress and results; when false it prints nothing.
LOG = True
# When true, converge() calls plot() after fitting.
MAKE_PLOT = False
# Seconds. converge() keeps doubling `iters` until one batch takes at least this long.
FIRST_ITER_THRESHOLD = 3e-6 # 0.65
# Seconds. converge() keeps running trials until this much time has elapsed since it started.
TOTAL_TIME_THRESHOLD = 1 # 3.5
# ^^ The trailing values (0.65 and 3.5) are Joseph's originals, but they are too high for my machine.
43+
44+ # Poor-man logging
def log(format, **xs):
    """Print `format` via `print`, forwarding keyword options, if LOG is truthy.

    Does nothing when LOG is falsy. The keyword arguments in `xs` are passed
    straight through to `print` (e.g. `end`, `flush`).
    """
    if not LOG:
        return
    print(format, **xs)
1148
1249def linear_regression_with_std (x , y ):
1350 x = np .array (x )
@@ -23,10 +60,9 @@ def linear_regression_with_std(x, y):
2360 std_dev = np .std (residuals )
2461 return slope , intercept , std_dev
2562
63+ # Do one trial: run the binary with given arguments, including the given `iters`, and return the batch time.
2664def do_bench (cliargs , iters ):
27- bin = cliargs [1 ].rsplit ('/' , 1 )[- 1 ]
28- print ([bin ] + cliargs [2 :] + [str (iters )])
29- out = str (subprocess .check_output ([cliargs [1 ], str (iters )] + cliargs [2 :]))
65+ out = str (subprocess .check_output ([cliargs [0 ], str (iters )] + cliargs [1 :]))
3066 s1 = out [out .find ("SELFTIMED" )+ 11 :]
3167 s2 = float (s1 [:s1 .find ("\n " )- 4 ])
3268 selftimed = s2
@@ -35,51 +71,74 @@ def do_bench(cliargs, iters):
3571 b2 = float (b1 [:b1 .find ("SELFTIMED" )- 2 ])
3672 batchtime = b2
3773
38- #print (f"ITERS: {iters}, BATCHTIME: {batchtime}, SELFTIMED: {selftimed}")
74+ #log (f"ITERS: {iters}, BATCHTIME: {batchtime}, SELFTIMED: {selftimed}")
3975 return batchtime
4076
77+ # Increase 'iters' and do one trial with that. Store results in xs and ys. Return new iters.
def iter(iters, cliargs, start_time, xs, ys):
    """Grow `iters`, run one trial with the new count, and record the result.

    The new iteration count is appended to `xs` and the batch time returned by
    `do_bench` is appended to `ys`; both lists are mutated in place.

    Returns the new iteration count.
    """
    elapsed = time() - start_time
    if elapsed < TOTAL_TIME_THRESHOLD:
        # Within the time budget: scale by 1.2 (truncated), plus one.
        grown = int(math.trunc(float(iters) * 1.2) + 1)
    else:
        # Budget already used up: add one twentieth (integer division), plus one.
        grown = iters + 1 + iters // 20
    log(f"{grown} ", end="", flush=True)
    batch_time = do_bench(cliargs, grown)
    xs.append(grown)
    ys.append(batch_time)
    return grown
88+
def plot(xs, ys, b, c, m, p):
    """Save the measurements and both fitted curves to `plot.png`.

    Draws the raw points (`xs`, `ys`) as red crosses, the straight line
    `m * x + b` in blue between the first and last x, and the power curve
    `c * x ** p` in green. An existing `plot.png` is removed before saving.
    """
    # NOTE(review): no `import os` is visible among this file's imports, so
    # import it here to keep the function from failing with a NameError.
    import os

    plotfile = "plot.png"
    if os.path.exists(plotfile):
        os.remove(plotfile)
    plt.plot(xs, ys, 'rx')
    plt.plot([xs[0], xs[-1]], [m * xs[0] + b, m * xs[-1] + b], color="blue")
    plt.plot(xs, [c * x ** p for x in xs], color="green")
    plt.savefig(plotfile)
96+
97+ # Main function to run the iteration experiment.
98+ # - cliargs is a list of command line arguments WITHOUT the current script's name (argv[0]), in particular:
99+ # - the first argument is the path to the binary, and
100+ # - the rest is simply the arguments to pass to the binary.
def converge(cliargs):
    """Run the full measurement for one binary and fit the per-iteration cost.

    `cliargs[0]` is the path to the binary and the remaining elements are the
    arguments handed to it (see `do_bench`).

    Returns `(m, sig, c, gsd)`: the slope and residual standard deviation of
    the linear fit of batch time against `iters`, followed by the
    exponentiated intercept and standard deviation of the log-log fit.
    """
    binary_name = cliargs[0].rsplit('/', 1)[-1]  # last path component only
    log("Converge on: " + str([binary_name] + cliargs[1:]))
    log("iters: ", end="")
    xs, ys = [], []
    iters = 1
    started = time()

    # Phase 1: double `iters` until a batch takes at least FIRST_ITER_THRESHOLD
    # seconds; that first long-enough trial becomes the first data point.
    while True:
        log(str(iters) + " ", end="", flush=True)
        batch_time = do_bench(cliargs, iters)
        if not batch_time < FIRST_ITER_THRESHOLD:
            break
        iters *= 2
    xs.append(iters)
    ys.append(batch_time)

    log(" | ", end="", flush=True)
    # Phase 2: two more trials with growing `iters`, regardless of elapsed time.
    for _ in range(2):
        iters = iter(iters, cliargs, started, xs, ys)

    log(" | ", end="", flush=True)
    # Phase 3: keep going until TOTAL_TIME_THRESHOLD seconds have elapsed in total.
    while time() - started < TOTAL_TIME_THRESHOLD:
        iters = iter(iters, cliargs, started, xs, ys)
    log("done!")

    # Linear fit on the raw data, then a second fit in log-log space.
    m, b, sig = linear_regression_with_std(xs, ys)
    log_xs = [math.log(x) for x in xs]
    log_ys = [math.log(y) for y in ys]
    p, lnc, lngsd = linear_regression_with_std(log_xs, log_ys)
    c = math.exp(lnc)
    gsd = math.exp(lngsd)

    log(f"Slope (Mean): {m:.2e}, Stdev: {sig:.2e}, Intercept (Overhead): {b:.2e}")
    log(f"Factor (Geomean): {c:.2e}, GeoStdev: {gsd:.2e}, Power (Distortion): {p:.2e}")

    if MAKE_PLOT:
        plot(xs, ys, b, c, m, p)

    return m, sig, c, gsd
83142
if __name__ == "__main__":
    # converge() reads cliargs[0] as the binary path, so at least one argument
    # is required; exit with a usage message instead of a bare IndexError.
    if len(argv) < 2:
        raise SystemExit(f"usage: {argv[0]} BINARY [ARGS...]")
    converge(argv[1:])
0 commit comments