Skip to content

Commit 899742d

Browse files
committed
Add new scripts and an explainer
1 parent 5f23443 commit 899742d

File tree

11 files changed

+135
-0
lines changed

11 files changed

+135
-0
lines changed
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env python3
2+
import numpy as np
3+
from sys import argv
4+
import subprocess
5+
from time import time
6+
import math
7+
8+
from matplotlib import pyplot as plt
9+
10+
# When True, converge() saves a diagnostic plot of both regressions to plot.png.
MAKE_PLOT = False
12+
def linear_regression_with_std(x, y):
    """Ordinary least-squares fit of y against x.

    Parameters
    ----------
    x, y : array-like of equal length; x needs at least two distinct values.

    Returns
    -------
    (slope, intercept, std_dev) where std_dev is the population standard
    deviation of the residuals ``y - (slope * x + intercept)``.

    Raises
    ------
    ValueError
        If every x is identical (the slope would be undefined; the original
        code silently produced nan/inf here with a runtime warning).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    x_mean = np.mean(x)
    y_mean = np.mean(y)
    denominator = np.sum((x - x_mean) ** 2)
    if denominator == 0:
        raise ValueError("x values must not all be equal")
    slope = np.sum((x - x_mean) * (y - y_mean)) / denominator
    intercept = y_mean - slope * x_mean
    residuals = y - (slope * x + intercept)
    std_dev = np.std(residuals)
    return slope, intercept, std_dev
26+
def do_bench(cliargs, iters):
    """Run one benchmark subprocess and parse its timing output.

    The benchmark binary is ``cliargs[1]``; it is invoked as
    ``cliargs[1] <iters> cliargs[2:]`` and must print lines containing
    ``BATCHTIME: <float>`` (total loop time) and ``SELFTIMED: <float>``
    (per-iteration time) on stdout.

    Returns the parsed BATCHTIME.

    Raises
    ------
    ValueError
        If either marker is missing from the benchmark's output.
    """
    cmd = [cliargs[1], str(iters)] + cliargs[2:]
    print(cmd)
    # Decode the output explicitly. The original used str(bytes), which
    # yields a repr like "b'...\\n'" and forced brittle magic-offset
    # slicing that depended on repr escaping quirks.
    out = subprocess.check_output(cmd).decode()

    def _field(tag):
        # Extract the float that follows "TAG:" on its line.
        for line in out.splitlines():
            if tag in line:
                return float(line.split(tag, 1)[1].lstrip(" :"))
        raise ValueError(f"no {tag!r} in benchmark output")

    selftimed = _field("SELFTIMED")
    batchtime = _field("BATCHTIME")

    print(f"ITERS: {iters}, BATCHTIME: {batchtime}, SELFTIMED: {selftimed}")
    return batchtime
40+
def converge(cliargs):
    """Sweep `iters` for the benchmark described by `cliargs` and regress.

    Runs do_bench at increasing iteration counts, then fits two models over
    the collected (iters, batchtime) points:
      * linear:    batchtime = m * iters + b   (slope = mean, b = overhead)
      * geometric: batchtime = c * iters ** p  (log-log regression)

    Returns (slope, slope_stdev, geomean_factor, geo_stdev).
    """
    xs = []
    ys = []
    iters = 1
    t = time()

    def bump(n):
        # Grow the iteration count: ~20% multiplicative steps while inside
        # the 3.5 s time budget, smaller additive steps once past it.
        # (This was copy-pasted three times in the original.)
        if time() - t < 3.5:
            return int(math.trunc(float(n) * 1.2) + 1)
        return n + 1 + n // 20

    def sample(n):
        # One benchmark run at n iterations; record the point.
        st = do_bench(cliargs, n)
        xs.append(n)
        ys.append(st)

    # Find a starting iteration count whose run is long enough to be
    # measurable, doubling until the threshold is met.
    # NOTE(review): do_bench returns BATCHTIME (whole-loop time), so
    # `st * iters` appears to double-count iters — confirm whether the
    # threshold was meant to be `st < 0.65` instead.
    while len(xs) == 0:
        st = do_bench(cliargs, iters)
        if st * iters < 0.65:
            iters *= 2
            continue
        xs.append(iters)
        ys.append(st)

    # Always take at least two more points so the regression is defined.
    for _ in range(2):
        iters = bump(iters)
        sample(iters)

    # Keep sampling until the ~3.5 s budget is exhausted.
    while time() - t < 3.5:
        iters = bump(iters)
        sample(iters)

    m, b, sigma = linear_regression_with_std(xs, ys)
    print(f"Slope (Mean): {m}, Intercept (Overhead): {b}, Stdev: {sigma}")

    # Geometric variant: regress in log-log space, then exponentiate the
    # intercept (geomean factor) and residual stdev (geo-stdev).
    p, lnc, lngsd = linear_regression_with_std(
        [math.log(x) for x in xs], [math.log(y) for y in ys])
    c, gsd = math.exp(lnc), math.exp(lngsd)
    print(f"Power (Distortion): {p}, Factor (Geomean) {c}, GeoStdev {gsd}")

    if MAKE_PLOT:
        plt.plot(xs, ys, 'rx')
        plt.plot([xs[0], xs[-1]], [m*xs[0]+b, m*xs[-1]+b], color="blue")
        plt.plot(xs, [c*x**p for x in xs], color="green")
        plt.savefig("plot.png")

    return m, sigma, c, gsd
80+
if __name__ == "__main__":
    # CLI entry point: argv[1] is the benchmark executable, argv[2:] are
    # its extra arguments (see converge / do_bench).
    stats = converge(argv)
    print(stats)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The script `criterionmethodology.py` is my implementation of a benchrunner-runner that uses the criterion methodology. We take as input some program which takes `iters` as a command-line argument, times a function of interest in a tight loop which repeats `iters` many times, and then prints to stdout the batchtime (total loop time) and selftimed (total loop time divided by iters). The essence of criterion is then to sweep `iters` and perform a linear regression against iters and batchtime. The slope is the mean and the y-intercept represents some notion of shared overhead, insensitive to `iters`. Ultimately, criterion serves as a way to benchmark tasks with very short execution times, as startup overhead can be ignored.
2+
3+
Since we have relatively precise timing over loops, I also implemented the criterion methodology *geometrically*. I take the logarithm of all the x and y values, compute the linear regression over that, then exponentiate the y-intercept - this represents the geomean. The other dependent portion, which is the slope, becomes a power (the equation is y = e^b x^m), which represents *geometric overhead*, e.g. how much overhead is being added per iteration. This may do well to model any slowdowns arising from pre-allocating arrays. Additionally, since performance data is non-negative and judged multiplicatively (twice as good means numbers are halved, twice as bad means numbers are doubled; these are all *factors*), the geomean and geo-standard-deviation may make more sense theoretically. However, from my testing, the geomean seems to vary wildly for programs with fleeting execution times, even between repeat runs with the same parameters.
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import numpy as np
4+
from criterionmethodology import converge
5+
import sys
6+
7+
# Benchmarks to sweep; the commented lists below are earlier configurations.
# names = ["Optsort", "Insertionsort", "Mergesort", "Quicksort"]
# names = ["CopyArray", "Quicksort", "Insertionsort", "Mergesort"]
names = ["Insertionsort"]

# Sample points per power of two in the size sweep (see bounds()).
# DENSITY = 4
DENSITY = 12
def bounds(name):
    """Return (lo, hi, points) describing the size sweep for `name`.

    Sizes are exponents (the sweep covers 2**lo .. 2**hi); `points` yields
    DENSITY samples per power of two, inclusive of both endpoints.
    """
    # (lo, hi) per benchmark; unknown names fall back to (3, 20).
    table = {
        "Insertionsort": (3, 16),
        "Quicksort": (3, 22),
        "Mergesort": (3, 24),   # lo was 12 in an earlier configuration
        "Cilksort": (3, 16),    # lo was 12, hi was 24 earlier
        "Optsort": (3, 16),     # hi was 24 earlier
    }
    lo, hi = table.get(name, (3, 20))
    return lo, hi, (hi - lo) * DENSITY + 1
37+
def dotrial(name, size):
    """Run the criterion sweep for benchmark `name` at input `size`.

    Reuses this script's own binary path (sys.argv[0]) as the benchrunner
    executable; returns converge's (mean, stddev, geomean, geostdev).
    """
    args = [sys.argv[0], "benchrunner", name, "Seq", str(int(size))]
    return converge(args)
40+
if __name__ == "__main__":
    for name in names:
        lo, hi, pts = bounds(name)
        outfile = f"{name}_out3.csv"
        # Write the header once, truncating any previous results.
        with open(outfile, "w") as f:
            f.write("# size\tmean\tstddev\tgeomean\tgeostdev\n")
        # Sweep sizes 2**lo .. 2**hi; unique() drops duplicate integers
        # that logspace produces at small sizes.
        for size in np.unique(np.logspace(lo, hi, pts, base=2).astype(int)):
            try:
                stats = dotrial(name, int(size))
            except Exception as e:
                # Best-effort sweep: skip sizes whose benchmark fails, but
                # don't swallow KeyboardInterrupt/SystemExit the way the
                # original bare `except: pass` did — and say what we skipped.
                print(f"skipping {name} at size {size}: {e}", file=sys.stderr)
                continue
            # Re-open in append mode per row so partial results survive
            # a crash mid-sweep.
            with open(outfile, "a") as f:
                f.write("%d\t%f\t%f\t%f\t%f\n" % ((int(size),) + stats))
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)