
Commit a908dfc

Move changes to collect_results.py.
1 parent a42f0a4 commit a908dfc

4 files changed (+80, -53 lines)

C/Savina/src/micro/PingPong.lf (1 addition, 1 deletion)

```diff
@@ -50,7 +50,7 @@ reactor Ping(count:int(1000000)) {
     =}
     reaction (receive) -> serve, finished {=
         if (self->pingsLeft > 0) {
-            schedule(serve, 0);
+            lf_schedule(serve, 0);
         } else {
             // reset pingsLeft for next iteration
             self->pingsLeft = self->count;
```
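The rename tracks the C target's move to `lf_`-prefixed runtime API functions, of which `schedule` is the old spelling of the same call. As a minimal sketch, assuming the C target's `lf_schedule(action, extra_delay)` signature:

```
// Schedule the logical action `serve` with zero extra delay, so the
// next ping is served at the earliest available tag; a nonzero second
// argument would postpone the trigger into the logical future.
lf_schedule(serve, 0);
```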

README.md (5 additions, 0 deletions)

````diff
@@ -121,6 +121,11 @@ A second script called `collect_results.py` provides a convenient way for collecting
 ```
 collects all results from the particular multirun and stores the merged data structure in `out.csv`. `collect_results.py` not only merges the results, but also calculates the minimum, maximum, and median execution time for each individual run. The resulting CSV no longer contains the measured values of individual iterations and holds only a single row per run. This behavior can be disabled with the `--raw` command line flag. With the flag set, the results from all runs are merged as they are, and the resulting file contains rows for all individual runs, but no minimum, maximum, or median values.
 
+As a shortcut, you may alternatively use
+```
+./collect_results.py latest out.csv
+```
+to write the latest multirun results to `out.csv`.
 
 ## How it works
 
````
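For a specific run, the script takes the multirun directory explicitly. The timestamped path below is hypothetical, and adding the `--raw` flag keeps one row per iteration instead of the aggregated statistics:

```
./collect_results.py multirun/2022-01-31/14-05-36 out.csv --raw
```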

runner/collect_results.py (52 additions, 3 deletions)

```diff
@@ -2,9 +2,12 @@
 
 
 import argparse
+import functools
+from itertools import product
 import pandas as pd
 import pathlib
 import os
+import json
 
 
 def dir_path(string):
```
```diff
@@ -16,14 +19,18 @@ def dir_path(string):
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("src_path", type=dir_path)
+    parser.add_argument("src_path")
     parser.add_argument("out_file")
     parser.add_argument("--raw", dest="raw", action="store_true")
     args = parser.parse_args()
 
     # collect data from all runs
+    src_path = (
+        args.src_path if args.src_path != "latest"
+        else latest_subdirectory(latest_subdirectory("./multirun"))
+    )
     data_frames = []
-    for path in pathlib.Path(args.src_path).rglob("results.csv"):
+    for path in pathlib.Path(src_path).rglob("results.csv"):
         data_frames.append(pd.read_csv(path.as_posix()))
 
     # determine min, max and median
```
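With this change, passing `latest` instead of a real path resolves to the newest multirun output: the helper picks the newest date directory under `./multirun`, then the newest time directory inside it. A sketch of the Hydra-style layout this assumes, with invented timestamps:

```
multirun/
├── 2022-01-30/
│   └── 22-10-05/
└── 2022-01-31/
    ├── 09-12-44/
    └── 14-05-36/   <- latest_subdirectory(latest_subdirectory("./multirun"))
```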
```diff
@@ -42,8 +49,50 @@ def main():
     concat = pd.concat(data_frames, ignore_index=True)
 
     # write the concatenated results
-    concat.to_csv(args.out_file)
+    if args.out_file.endswith(".json"):
+        with open(args.out_file, "w") as f:
+            json.dump(create_json(concat), f, indent=4)
+    else:
+        concat.to_csv(args.out_file)
+
+def create_json(all_data: pd.DataFrame) -> list:
+    group_by = ["benchmark", "target", "threads", "scheduler"]
+    # Human-readable name: the benchmark name plus any grouping
+    # parameters that actually vary across the merged data.
+    name_computer = lambda group: group[0] + " (" + ", ".join(
+        f"{p}={group[i + 1]}"
+        for i, p in enumerate(group_by[1:])
+        if len(all_data[p].unique()) > 1
+    ) + ")"
+    # Boolean mask selecting the rows that belong to the given group.
+    is_correct_group = lambda group: functools.reduce(
+        lambda a, b: a & b,
+        [all_data[group_by[i]].values == v for i, v in enumerate(group)]
+    )
+    return [
+        {
+            "name": name_computer(group),
+            "unit": "ms",
+            "value": all_data[is_correct_group(group)].mean_time_ms.mean(),
+            "extra": f"Target: {group[1]}"
+                     f"\nTotal Iterations: {all_data[is_correct_group(group)].total_iterations.iloc[0]}"
+                     f"\nThreads: {group[2]}"
+                     f"\nScheduler: {group[-1]}"
+        }
+        for group in product(*[all_data[p].unique() for p in group_by])
+    ]
+
+def latest_subdirectory(parent):
+    if parent is None:
+        raise Exception(f"{parent} does not exist.")
+    subdirectories = os.listdir(parent)
+    subdirectories.sort(key=functools.cmp_to_key(compare_dirnames))
+    if not subdirectories:
+        raise Exception(f"{parent} is empty.")
+    return os.path.join(parent, subdirectories[-1])
 
+def compare_dirnames(s0, s1):
+    # Numeric comparison of dash-separated names such as "2022-01-31"
+    # or "14-05-36", field by field.
+    for number0, number1 in zip(s0.split("-"), s1.split("-")):
+        if int(number0) != int(number1):
+            return int(number0) - int(number1)
+    return 0
 
 if __name__ == "__main__":
     main()
```
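For illustration, `create_json` emits one entry per (benchmark, target, threads, scheduler) combination found in the merged data, named after the benchmark plus whichever of those parameters actually vary; the name/unit/value/extra shape is the same one the JSON writer removed from `run_benchmark.py` below used to produce. A hypothetical two-entry output, with all values invented:

```
[
    {
        "name": "PingPong (threads=1)",
        "unit": "ms",
        "value": 132.4,
        "extra": "Target: lf-c\nTotal Iterations: 12\nThreads: 1\nScheduler: GEDF_NP"
    },
    {
        "name": "PingPong (threads=2)",
        "unit": "ms",
        "value": 88.7,
        "extra": "Target: lf-c\nTotal Iterations: 12\nThreads: 2\nScheduler: GEDF_NP"
    }
]
```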

runner/run_benchmark.py (22 additions, 49 deletions)

```diff
@@ -6,7 +6,7 @@
 import multiprocessing
 import omegaconf
 import subprocess
-import json
+
 
 log = logging.getLogger("run_benchmark")
 
@@ -166,55 +166,28 @@ def execute_command(command):
 
 
 def write_results(times, cfg):
-    if not cfg["json"]:
-        row = {
-            "benchmark": cfg["benchmark"]["name"],
-            "target": cfg["target"]["name"],
-            "total_iterations": cfg["iterations"],
-            "threads": cfg["threads"],
-            "iteration": None,
-            "time_ms": None,
-        }
-        # also add all parameters and their values
-        row.update(cfg["benchmark"]["params"])
-        if "params" in cfg["target"]:
-            row.update(cfg["target"]["params"])
-
-        with open("results.csv", "w", newline="") as csvfile:
-            writer = csv.DictWriter(csvfile, fieldnames=row.keys())
-            writer.writeheader()
-            i = 0
-            for t in times:
-                row["iteration"] = i
-                row["time_ms"] = t
-                writer.writerow(row)
-                i += 1
-    else:
-        total_time = 0
+    row = {
+        "benchmark": cfg["benchmark"]["name"],
+        "target": cfg["target"]["name"],
+        "total_iterations": cfg["iterations"],
+        "threads": cfg["threads"],
+        "iteration": None,
+        "time_ms": None,
+    }
+    # also add all parameters and their values
+    row.update(cfg["benchmark"]["params"])
+    if "params" in cfg["target"]:
+        row.update(cfg["target"]["params"])
+
+    with open("results.csv", "w", newline="") as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=row.keys())
+        writer.writeheader()
+        i = 0
         for t in times:
-            total_time += t
-        total_time /= cfg["iterations"]
-        data = {
-            "name": cfg["benchmark"]["name"],
-            "unit": "ms",
-            "value": total_time,
-            "extra": f"Target: {cfg['target']['name']}\nTotal Iterations: {cfg['iterations']}\nThreads: {cfg['threads']}"
-        }
-
-        try:
-            with open("../../../benchmark_result.json", "r+") as outfile:
-                # benchmark_result.json file should be in the multirun directory
-                # update existing file
-                contents = json.load(outfile)
-                contents.append(data)
-                outfile.seek(0)
-                json.dump(contents, outfile, indent=4)
-
-        except FileNotFoundError:
-            with open("../../../benchmark_result.json", "w+") as outfile:
-                # create new file
-                json.dump([data], outfile, indent=4)
-
+            row["iteration"] = i
+            row["time_ms"] = t
+            writer.writerow(row)
+            i += 1
 
 
 if __name__ == "__main__":
```
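`write_results` now always writes the per-iteration `results.csv`; the JSON summary it used to assemble here is derived later by `collect_results.py` instead. A hypothetical three-iteration file, with invented values and a trailing parameter column taken from the benchmark's params in the config:

```
benchmark,target,total_iterations,threads,iteration,time_ms,count
PingPong,lf-c,3,2,0,135.2,1000000
PingPong,lf-c,3,2,1,130.8,1000000
PingPong,lf-c,3,2,2,131.5,1000000
```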
