Skip to content

Commit 886780e

Browse files
authored
Merge pull request #2 from lf-lang/cb
Modified runner script for continuous benchmarking
2 parents 8477420 + 463c530 commit 886780e

20 files changed

+112
-21
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ multirun
99
__pycache__
1010
*.csv
1111
*.txt
12-
12+
.vscode
1313

C/Savina/src/micro/PingPong.lf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ reactor Ping(count:int(1000000)) {
5050
=}
5151
reaction (receive) -> serve, finished {=
5252
if (self->pingsLeft > 0) {
53-
schedule(serve, 0);
53+
lf_schedule(serve, 0);
5454
} else {
5555
// reset pingsLeft for next iteration
5656
self->pingsLeft = self->count;

C/Savina/src/parallelism/Trapezoidal.lf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ reactor Worker(instance:int(0)) {
135135
}
136136

137137
/* [[[cog
138-
cog.outl(f'main reactor Trapezoidal(numIterations:int({numIterations}), numWorkers:int({workers}), numPieces:int({pieces}), leftEndPoint:double({left}), rightEndPoint:double({right}))')
138+
cog.outl(f'main reactor Trapezoidal(numIterations:int({numIterations}), numWorkers:int({worker_reactors}), numPieces:int({pieces}), leftEndPoint:double({left}), rightEndPoint:double({right}))')
139139
]]] */
140140
main reactor Trapezoidal(numIterations:int(12), numWorkers:int(100), numPieces:int(10000000), leftEndPoint:double(1.0), rightEndPoint:double(5.0))
141141

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,11 @@ The results for a multirun are written to a directory in the scheme `multirun/<d
117117

118118
A second script called `collect_results.py` provides a convenient way for collecting results from a multirun and merging them into a single CSV file. Simply running
119119
```
120-
./collect_results.py multirun/<date>/<time>/ out.csv
120+
./collect_results.py out.csv multirun/<date>/<time>/
121121
```
122122
collects all results from the particular multirun and stores the merged data structure in out.csv. `collect_results.py` not only merges the results, but it also calculates minimum, maximum and median execution time for each individual run. The resulting CSV does not contain the measured values of individual iterations anymore and only contains a single row per run. This behavior can be disabled with the `--raw` command line flag. With the flag set, the results from all runs are merged as they are and the resulting file contains rows for all individual runs, but no minimum, maximum and median values.
123123

124+
As a shortcut, you may omit the multirun directory to write the latest multirun results to `out.csv`.
124125

125126
## How it works
126127

runner/collect_results.py

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22

33

44
import argparse
5+
import functools
6+
from itertools import product
57
import pandas as pd
68
import pathlib
79
import os
10+
import json
811

912

1013
def dir_path(string):
@@ -16,14 +19,18 @@ def dir_path(string):
1619

1720
def main():
1821
parser = argparse.ArgumentParser()
19-
parser.add_argument("src_path", type=dir_path)
2022
parser.add_argument("out_file")
23+
parser.add_argument("src_path", required=False, type=dir_path)
2124
parser.add_argument("--raw", dest="raw", action="store_true")
2225
args = parser.parse_args()
2326

2427
# collect data from all runs
28+
src_path = (
29+
args.src_path if args.src_path is not None
30+
else latest_subdirectory(latest_subdirectory("./multirun"))
31+
)
2532
data_frames = []
26-
for path in pathlib.Path(args.src_path).rglob("results.csv"):
33+
for path in pathlib.Path(src_path).rglob("results.csv"):
2734
data_frames.append(pd.read_csv(path.as_posix()))
2835

2936
# determine min, max and median
@@ -42,8 +49,66 @@ def main():
4249
concat = pd.concat(data_frames, ignore_index=True)
4350

4451
# write the concatenated results
45-
concat.to_csv(args.out_file)
52+
if args.out_file.endswith(".json"):
53+
with open(args.out_file, "w") as f:
54+
json.dump((create_json(concat)), f, indent=4)
55+
elif args.out_file.endswith(".csv"):
56+
concat.to_csv(args.out_file)
57+
else:
58+
raise ValueError(
59+
f"Expected output file extension to be \".json\" "
60+
f"or \".csv\", not \"{args.out_file.split('.')[-1]}\""
61+
)
62+
63+
def create_json(all_data: pd.DataFrame) -> list:
    """Convert collected benchmark results into the list-of-dicts structure
    expected by continuous-benchmarking JSON consumers.

    One entry is produced per combination of benchmark, target, threads and
    scheduler that actually occurs in ``all_data``; the reported value is the
    mean of ``mean_time_ms`` over all rows of that combination.

    Args:
        all_data: concatenated per-run results; must contain at least the
            columns ``benchmark``, ``mean_time_ms`` and ``total_iterations``.
            ``target``, ``threads`` and ``scheduler`` are optional.

    Returns:
        A JSON-serializable list of dicts with ``name``, ``unit``, ``value``
        and ``extra`` keys.
    """
    group_by = ["benchmark", "target", "threads", "scheduler"]

    def name_computer(group):
        # Only qualify the benchmark name with parameters that actually vary
        # across the data set; constant parameters would just add noise.
        parenthetical = [
            f"{p}={group[1 + i]}"
            for i, p in enumerate(group_by[1:])
            if p in all_data.columns and len(all_data[p].unique()) > 1
        ]
        if not parenthetical:
            return group[0]
        return group[0] + " (" + ", ".join(parenthetical) + ")"

    def is_correct_group(group):
        # Boolean row mask selecting the rows belonging to `group`. A value of
        # None marks a column that is absent from the data and therefore
        # matches every row.
        return functools.reduce(
            lambda a, b: a & b,
            [
                (v is None and group_by[i] not in all_data.columns)
                or (v is not None and all_data[group_by[i]].values == v)
                for i, v in enumerate(group)
            ],
        )

    result = []
    for group in product(*[
        all_data[p].unique() if p in all_data.columns else [None]
        for p in group_by
    ]):
        rows = all_data[is_correct_group(group)]
        # The Cartesian product can generate combinations that never occur in
        # the data (e.g. a scheduler only run for one target); skip them
        # instead of emitting NaN values or crashing on .iloc[0] below.
        if rows.empty:
            continue
        result.append({
            "name": name_computer(group),
            "unit": "ms",
            "value": rows.mean_time_ms.mean(),
            # group[1] is the target (group[0] is the benchmark name, which
            # already appears in "name").
            "extra": f"Target: {group[1]}"
            f"\nTotal Iterations: {rows.total_iterations.iloc[0]}"
            + (f"\nThreads: {group[2]}" if group[2] is not None else "")
            + (f"\nScheduler: {group[-1]}" if group[-1] is not None else "")
        })
    return result
97+
98+
def latest_subdirectory(parent):
    """Return the path of the newest subdirectory of ``parent``.

    Directory names are assumed to be dash-separated numbers (hydra's
    date/time scheme, e.g. ``2022-01-31`` or ``12-30-59``) and are compared
    numerically, component by component, via ``compare_dirnames``.

    Raises:
        FileNotFoundError: if ``parent`` is None or not an existing directory.
        Exception: if ``parent`` contains no entries.
    """
    # The original intent of this guard is clearly an existence check (see the
    # error message), so test the filesystem rather than just None.
    if parent is None or not os.path.isdir(parent):
        raise FileNotFoundError(f"{parent} does not exist.")
    subdirectories = os.listdir(parent)
    # Check emptiness before bothering to sort.
    if not subdirectories:
        raise Exception(f"{parent} is empty.")
    subdirectories.sort(key=functools.cmp_to_key(compare_dirnames))
    return os.path.join(parent, subdirectories[-1])
46106

107+
def compare_dirnames(s0, s1):
    """Three-way comparison of dash-separated numeric names.

    Returns a negative number if ``s0`` sorts before ``s1``, positive if it
    sorts after, and 0 if they are equal. Components are compared as integers
    so that e.g. "10-2" sorts after "9-30".
    """
    parts0 = [int(p) for p in s0.split("-")]
    parts1 = [int(p) for p in s1.split("-")]
    for number0, number1 in zip(parts0, parts1):
        if number0 != number1:
            return number0 - number1
    # zip truncates to the shorter name, so a pure prefix ("1-2" vs "1-2-3")
    # would otherwise compare equal; break the tie by component count.
    return len(parts0) - len(parts1)
47112

48113
if __name__ == "__main__":
49114
main()

runner/conf/benchmark/savina_concurrency_banking.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name: "Bank Transaction"
33
params:
44
accounts: 1000
5-
transactions: 50000
5+
transactions: "${problem_size.banking_transactions}"
66

77
# target specific configuration
88
targets:

runner/conf/benchmark/savina_concurrency_bndbuffer.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ params:
44
buffer_size: 50
55
consumers: 40
66
producers: 40
7-
items_per_producer: 1000
7+
items_per_producer: "${problem_size.bndbuffer_items_per_producer}"
88
produce_cost: 25
99
consume_cost: 25
1010

runner/conf/benchmark/savina_concurrency_concsll.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name: "Concurrent Sorted Linked List"
33
params:
44
workers: 20
5-
messages_per_worker: 8000
5+
messages_per_worker: "${problem_size.concsll_messages_per_worker}"
66
write_percentage: 10
77
size_percentage: 1
88

runner/conf/benchmark/savina_micro_big.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# @package benchmark
22
name: "Big"
33
params:
4-
messages: 20000
4+
messages: ${problem_size.big_messages}
55
actors: 120
66

77
# target specific configuration

runner/conf/benchmark/savina_micro_pingpong.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# @package benchmark
22
name: "Ping Pong"
33
params:
4-
pings: 1000000
4+
pings: "${problem_size.pingpong_pings}"
55

66
# target specific configuration
77
targets:

0 commit comments

Comments
 (0)