Skip to content

Commit 886780e

Browse files
authored
Merge pull request #2 from lf-lang/cb
Modified runner script for continuous benchmarking
2 parents 8477420 + 463c530 commit 886780e

20 files changed

+112
-21
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ multirun
99
__pycache__
1010
*.csv
1111
*.txt
12-
12+
.vscode
1313

C/Savina/src/micro/PingPong.lf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ reactor Ping(count:int(1000000)) {
5050
=}
5151
reaction (receive) -> serve, finished {=
5252
if (self->pingsLeft > 0) {
53-
schedule(serve, 0);
53+
lf_schedule(serve, 0);
5454
} else {
5555
// reset pingsLeft for next iteration
5656
self->pingsLeft = self->count;

C/Savina/src/parallelism/Trapezoidal.lf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ reactor Worker(instance:int(0)) {
135135
}
136136

137137
/* [[[cog
138-
cog.outl(f'main reactor Trapezoidal(numIterations:int({numIterations}), numWorkers:int({workers}), numPieces:int({pieces}), leftEndPoint:double({left}), rightEndPoint:double({right}))')
138+
cog.outl(f'main reactor Trapezoidal(numIterations:int({numIterations}), numWorkers:int({worker_reactors}), numPieces:int({pieces}), leftEndPoint:double({left}), rightEndPoint:double({right}))')
139139
]]] */
140140
main reactor Trapezoidal(numIterations:int(12), numWorkers:int(100), numPieces:int(10000000), leftEndPoint:double(1.0), rightEndPoint:double(5.0))
141141

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,11 @@ The results for a multirun are written to a directory in the scheme `multirun/<d
117117

118118
A second script called `collect_results.py` provides a convenient way for collecting results from a multirun and merging them into a single CSV file. Simply running
119119
```
120-
./collect_results.py multirun/<date>/<time>/ out.csv
120+
./collect_results.py out.csv multirun/<date>/<time>/
121121
```
122122
collects all results from the particular multirun and stores the merged data structure in out.csv. `collect_results.py` not only merges the results, but it also calculates minimum, maximum and median execution time for each individual run. The resulting CSV does not contain the measured values of individual iterations anymore and only contains a single row per run. This behavior can be disabled with the `--raw` command line flag. With the flag set, the results from all runs are merged as they are and the resulting file contains rows for all individual runs, but no minimum, maximum and median values.
123123

124+
As a shortcut, you may omit the multirun directory to write the latest multirun results to `out.csv`.
124125

125126
## How it works
126127

runner/collect_results.py

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22

33

44
import argparse
5+
import functools
6+
from itertools import product
57
import pandas as pd
68
import pathlib
79
import os
10+
import json
811

912

1013
def dir_path(string):
@@ -16,14 +19,18 @@ def dir_path(string):
1619

1720
def main():
1821
parser = argparse.ArgumentParser()
19-
parser.add_argument("src_path", type=dir_path)
2022
parser.add_argument("out_file")
23+
parser.add_argument("src_path", required=False, type=dir_path)
2124
parser.add_argument("--raw", dest="raw", action="store_true")
2225
args = parser.parse_args()
2326

2427
# collect data from all runs
28+
src_path = (
29+
args.src_path if args.src_path is not None
30+
else latest_subdirectory(latest_subdirectory("./multirun"))
31+
)
2532
data_frames = []
26-
for path in pathlib.Path(args.src_path).rglob("results.csv"):
33+
for path in pathlib.Path(src_path).rglob("results.csv"):
2734
data_frames.append(pd.read_csv(path.as_posix()))
2835

2936
# determine min, max and median
@@ -42,8 +49,66 @@ def main():
4249
concat = pd.concat(data_frames, ignore_index=True)
4350

4451
# write the concatenated results
45-
concat.to_csv(args.out_file)
52+
if args.out_file.endswith(".json"):
53+
with open(args.out_file, "w") as f:
54+
json.dump((create_json(concat)), f, indent=4)
55+
elif args.out_file.endswith(".csv"):
56+
concat.to_csv(args.out_file)
57+
else:
58+
raise ValueError(
59+
f"Expected output file extension to be \".json\" "
60+
f"or \".csv\", not \"{args.out_file.split('.')[-1]}\""
61+
)
62+
63+
def create_json(all_data: pd.DataFrame) -> list:
    """Convert collected benchmark results into the list-of-dicts structure
    expected by continuous-benchmarking JSON consumers.

    One entry is produced per combination of benchmark, target, threads and
    scheduler that actually occurs in ``all_data``; the reported value is the
    mean of ``mean_time_ms`` over all rows of that combination.

    Args:
        all_data: concatenated per-run results; must contain at least the
            columns ``benchmark``, ``mean_time_ms`` and ``total_iterations``.
            ``target``, ``threads`` and ``scheduler`` are optional.

    Returns:
        A JSON-serializable list of dicts with ``name``, ``unit``, ``value``
        and ``extra`` keys.
    """
    group_by = ["benchmark", "target", "threads", "scheduler"]

    def name_computer(group):
        # Only qualify the benchmark name with parameters that actually vary
        # across the data set; constant parameters would just add noise.
        parenthetical = [
            f"{p}={group[1 + i]}"
            for i, p in enumerate(group_by[1:])
            if p in all_data.columns and len(all_data[p].unique()) > 1
        ]
        if not parenthetical:
            return group[0]
        return group[0] + " (" + ", ".join(parenthetical) + ")"

    def is_correct_group(group):
        # Boolean row mask selecting the rows belonging to `group`. A value of
        # None marks a column that is absent from the data and therefore
        # matches every row.
        return functools.reduce(
            lambda a, b: a & b,
            [
                (v is None and group_by[i] not in all_data.columns)
                or (v is not None and all_data[group_by[i]].values == v)
                for i, v in enumerate(group)
            ],
        )

    result = []
    for group in product(*[
        all_data[p].unique() if p in all_data.columns else [None]
        for p in group_by
    ]):
        rows = all_data[is_correct_group(group)]
        # The Cartesian product can generate combinations that never occur in
        # the data (e.g. a scheduler only run for one target); skip them
        # instead of emitting NaN values or crashing on .iloc[0] below.
        if rows.empty:
            continue
        result.append({
            "name": name_computer(group),
            "unit": "ms",
            "value": rows.mean_time_ms.mean(),
            # group[1] is the target (group[0] is the benchmark name, which
            # already appears in "name").
            "extra": f"Target: {group[1]}"
            f"\nTotal Iterations: {rows.total_iterations.iloc[0]}"
            + (f"\nThreads: {group[2]}" if group[2] is not None else "")
            + (f"\nScheduler: {group[-1]}" if group[-1] is not None else "")
        })
    return result
97+
98+
def latest_subdirectory(parent):
    """Return the path of the newest subdirectory of ``parent``.

    Directory names are assumed to be dash-separated numbers (hydra's
    date/time scheme, e.g. ``2022-01-31`` or ``12-30-59``) and are compared
    numerically, component by component, via ``compare_dirnames``.

    Raises:
        FileNotFoundError: if ``parent`` is None or not an existing directory.
        Exception: if ``parent`` contains no entries.
    """
    # The original intent of this guard is clearly an existence check (see the
    # error message), so test the filesystem rather than just None.
    if parent is None or not os.path.isdir(parent):
        raise FileNotFoundError(f"{parent} does not exist.")
    subdirectories = os.listdir(parent)
    # Check emptiness before bothering to sort.
    if not subdirectories:
        raise Exception(f"{parent} is empty.")
    subdirectories.sort(key=functools.cmp_to_key(compare_dirnames))
    return os.path.join(parent, subdirectories[-1])
46106

107+
def compare_dirnames(s0, s1):
    """Three-way comparison of dash-separated numeric names.

    Returns a negative number if ``s0`` sorts before ``s1``, positive if it
    sorts after, and 0 if they are equal. Components are compared as integers
    so that e.g. "10-2" sorts after "9-30".
    """
    parts0 = [int(p) for p in s0.split("-")]
    parts1 = [int(p) for p in s1.split("-")]
    for number0, number1 in zip(parts0, parts1):
        if number0 != number1:
            return number0 - number1
    # zip truncates to the shorter name, so a pure prefix ("1-2" vs "1-2-3")
    # would otherwise compare equal; break the tie by component count.
    return len(parts0) - len(parts1)
47112

48113
if __name__ == "__main__":
49114
main()

runner/conf/benchmark/savina_concurrency_banking.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name: "Bank Transaction"
33
params:
44
accounts: 1000
5-
transactions: 50000
5+
transactions: "${problem_size.banking_transactions}"
66

77
# target specific configuration
88
targets:

runner/conf/benchmark/savina_concurrency_bndbuffer.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ params:
44
buffer_size: 50
55
consumers: 40
66
producers: 40
7-
items_per_producer: 1000
7+
items_per_producer: "${problem_size.bndbuffer_items_per_producer}"
88
produce_cost: 25
99
consume_cost: 25
1010

runner/conf/benchmark/savina_concurrency_concsll.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name: "Concurrent Sorted Linked List"
33
params:
44
workers: 20
5-
messages_per_worker: 8000
5+
messages_per_worker: "${problem_size.concsll_messages_per_worker}"
66
write_percentage: 10
77
size_percentage: 1
88

runner/conf/benchmark/savina_micro_big.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# @package benchmark
22
name: "Big"
33
params:
4-
messages: 20000
4+
messages: ${problem_size.big_messages}
55
actors: 120
66

77
# target specific configuration

runner/conf/benchmark/savina_micro_pingpong.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# @package benchmark
22
name: "Ping Pong"
33
params:
4-
pings: 1000000
4+
pings: "${problem_size.pingpong_pings}"
55

66
# target specific configuration
77
targets:

0 commit comments

Comments
 (0)