Commit a51c767

Add flamegraphs to benchmarks
Signed-off-by: Mateusz P. Nowak <[email protected]>
1 parent 261c837

17 files changed, +1113 -91 lines changed

devops/scripts/benchmarks/benches/base.py

Lines changed: 44 additions & 9 deletions
@@ -12,6 +12,8 @@
 from options import options
 from utils.utils import download, run
 from abc import ABC, abstractmethod
+from utils.flamegraph import get_flamegraph
+from utils.logger import log
 
 benchmark_tags = [
     BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"),
@@ -61,6 +63,12 @@ def enabled(self) -> bool:
         By default, it returns True, but can be overridden to disable a benchmark."""
         return True
 
+    def traceable(self) -> bool:
+        """Returns whether this benchmark should be traced by FlameGraph.
+        By default, it returns True, but can be overridden to disable tracing for a benchmark.
+        """
+        return True
+
     @abstractmethod
     def setup(self):
         pass
@@ -70,11 +78,12 @@ def teardown(self):
         pass
 
     @abstractmethod
-    def run(self, env_vars) -> list[Result]:
+    def run(self, env_vars, run_flamegraph: bool = False) -> list[Result]:
         """Execute the benchmark with the given environment variables.
 
         Args:
             env_vars: Environment variables to use when running the benchmark.
+            run_flamegraph: Whether to run benchmark under FlameGraph.
 
         Returns:
             A list of Result objects with the benchmark results.
@@ -97,7 +106,14 @@ def get_adapter_full_path():
         ), f"could not find adapter file {adapter_path} (and in similar lib paths)"
 
     def run_bench(
-        self, command, env_vars, ld_library=[], add_sycl=True, use_stdout=True
+        self,
+        command,
+        env_vars,
+        ld_library=[],
+        add_sycl=True,
+        use_stdout=True,
+        run_flamegraph=False,
+        extra_perf_opt=None,
     ):
         env_vars = env_vars.copy()
         if options.ur is not None:
@@ -110,13 +126,32 @@ def run_bench(
         ld_libraries = options.extra_ld_libraries.copy()
         ld_libraries.extend(ld_library)
 
-        result = run(
-            command=command,
-            env_vars=env_vars,
-            add_sycl=add_sycl,
-            cwd=options.benchmark_cwd,
-            ld_library=ld_libraries,
-        )
+        perf_data_file = None
+        if self.traceable() and run_flamegraph:
+            if extra_perf_opt is None:
+                extra_perf_opt = []
+            perf_data_file, command = get_flamegraph().setup(
+                self.name(), command, extra_perf_opt
+            )
+            log.debug(f"FlameGraph perf data: {perf_data_file}")
+            log.debug(f"FlameGraph command: {' '.join(command)}")
+
+        try:
+            result = run(
+                command=command,
+                env_vars=env_vars,
+                add_sycl=add_sycl,
+                cwd=options.benchmark_cwd,
+                ld_library=ld_libraries,
+            )
+        except subprocess.CalledProcessError:
+            if run_flamegraph and perf_data_file:
+                get_flamegraph().cleanup(options.benchmark_cwd, perf_data_file)
+            raise
+
+        if self.traceable() and run_flamegraph and perf_data_file:
+            svg_file = get_flamegraph().handle_output(self.name(), perf_data_file)
+            log.info(f"FlameGraph generated: {svg_file}")
 
         if use_stdout:
             return result.stdout.decode()
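
Note: run_bench() above relies on a FlameGraph helper exposing setup(), cleanup(), and handle_output(). That helper lives in utils/flamegraph.py, which is part of this commit but not shown in this excerpt. Below is a minimal sketch of the interface implied by the calls above, assuming the usual perf record -> perf script -> stackcollapse-perf.pl -> flamegraph.pl pipeline; all file names and pipeline details are illustrative, not taken from the commit.

# Hypothetical sketch of utils/flamegraph.py; the real module is in this
# commit but not displayed here. Only the call signatures used by
# run_bench() are inferred from the diff; everything else is an assumption.
import os
import subprocess


class FlameGraph:
    def setup(self, bench_name, command, extra_perf_opt):
        """Wrap `command` in `perf record` and return (perf_data_file, command)."""
        perf_data_file = f"{bench_name}.perf.data"
        wrapped = (
            ["perf", "record", "-g", "-o", perf_data_file]
            + extra_perf_opt
            + ["--"]
            + command
        )
        return perf_data_file, wrapped

    def cleanup(self, cwd, perf_data_file):
        """Remove leftover perf data after a failed run."""
        path = os.path.join(cwd, perf_data_file)
        if os.path.exists(path):
            os.remove(path)

    def handle_output(self, bench_name, perf_data_file):
        """Collapse stacks and render an SVG with the FlameGraph scripts."""
        svg_file = f"{bench_name}.svg"
        script = subprocess.run(
            ["perf", "script", "-i", perf_data_file],
            check=True, capture_output=True,
        ).stdout
        collapsed = subprocess.run(
            ["stackcollapse-perf.pl"], input=script,
            check=True, capture_output=True,
        ).stdout
        with open(svg_file, "wb") as f:
            subprocess.run(["flamegraph.pl"], input=collapsed, check=True, stdout=f)
        return svg_file


_flamegraph = FlameGraph()


def get_flamegraph():
    return _flamegraph

A benchmark that should never run under perf can opt out by overriding traceable(); for example (hypothetical subclass, assuming the base class defined in this file):

class NonTraceableBench(Benchmark):  # illustrative name only
    def traceable(self) -> bool:
        return False  # run_bench() then skips the FlameGraph wrapping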

devops/scripts/benchmarks/benches/benchdnn.py

Lines changed: 2 additions & 1 deletion
@@ -132,7 +132,7 @@ def setup(self):
         if not self.bench_bin.exists():
             raise FileNotFoundError(f"Benchmark binary not found: {self.bench_bin}")
 
-    def run(self, env_vars):
+    def run(self, env_vars, run_flamegraph: bool = False):
         command = [
             str(self.bench_bin),
             *self.bench_args.split(),
@@ -151,6 +151,7 @@ def run(self, env_vars):
             add_sycl=True,
             ld_library=ld_library,
             use_stdout=True,
+            run_flamegraph=run_flamegraph,
         )
         result_value = self._extract_time(output)

devops/scripts/benchmarks/benches/benchdnn_list.py

Lines changed: 38 additions & 38 deletions
@@ -14,29 +14,29 @@
 
 # the final choice of benchmarks to run, used in CI and other environments
 benches_final_set = [
-    [
-        "sum",
-        "f16-1",
-        "--sdt=f16:f16:f16 --stag=abx:abx:abx --scales=1.25:3:0.5 16x2x6x4x3",
-        False,  # Do not run graph for this benchmark
-    ],
-    [
-        "sum",
-        "f16-2",
-        "--reset --ddt=f16 \
-        --sdt=f16:f16:f16:f16:f16:f16:f16:f16:f16:f16 \
-        --stag=abx:aBx16b:ABx16a16b:ABcd16b16a:BAcd16a16b:BAcd16b16a:aBCd16b16c:aBCd16c16b:aCBd16b16c:aCBd16c16b \
-        --dtag=abx,aBx16b,ABx16a16b,ABcd16b16a,BAcd16a16b,BAcd16b16a,aBCd16b16c,aBCd16c16b,aCBd16b16c,aCBd16c16b \
-        --scales=1.25:3:0.5:2:0.5:2:0.5:2:0.5:2 \
-        16x32x48x5",
-        False,  # Do not run graph for this benchmark
-    ],
-    [
-        "sum",
-        "f32-1",
-        "--sdt=bf16:bf16:bf16 --stag=abx:abx:abx --scales=0.5:2:0.5 16x2x6x4x3",
-        False,  # Do not run graph for this benchmark
-    ],
+    # [
+    #     "sum",
+    #     "f16-1",
+    #     "--sdt=f16:f16:f16 --stag=abx:abx:abx --scales=1.25:3:0.5 16x2x6x4x3",
+    #     False,  # Do not run graph for this benchmark
+    # ],
+    # [
+    #     "sum",
+    #     "f16-2",
+    #     "--reset --ddt=f16 \
+    #     --sdt=f16:f16:f16:f16:f16:f16:f16:f16:f16:f16 \
+    #     --stag=abx:aBx16b:ABx16a16b:ABcd16b16a:BAcd16a16b:BAcd16b16a:aBCd16b16c:aBCd16c16b:aCBd16b16c:aCBd16c16b \
+    #     --dtag=abx,aBx16b,ABx16a16b,ABcd16b16a,BAcd16a16b,BAcd16b16a,aBCd16b16c,aBCd16c16b,aCBd16b16c,aCBd16c16b \
+    #     --scales=1.25:3:0.5:2:0.5:2:0.5:2:0.5:2 \
+    #     16x32x48x5",
+    #     False,  # Do not run graph for this benchmark
+    # ],
+    # [
+    #     "sum",
+    #     "f32-1",
+    #     "--sdt=bf16:bf16:bf16 --stag=abx:abx:abx --scales=0.5:2:0.5 16x2x6x4x3",
+    #     False,  # Do not run graph for this benchmark
+    # ],
     [
         "sum",
         "f32-2",
@@ -53,21 +53,21 @@
         "padding-1",
         "--ddt=f32 --sdt=f32:f32 --stag=aBcd16b --dtag=aBcd16b 1x8x64x64 1x8x640x1024 1x24x640x1024",
     ],
-    [
-        "sum",
-        "padding-2",
-        "--sdt=bf16:bf16 --ddt=bf16 --stag=AB48a16b:AB48a16b --dtag=AB48a16b 512x1024",
-    ],
-    [
-        "graph",
-        "sdpa-plain-f16",
-        "--reset --dt=f16 --case=complex_fusion/mha/sdpa-plain-implicit-causal-mask-fp32-bs1.json",
-    ],
-    [
-        "graph",
-        "sdpa-plain-f32",
-        "--reset --dt=f32 --case=complex_fusion/mha/sdpa-plain-implicit-causal-mask-fp32-bs1.json",
-    ],
+    # [
+    #     "sum",
+    #     "padding-2",
+    #     "--sdt=bf16:bf16 --ddt=bf16 --stag=AB48a16b:AB48a16b --dtag=AB48a16b 512x1024",
+    # ],
+    # [
+    #     "graph",
+    #     "sdpa-plain-f16",
+    #     "--reset --dt=f16 --case=complex_fusion/mha/sdpa-plain-implicit-causal-mask-fp32-bs1.json",
+    # ],
+    # [
+    #     "graph",
+    #     "sdpa-plain-f32",
+    #     "--reset --dt=f32 --case=complex_fusion/mha/sdpa-plain-implicit-causal-mask-fp32-bs1.json",
+    # ],
 ]
 
 # the complete set of benchmarks aimed at gpu operations, normally too long to run in CI

devops/scripts/benchmarks/benches/compute.py

Lines changed: 2 additions & 2 deletions
@@ -320,7 +320,7 @@ def explicit_group(self):
     def description(self) -> str:
         return ""
 
-    def run(self, env_vars) -> list[Result]:
+    def run(self, env_vars, run_flamegraph: bool = False) -> list[Result]:
         command = [
             f"{self.benchmark_bin}",
             f"--test={self.test}",
@@ -331,7 +331,7 @@ def run(self, env_vars) -> list[Result]:
         command += self.bin_args()
         env_vars.update(self.extra_env_vars())
 
-        result = self.run_bench(command, env_vars)
+        result = self.run_bench(command, env_vars, run_flamegraph=run_flamegraph)
         parsed_results = self.parse_output(result)
         ret = []
         for label, median, stddev, unit in parsed_results:

devops/scripts/benchmarks/benches/gromacs.py

Lines changed: 2 additions & 1 deletion
@@ -163,7 +163,7 @@ def setup(self):
             ld_library=self.suite.oneapi.ld_libraries(),
         )
 
-    def run(self, env_vars):
+    def run(self, env_vars, run_flamegraph: bool = False):
         model_dir = self.grappa_dir / self.model
 
         env_vars.update({"SYCL_CACHE_PERSISTENT": "1"})
@@ -202,6 +202,7 @@ def run(self, env_vars):
             add_sycl=True,
             use_stdout=False,
             ld_library=self.suite.oneapi.ld_libraries(),
+            run_flamegraph=run_flamegraph,
         )
 
         if not self._validate_correctness(options.benchmark_cwd + "/md.log"):

devops/scripts/benchmarks/benches/llamacpp.py

Lines changed: 2 additions & 2 deletions
@@ -115,7 +115,7 @@ def get_tags(self):
     def lower_is_better(self):
         return False
 
-    def run(self, env_vars) -> list[Result]:
+    def run(self, env_vars, run_flamegraph: bool = False) -> list[Result]:
         command = [
             f"{self.benchmark_bin}",
             "--output",
@@ -141,7 +141,7 @@ def run(self, env_vars) -> list[Result]:
         ]
 
         result = self.run_bench(
-            command, env_vars, ld_library=self.bench.oneapi.ld_libraries()
+            command, env_vars, ld_library=self.bench.oneapi.ld_libraries(), run_flamegraph=run_flamegraph
         )
         parsed = self.parse_output(result)
         results = []

devops/scripts/benchmarks/benches/syclbench.py

Lines changed: 2 additions & 2 deletions
@@ -137,7 +137,7 @@ def setup(self):
             self.directory, "sycl-bench-build", self.bench_name
         )
 
-    def run(self, env_vars) -> list[Result]:
+    def run(self, env_vars, run_flamegraph: bool = False) -> list[Result]:
         self.outputfile = os.path.join(self.bench.directory, self.test + ".csv")
 
         command = [
@@ -151,7 +151,7 @@ def run(self, env_vars) -> list[Result]:
         env_vars.update(self.extra_env_vars())
 
         # no output to stdout, all in outputfile
-        self.run_bench(command, env_vars)
+        self.run_bench(command, env_vars, run_flamegraph=run_flamegraph)
 
         with open(self.outputfile, "r") as f:
             reader = csv.reader(f)

devops/scripts/benchmarks/benches/test.py

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,7 @@ def notes(self) -> str:
     def unstable(self) -> str:
         return self.unstable_text
 
-    def run(self, env_vars) -> list[Result]:
+    def run(self, env_vars, run_flamegraph: bool = False) -> list[Result]:
         random_value = self.value + random.uniform(-1 * (self.diff), self.diff)
         return [
             Result(

devops/scripts/benchmarks/benches/umf.py

Lines changed: 2 additions & 2 deletions
@@ -137,7 +137,7 @@ def get_names_of_benchmarks_to_be_run(self, command, env_vars):
 
         return all_names
 
-    def run(self, env_vars) -> list[Result]:
+    def run(self, env_vars, run_flamegraph: bool = False) -> list[Result]:
         command = [f"{self.benchmark_bin}"]
 
         all_names = self.get_names_of_benchmarks_to_be_run(command, env_vars)
@@ -151,7 +151,7 @@ def run(self, env_vars) -> list[Result]:
             specific_benchmark = command + ["--benchmark_filter=^" + name + "$"]
 
             result = self.run_bench(
-                specific_benchmark, env_vars, add_sycl=False, ld_library=[self.umf_lib]
+                specific_benchmark, env_vars, add_sycl=False, ld_library=[self.umf_lib], run_flamegraph=run_flamegraph
             )
 
             parsed = self.parse_output(result)

devops/scripts/benchmarks/benches/velocity.py

Lines changed: 3 additions & 3 deletions
@@ -130,15 +130,15 @@ def description(self) -> str:
     def get_tags(self):
         return ["SYCL", "application"]
 
-    def run(self, env_vars) -> list[Result]:
+    def run(self, env_vars, run_flamegraph: bool = False) -> list[Result]:
         env_vars.update(self.extra_env_vars())
 
         command = [
             f"{self.benchmark_bin}",
         ]
         command += self.bin_args()
 
-        result = self.run_bench(command, env_vars, ld_library=self.ld_libraries())
+        result = self.run_bench(command, env_vars, ld_library=self.ld_libraries(), run_flamegraph=run_flamegraph)
 
         return [
             Result(
@@ -282,7 +282,7 @@ class QuickSilver(VelocityBase):
     def __init__(self, vb: VelocityBench):
         super().__init__("QuickSilver", "qs", vb, "MMS/CTT")
 
-    def run(self, env_vars) -> list[Result]:
+    def run(self, env_vars, run_flamegraph: bool = False) -> list[Result]:
         # TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0
         if (
             "UR_L0_USE_IMMEDIATE_COMMANDLISTS" in env_vars
