Skip to content

Commit eb9e685

Browse files
[Benchmarks] Fix SubmitGraph CPU count scenarios grouping (#20239)
Fixes SubmitGraph CPU count scenarios being drawn in the same charts as SubmitGraph time measurement scenarios, which made them hard to read. Also refactors the `compute.py` module a bit. --------- Co-authored-by: Łukasz Ślusarczyk <[email protected]>
1 parent 9283d86 commit eb9e685

File tree

1 file changed

+48
-41
lines changed

1 file changed

+48
-41
lines changed

devops/scripts/benchmarks/benches/compute.py

Lines changed: 48 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,19 @@
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6-
from itertools import product
6+
import copy
77
import csv
88
import io
9-
import copy
109
import math
1110
from enum import Enum
11+
from itertools import product
1212
from pathlib import Path
1313

14-
from .base import Benchmark, Suite, TracingType
15-
from utils.result import BenchmarkMetadata, Result
16-
from .base import Benchmark, Suite
17-
from options import options
1814
from git_project import GitProject
15+
from options import options
16+
from utils.result import BenchmarkMetadata, Result
17+
18+
from .base import Benchmark, Suite, TracingType
1919

2020

2121
class RUNTIMES(Enum):
@@ -100,66 +100,57 @@ def setup(self) -> None:
100100

101101
def additional_metadata(self) -> dict[str, BenchmarkMetadata]:
102102
metadata = {
103-
"SubmitKernel": BenchmarkMetadata(
104-
type="group",
105-
description="Measures CPU time overhead of submitting kernels through different APIs.",
106-
notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n"
107-
"The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
108-
"The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
109-
"Work is ongoing to reduce the overhead of the SYCL API\n",
110-
tags=["submit", "micro", "SYCL", "UR", "L0"],
111-
range_min=0.0,
112-
),
113103
"SinKernelGraph": BenchmarkMetadata(
114104
type="group",
115105
unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
116106
tags=["submit", "memory", "proxy", "SYCL", "UR", "L0", "graph"],
117107
),
118-
"SubmitGraph": BenchmarkMetadata(
119-
type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"]
120-
),
121108
"FinalizeGraph": BenchmarkMetadata(
122109
type="group", tags=["finalize", "micro", "SYCL", "graph"]
123110
),
124111
}
125112

126113
# Add metadata for all SubmitKernel group variants
127-
base_metadata = metadata["SubmitKernel"]
128-
114+
submit_kernel_metadata = BenchmarkMetadata(
115+
type="group",
116+
notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n"
117+
"The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
118+
"The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
119+
"Work is ongoing to reduce the overhead of the SYCL API\n",
120+
tags=["submit", "micro", "SYCL", "UR", "L0"],
121+
range_min=0.0,
122+
)
129123
for order in ["in order", "out of order"]:
130124
for completion in ["", " with completion"]:
131125
for events in ["", " using events"]:
132126
group_name = f"SubmitKernel {order}{completion}{events} long kernel"
133-
metadata[group_name] = BenchmarkMetadata(
134-
type="group",
135-
description=f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs.",
136-
notes=base_metadata.notes,
137-
tags=base_metadata.tags,
138-
range_min=base_metadata.range_min,
127+
metadata[group_name] = copy.deepcopy(submit_kernel_metadata)
128+
metadata[group_name].description = (
129+
f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs."
139130
)
140-
141131
# CPU count variants
142132
cpu_count_group = f"{group_name}, CPU count"
143-
metadata[cpu_count_group] = BenchmarkMetadata(
144-
type="group",
145-
description=f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs.",
146-
notes=base_metadata.notes,
147-
tags=base_metadata.tags,
148-
range_min=base_metadata.range_min,
133+
metadata[cpu_count_group] = copy.deepcopy(submit_kernel_metadata)
134+
metadata[cpu_count_group].description = (
135+
f"Measures CPU instruction count overhead of submitting {order} kernels with longer execution times through different APIs."
149136
)
150137

151138
# Add metadata for all SubmitGraph group variants
152-
base_metadata = metadata["SubmitGraph"]
139+
submit_graph_metadata = BenchmarkMetadata(
140+
type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"]
141+
)
153142
for order in ["in order", "out of order"]:
154143
for completion in ["", " with completion"]:
155144
for events in ["", " using events"]:
156145
for num_kernels in self.submit_graph_num_kernels:
157-
group_name = f"SubmitGraph {order}{completion}{events}, {num_kernels} kernels"
158-
metadata[group_name] = BenchmarkMetadata(
159-
type="group",
160-
tags=base_metadata.tags,
161-
)
162-
146+
for host_tasks in ["", " use host tasks"]:
147+
group_name = f"SubmitGraph {order}{completion}{events}{host_tasks}, {num_kernels} kernels"
148+
metadata[group_name] = copy.deepcopy(submit_graph_metadata)
149+
# CPU count variants
150+
cpu_count_group = f"{group_name}, CPU count"
151+
metadata[cpu_count_group] = copy.deepcopy(
152+
submit_graph_metadata
153+
)
163154
return metadata
164155

165156
def benchmarks(self) -> list[Benchmark]:
@@ -1088,6 +1079,22 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
10881079
bin_args.append(f"--profilerType={self.profiler_type.value}")
10891080
return bin_args
10901081

1082+
def get_metadata(self) -> dict[str, BenchmarkMetadata]:
1083+
metadata_dict = super().get_metadata()
1084+
1085+
# Create CPU count variant with modified display name and explicit_group
1086+
cpu_count_name = self.name() + " CPU count"
1087+
cpu_count_metadata = copy.deepcopy(metadata_dict[self.name()])
1088+
cpu_count_display_name = self.display_name() + ", CPU count"
1089+
cpu_count_explicit_group = (
1090+
self.explicit_group() + ", CPU count" if self.explicit_group() else ""
1091+
)
1092+
cpu_count_metadata.display_name = cpu_count_display_name
1093+
cpu_count_metadata.explicit_group = cpu_count_explicit_group
1094+
metadata_dict[cpu_count_name] = cpu_count_metadata
1095+
1096+
return metadata_dict
1097+
10911098

10921099
class UllsEmptyKernel(ComputeBenchmark):
10931100
def __init__(

0 commit comments

Comments
 (0)