@@ -3,19 +3,19 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

-from itertools import product
+import copy
 import csv
 import io
-import copy
 import math
 from enum import Enum
+from itertools import product
 from pathlib import Path

-from .base import Benchmark, Suite, TracingType
-from utils.result import BenchmarkMetadata, Result
-from .base import Benchmark, Suite
-from options import options
 from git_project import GitProject
+from options import options
+from utils.result import BenchmarkMetadata, Result
+
+from .base import Benchmark, Suite, TracingType


 class RUNTIMES(Enum):
@@ -100,66 +100,57 @@ def setup(self) -> None:

     def additional_metadata(self) -> dict[str, BenchmarkMetadata]:
         metadata = {
-            "SubmitKernel": BenchmarkMetadata(
-                type="group",
-                description="Measures CPU time overhead of submitting kernels through different APIs.",
-                notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n"
-                "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
-                "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
-                "Work is ongoing to reduce the overhead of the SYCL API\n",
-                tags=["submit", "micro", "SYCL", "UR", "L0"],
-                range_min=0.0,
-            ),
             "SinKernelGraph": BenchmarkMetadata(
                 type="group",
                 unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.",
                 tags=["submit", "memory", "proxy", "SYCL", "UR", "L0", "graph"],
             ),
-            "SubmitGraph": BenchmarkMetadata(
-                type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"]
-            ),
             "FinalizeGraph": BenchmarkMetadata(
                 type="group", tags=["finalize", "micro", "SYCL", "graph"]
             ),
         }

         # Add metadata for all SubmitKernel group variants
-        base_metadata = metadata["SubmitKernel"]
-
+        submit_kernel_metadata = BenchmarkMetadata(
+            type="group",
+            notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n"
+            "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n"
+            "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n"
+            "Work is ongoing to reduce the overhead of the SYCL API\n",
+            tags=["submit", "micro", "SYCL", "UR", "L0"],
+            range_min=0.0,
+        )
         for order in ["in order", "out of order"]:
             for completion in ["", " with completion"]:
                 for events in ["", " using events"]:
                     group_name = f"SubmitKernel {order}{completion}{events} long kernel"
-                    metadata[group_name] = BenchmarkMetadata(
-                        type="group",
-                        description=f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs.",
-                        notes=base_metadata.notes,
-                        tags=base_metadata.tags,
-                        range_min=base_metadata.range_min,
+                    metadata[group_name] = copy.deepcopy(submit_kernel_metadata)
+                    metadata[group_name].description = (
+                        f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs."
                     )
-
                     # CPU count variants
                     cpu_count_group = f"{group_name}, CPU count"
-                    metadata[cpu_count_group] = BenchmarkMetadata(
-                        type="group",
-                        description=f"Measures CPU time overhead of submitting {order} kernels with longer execution times through different APIs.",
-                        notes=base_metadata.notes,
-                        tags=base_metadata.tags,
-                        range_min=base_metadata.range_min,
+                    metadata[cpu_count_group] = copy.deepcopy(submit_kernel_metadata)
+                    metadata[cpu_count_group].description = (
+                        f"Measures CPU instruction count overhead of submitting {order} kernels with longer execution times through different APIs."
                     )

         # Add metadata for all SubmitGraph group variants
-        base_metadata = metadata["SubmitGraph"]
+        submit_graph_metadata = BenchmarkMetadata(
+            type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"]
+        )
         for order in ["in order", "out of order"]:
             for completion in ["", " with completion"]:
                 for events in ["", " using events"]:
                     for num_kernels in self.submit_graph_num_kernels:
-                        group_name = f"SubmitGraph {order}{completion}{events}, {num_kernels} kernels"
-                        metadata[group_name] = BenchmarkMetadata(
-                            type="group",
-                            tags=base_metadata.tags,
-                        )
-
+                        for host_tasks in ["", " use host tasks"]:
+                            group_name = f"SubmitGraph {order}{completion}{events}{host_tasks}, {num_kernels} kernels"
+                            metadata[group_name] = copy.deepcopy(submit_graph_metadata)
+                            # CPU count variants
+                            cpu_count_group = f"{group_name}, CPU count"
+                            metadata[cpu_count_group] = copy.deepcopy(
+                                submit_graph_metadata
+                            )
         return metadata

     def benchmarks(self) -> list[Benchmark]:
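Note on the hunk above: each group variant is now derived from a single template object with `copy.deepcopy` instead of re-listing every field. A minimal sketch of why a deep copy matters here, using a hypothetical `GroupMetadata` dataclass as a stand-in for `BenchmarkMetadata` (the field names below are illustrative assumptions, not the project's real API):

import copy
from dataclasses import dataclass, field


@dataclass
class GroupMetadata:  # hypothetical stand-in for BenchmarkMetadata
    type: str = "group"
    description: str = ""
    tags: list[str] = field(default_factory=list)


template = GroupMetadata(tags=["submit", "micro"])
metadata: dict[str, GroupMetadata] = {}

for order in ["in order", "out of order"]:
    variant = copy.deepcopy(template)  # independent copy, including the mutable tags list
    variant.description = f"Measures overhead of submitting {order} kernels"
    metadata[f"SubmitKernel {order}"] = variant

# Mutating one variant's tags does not leak into the template or its siblings,
# whereas passing tags=template.tags (as the replaced code did) would share one list.
metadata["SubmitKernel in order"].tags.append("long kernel")
assert template.tags == ["submit", "micro"]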
@@ -1088,6 +1079,22 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         bin_args.append(f"--profilerType={self.profiler_type.value}")
         return bin_args

+    def get_metadata(self) -> dict[str, BenchmarkMetadata]:
+        metadata_dict = super().get_metadata()
+
+        # Create CPU count variant with modified display name and explicit_group
+        cpu_count_name = self.name() + " CPU count"
+        cpu_count_metadata = copy.deepcopy(metadata_dict[self.name()])
+        cpu_count_display_name = self.display_name() + ", CPU count"
+        cpu_count_explicit_group = (
+            self.explicit_group() + ", CPU count" if self.explicit_group() else ""
+        )
+        cpu_count_metadata.display_name = cpu_count_display_name
+        cpu_count_metadata.explicit_group = cpu_count_explicit_group
+        metadata_dict[cpu_count_name] = cpu_count_metadata
+
+        return metadata_dict
+

 class UllsEmptyKernel(ComputeBenchmark):
     def __init__(
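The new `get_metadata` override registers a second, derived metadata entry per benchmark for its CPU-count measurements. A condensed sketch of that pattern under simplified assumptions (the `Meta` dataclass and the example names below are hypothetical, not the suite's real `BenchmarkMetadata` or benchmark names):

import copy
from dataclasses import dataclass


@dataclass
class Meta:  # hypothetical stand-in for BenchmarkMetadata
    display_name: str
    explicit_group: str = ""


def add_cpu_count_variant(name: str, metadata: dict[str, Meta]) -> dict[str, Meta]:
    # Derive "<name> CPU count" from the base entry, suffixing its labels the same
    # way the override above does for display_name and explicit_group.
    variant = copy.deepcopy(metadata[name])
    variant.display_name += ", CPU count"
    variant.explicit_group = (
        variant.explicit_group + ", CPU count" if variant.explicit_group else ""
    )
    metadata[name + " CPU count"] = variant
    return metadata


entries = {
    "SubmitKernel": Meta(display_name="SubmitKernel", explicit_group="SubmitKernel in order")
}
entries = add_cpu_count_variant("SubmitKernel", entries)
assert entries["SubmitKernel CPU count"].display_name == "SubmitKernel, CPU count"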