Skip to content

Commit 4585816

Browse files
committed
Merge remote-tracking branch 'origin/sycl' into pool_fix_with_e2e_test
2 parents db67768 + 72b85b6 commit 4585816

File tree

73 files changed

+2340
-703
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+2340
-703
lines changed

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,7 @@ const char *SYCLInstallationDetector::findLibspirvPath(
6868
SmallString<128> LibraryPath(Path);
6969
llvm::sys::path::append(LibraryPath, a, b, c, Basename);
7070

71-
if (Args.hasArgNoClaim(options::OPT__HASH_HASH_HASH) ||
72-
llvm::sys::fs::exists(LibraryPath))
71+
if (llvm::sys::fs::exists(LibraryPath))
7372
return Args.MakeArgString(LibraryPath);
7473

7574
return nullptr;

clang/test/Driver/sycl-libspirv-toolchain.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,16 @@
3434
// RUN: | FileCheck %s -DINSTALL_DIR=%{install_dir} -DRESOURCE_DIR=%{resource_dir} --check-prefixes=CHECK-DIR
3535
// CHECK-DIR: "-cc1"{{.*}} "-fsycl-is-device"{{.*}} "-mlink-builtin-bitcode" "[[INSTALL_DIR]]{{.*[\\/]}}remangled-{{.*}}.libspirv-nvptx64-nvidia-cuda.bc"
3636
//
37-
// The `-###` option disables file existence checks
37+
// If the libspirv path doesn't exist, an error is reported.
3838
// DEFINE: %{nonexistent_dir} = %/S/Inputs/SYCL/does_not_exist/lib/clang/resource_dir
39-
// RUN: %clang -### -ccc-install-dir %{nonexistent_dir} -fsycl -fsycl-targets=nvptx64-nvidia-cuda -nocudalib %s 2>&1 \
39+
// RUN: not %clang -### -ccc-install-dir %{nonexistent_dir} -fsycl -fsycl-targets=nvptx64-nvidia-cuda -nocudalib %s 2>&1 \
4040
// RUN: | FileCheck %s -DDIR=%{nonexistent_dir} --check-prefixes=CHECK-HHH-NONEXISTENT
41-
// CHECK-HHH-NONEXISTENT: "-cc1"{{.*}} "-fsycl-is-device"{{.*}} "-mlink-builtin-bitcode" "[[DIR]]{{.*[\\/]}}remangled-{{.*}}.libspirv-nvptx64-nvidia-cuda.bc"
41+
// CHECK-HHH-NONEXISTENT: error: cannot find 'remangled-{{.*}}.libspirv-nvptx64-nvidia-cuda.bc'; provide path to libspirv library via '-fsycl-libspirv-path', or pass '-fno-sycl-libspirv' to build without linking with libspirv
4242
//
43-
// RUN: %clang -### -ccc-install-dir %{nonexistent_dir} -fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx908 -nogpulib %s 2>&1 \
43+
// RUN: not %clang -### -ccc-install-dir %{nonexistent_dir} -fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx908 -nogpulib %s 2>&1 \
4444
// RUN: | FileCheck %s -DDIR=%{nonexistent_dir} --check-prefixes=CHECK-AMDGCN-HHH-NONEXISTENT
45-
// CHECK-AMDGCN-HHH-NONEXISTENT: "-cc1"{{.*}} "-fsycl-is-device"{{.*}} "-mlink-builtin-bitcode" "[[DIR]]{{.*[\\/]}}remangled-{{.*}}.libspirv-amdgcn-amd-amdhsa.bc"
45+
// CHECK-AMDGCN-HHH-NONEXISTENT: clang: error: cannot find 'remangled-{{.*}}.libspirv-amdgcn-amd-amdhsa.bc'; provide path to libspirv library via '-fsycl-libspirv-path', or pass '-fno-sycl-libspirv' to build without linking with libspirv
4646
//
47-
// `-fdriver-only` has no such special handling, so it will not find the file
4847
// RUN: not %clang -fdriver-only -ccc-install-dir %{nonexistent_dir} -fsycl -fsycl-targets=nvptx64-nvidia-cuda -nocudalib %s 2>&1 \
4948
// RUN: | FileCheck %s -DDIR=%{nonexistent_dir} --check-prefixes=CHECK-DO-NONEXISTENT
5049
// CHECK-DO-NONEXISTENT: error: cannot find 'remangled-{{.*}}.libspirv-nvptx64-nvidia-cuda.bc'; provide path to libspirv library via '-fsycl-libspirv-path', or pass '-fno-sycl-libspirv' to build without linking with libspirv
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu %s -### 2>&1 | FileCheck %s
2-
// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu --target=aarch64-unknown-linux-gnu %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-AARCH64
1+
// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu -fno-sycl-libspirv %s -### 2>&1 | FileCheck %s
2+
// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu --target=aarch64-unknown-linux-gnu -fno-sycl-libspirv %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-AARCH64
33

44

55
// checks that the host triple is native_cpu, the device triple is set, and that the sycl-native-cpu LLVM option is set
@@ -8,29 +8,29 @@
88
// checks that the target triples are set correctly when the target is set explicitly
99
// CHECK-AARCH64: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "aarch64-unknown-linux-gnu" {{.*}}"-D" "__SYCL_NATIVE_CPU__"
1010

11-
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -g %s 2>&1 | FileCheck -check-prefix=CHECK-LINUX %s
11+
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fsycl -fsycl-targets=native_cpu -g -fno-sycl-libspirv %s 2>&1 | FileCheck -check-prefix=CHECK-LINUX %s
1212
// CHECK-LINUX: {{.*}}"-fsycl-is-device"{{.*}}"-dwarf-version=[[DVERSION:.*]]" "-debugger-tuning=gdb"
1313
// CHECK-LINUX-DAG: {{.*}}"-fsycl-is-host"{{.*}}"-dwarf-version=[[DVERSION]]" "-debugger-tuning=gdb"
1414
// CHECK-LINUX-NOT: codeview
1515

16-
// RUN: %clang -### --target=x86_64-windows-msvc -fsycl -fsycl-targets=native_cpu -g %s 2>&1 | FileCheck -check-prefix=CHECK-WIN %s
16+
// RUN: %clang -### --target=x86_64-windows-msvc -fsycl -fsycl-targets=native_cpu -g -fno-sycl-libspirv %s 2>&1 | FileCheck -check-prefix=CHECK-WIN %s
1717
// CHECK-WIN: {{.*}}"-fsycl-is-device"{{.*}}"-gcodeview"
1818
// CHECK-WIN-DAG: {{.*}}"-fsycl-is-host"{{.*}}"-gcodeview"
1919
// CHECK-WIN-NOT: dwarf
2020

2121
// checks that -sycl-opt is not enabled by default on NativeCPU so that the full llvm optimization is enabled
2222
// Also check that we pass the expected backend options.
23-
// RUN: %clang -fsycl -fsycl-targets=native_cpu --target=aarch64-unknown-linux-gnu -march=armv9.4-a -### %s 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s
23+
// RUN: %clang -fsycl -fsycl-targets=native_cpu --target=aarch64-unknown-linux-gnu -march=armv9.4-a -fno-sycl-libspirv -### %s 2>&1 | FileCheck -check-prefix=CHECK-OPTS %s
2424
// CHECK-OPTS: clang{{.*}}"-triple" "native_cpu"{{.*}}"-aux-triple" "[[TRIPLE:[^"]*]]"
2525
// CHECK-OPTS: clang{{.*}}"-triple" "[[TRIPLE]]"{{.*}}"-fsycl-is-device"
2626
// CHECK-OPTS-NOT: -sycl-opt
2727
// CHECK-OPTS-SAME: "-Wno-override-module" "-mllvm" "-sycl-native-cpu-backend"
2828
// CHECK-OPTS-SAME: "-aux-target-feature" "+v9.4a"
2929

30-
// RUN: %clangxx -fsycl -fsycl-targets=spir64 %s -### 2>&1 | FileCheck -check-prefix=CHECK-NONATIVECPU %s
30+
// RUN: %clangxx -fsycl -fsycl-targets=spir64 -fno-sycl-libspirv %s -### 2>&1 | FileCheck -check-prefix=CHECK-NONATIVECPU %s
3131
// CHECK-NONATIVECPU-NOT: "-D" "__SYCL_NATIVE_CPU__"
3232

3333
// Checking that coverage testing options are accepted by native_cpu, and that device and host compilation invocations receive the same options
34-
// RUN: %clangxx -fsycl -fsycl-targets=native_cpu -Werror -fno-profile-instr-generate -fprofile-instr-generate -fno-coverage-mapping -fcoverage-mapping -### %s 2>&1 | FileCheck %s --check-prefix=CHECK_COV_INVO
34+
// RUN: %clangxx -fsycl -fsycl-targets=native_cpu -Werror -fno-profile-instr-generate -fprofile-instr-generate -fno-coverage-mapping -fcoverage-mapping -fno-sycl-libspirv -Wno-unsafe-libspirv-not-linked -### %s 2>&1 | FileCheck %s --check-prefix=CHECK_COV_INVO
3535
// CHECK_COV_INVO:{{.*}}clang{{.*}}"-fsycl-is-device"{{.*}} "-D" "__SYCL_NATIVE_CPU__"{{.*}}"-fprofile-instrument=clang"{{.*}}"-fcoverage-mapping" "-fcoverage-compilation-dir={{.*}}"
3636
// CHECK_COV_INVO:{{.*}}clang{{.*}}"-fsycl-is-host"{{.*}}"-fprofile-instrument=clang"{{.*}}"-fcoverage-mapping" "-fcoverage-compilation-dir={{.*}}"

devops/dependencies.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
2020
},
2121
"level_zero": {
22-
"github_tag": "v1.22.4",
23-
"version": "v1.22.4",
24-
"url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.22.4",
22+
"github_tag": "v1.24.2",
23+
"version": "v1.24.2",
24+
"url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.24.2",
2525
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
2626
},
2727
"tbb": {

devops/scripts/benchmarks/PERFORMANCE_TUNING.md

Lines changed: 11 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,38 +6,16 @@ For framework-specific information, see [README.md](README.md) and [CONTRIB.md](
66
## Table of Contents
77

88
- [Overview](#overview)
9-
- [System Configuration](#system-configuration)
109
- [CPU Tuning](#cpu-tuning)
1110
- [GPU Configuration](#gpu-configuration)
11+
- [Driver Version](#driver-version)
1212
- [Perf Configuration](#perf-configuration)
1313
- [Environment Variables](#environment-variables)
1414

1515
## Overview
1616

1717
Performance benchmarking requires a stable and optimized system environment to produce reliable and reproducible results. This guide covers essential system tuning steps for reducing run-to-run variance in benchmark results.
1818

19-
## System Configuration
20-
21-
### Kernel Parameters
22-
23-
Add the following to `/etc/default/grub` in `GRUB_CMDLINE_LINUX`:
24-
```
25-
# Disable CPU frequency scaling
26-
# intel_pstate=disable
27-
28-
# Isolate CPUs for benchmark workloads (example: reserve cores 2-7), preventing other processes
29-
# from using them.
30-
# isolcpus=2-7
31-
32-
GRUB_CMDLINE_LINUX="intel_pstate=disable isolcpus=2-7 <other_options>"
33-
```
34-
35-
Update GRUB and reboot:
36-
```bash
37-
sudo update-grub
38-
sudo reboot
39-
```
40-
4119
## CPU Tuning
4220

4321
### CPU Frequency Scaling
@@ -66,7 +44,7 @@ After=multi-user.target
6644
6745
[Service]
6846
Type=oneshot
69-
ExecStart=/usr/bin/cpupower frequency-set --governor performance && sysctl --system
47+
ExecStart=/usr/bin/cpupower frequency-set --governor performance
7048
7149
[Install]
7250
WantedBy=multi-user.target
@@ -99,7 +77,12 @@ cat /sys/class/drm/card1/device/vendor # Should be 0x8086 for Intel
9977
cat /sys/class/drm/card1/device/device # Device ID
10078
```
10179

102-
Verify the max frequency is set to the true max. For Arc B580, the maximum frequency is 2850 MHz. To see this value, run “cat /sys/class/drm/card1/device/tile0/gt0/freq0/max_freq”. If the above value is not equal to the max frequency, set it as such:
80+
Verify the max frequency is set to the true max. For Arc B580, the maximum frequency is 2850 MHz. To see this value, run:
81+
```bash
82+
cat /sys/class/drm/card1/device/tile0/gt0/freq0/max_freq
83+
```
84+
85+
If the above value is not equal to the max frequency, set it as such:
10386
```bash
10487
# Arc B580 (Battlemage)
10588
echo 2850 > /sys/class/drm/card1/device/tile0/gt0/freq0/max_freq
@@ -118,9 +101,9 @@ max_freq=$(cat /sys/class/drm/card1/gt_max_freq_mhz)
118101
echo $max_freq | sudo tee /sys/class/drm/card1/gt_min_freq_mhz
119102
```
120103

121-
The result can be verified using tools such as oneprof or unitrace to track frequency over time for some arbitrary benchmark (many iterations of a small problem size is recommended). The frequency should remain fixed assuming thermal throttling does not occur.
104+
The result can be verified using tools such as `oneprof` or `unitrace` to track frequency over time for some arbitrary benchmark (many iterations of a small problem size are recommended). The frequency should remain fixed assuming thermal throttling does not occur.
122105

123-
## Driver version
106+
## Driver Version
124107
Make sure you are using the latest driver (Ubuntu):
125108
```bash
126109
sudo apt update && sudo apt upgrade
@@ -145,6 +128,7 @@ Make the setting persistent across reboots by adding it to sysctl configuration:
145128
echo 'kernel.perf_event_paranoid = -1' | sudo tee -a /etc/sysctl.d/99-perf.conf
146129

147130
# Apply immediately
131+
sudo sysctl kernel.perf_event_paranoid=-1
148132
sudo sysctl -p /etc/sysctl.d/99-perf.conf
149133
```
150134

devops/scripts/benchmarks/benches/base.py

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,24 @@
77
import shutil
88
import subprocess
99
from pathlib import Path
10+
from enum import Enum
1011
from utils.result import BenchmarkMetadata, BenchmarkTag, Result
1112
from options import options
1213
from utils.utils import download, run
1314
from abc import ABC, abstractmethod
1415
from utils.unitrace import get_unitrace
16+
from utils.flamegraph import get_flamegraph
1517
from utils.logger import log
1618

19+
20+
class TracingType(Enum):
21+
"""Enumeration of available tracing types."""
22+
23+
NONE = ""
24+
UNITRACE = "unitrace"
25+
FLAMEGRAPH = "flamegraph"
26+
27+
1728
benchmark_tags = [
1829
BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"),
1930
BenchmarkTag("UR", "Benchmark uses Unified Runtime API"),
@@ -62,12 +73,17 @@ def enabled(self) -> bool:
6273
By default, it returns True, but can be overridden to disable a benchmark."""
6374
return True
6475

65-
def traceable(self) -> bool:
66-
"""Returns whether this benchmark should be traced by Unitrace.
67-
By default, it returns True, but can be overridden to disable tracing for a benchmark.
76+
def traceable(self, tracing_type: TracingType) -> bool:
77+
"""Returns whether this benchmark should be traced by the specified tracing method.
78+
By default, it returns True for all tracing types, but can be overridden
79+
to disable specific tracing methods for a benchmark.
6880
"""
6981
return True
7082

83+
def tracing_enabled(self, run_trace, force_trace, tr_type: TracingType):
84+
"""Returns whether tracing is enabled for the given type."""
85+
return (self.traceable(tr_type) or force_trace) and run_trace == tr_type
86+
7187
@abstractmethod
7288
def setup(self):
7389
pass
@@ -77,12 +93,18 @@ def teardown(self):
7793
pass
7894

7995
@abstractmethod
80-
def run(self, env_vars, run_unitrace: bool = False) -> list[Result]:
96+
def run(
97+
self,
98+
env_vars,
99+
run_trace: TracingType = TracingType.NONE,
100+
force_trace: bool = False,
101+
) -> list[Result]:
81102
"""Execute the benchmark with the given environment variables.
82103
83104
Args:
84105
env_vars: Environment variables to use when running the benchmark.
85-
run_unitrace: Whether to run benchmark under Unitrace.
106+
run_trace: The type of tracing to run (NONE, UNITRACE, or FLAMEGRAPH).
107+
force_trace: If True, ignore the traceable() method and force tracing.
86108
87109
Returns:
88110
A list of Result objects with the benchmark results.
@@ -111,8 +133,9 @@ def run_bench(
111133
ld_library=[],
112134
add_sycl=True,
113135
use_stdout=True,
114-
run_unitrace=False,
115-
extra_unitrace_opt=None,
136+
run_trace: TracingType = TracingType.NONE,
137+
extra_trace_opt=None,
138+
force_trace: bool = False,
116139
):
117140
env_vars = env_vars.copy()
118141
if options.ur is not None:
@@ -125,15 +148,26 @@ def run_bench(
125148
ld_libraries = options.extra_ld_libraries.copy()
126149
ld_libraries.extend(ld_library)
127150

128-
if self.traceable() and run_unitrace:
129-
if extra_unitrace_opt is None:
130-
extra_unitrace_opt = []
151+
unitrace_output = None
152+
if self.tracing_enabled(run_trace, force_trace, TracingType.UNITRACE):
153+
if extra_trace_opt is None:
154+
extra_trace_opt = []
131155
unitrace_output, command = get_unitrace().setup(
132-
self.name(), command, extra_unitrace_opt
156+
self.name(), command, extra_trace_opt
133157
)
134158
log.debug(f"Unitrace output: {unitrace_output}")
135159
log.debug(f"Unitrace command: {' '.join(command)}")
136160

161+
# flamegraph run
162+
163+
perf_data_file = None
164+
if self.tracing_enabled(run_trace, force_trace, TracingType.FLAMEGRAPH):
165+
perf_data_file, command = get_flamegraph().setup(
166+
self.name(), self.get_suite_name(), command
167+
)
168+
log.debug(f"FlameGraph perf data: {perf_data_file}")
169+
log.debug(f"FlameGraph command: {' '.join(command)}")
170+
137171
try:
138172
result = run(
139173
command=command,
@@ -143,13 +177,27 @@ def run_bench(
143177
ld_library=ld_libraries,
144178
)
145179
except subprocess.CalledProcessError:
146-
if run_unitrace:
180+
if run_trace == TracingType.UNITRACE and unitrace_output:
147181
get_unitrace().cleanup(options.benchmark_cwd, unitrace_output)
182+
if run_trace == TracingType.FLAMEGRAPH and perf_data_file:
183+
get_flamegraph().cleanup(perf_data_file)
148184
raise
149185

150-
if self.traceable() and run_unitrace:
186+
if (
187+
self.tracing_enabled(run_trace, force_trace, TracingType.UNITRACE)
188+
and unitrace_output
189+
):
151190
get_unitrace().handle_output(unitrace_output)
152191

192+
if (
193+
self.tracing_enabled(run_trace, force_trace, TracingType.FLAMEGRAPH)
194+
and perf_data_file
195+
):
196+
svg_file = get_flamegraph().handle_output(
197+
self.name(), perf_data_file, self.get_suite_name()
198+
)
199+
log.info(f"FlameGraph generated: {svg_file}")
200+
153201
if use_stdout:
154202
return result.stdout.decode()
155203
else:

devops/scripts/benchmarks/benches/benchdnn.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from pathlib import Path
88

9-
from .base import Suite, Benchmark
9+
from .base import Suite, Benchmark, TracingType
1010
from options import options
1111
from utils.utils import git_clone, run, create_build_path
1212
from utils.result import Result
@@ -132,7 +132,18 @@ def setup(self):
132132
if not self.bench_bin.exists():
133133
raise FileNotFoundError(f"Benchmark binary not found: {self.bench_bin}")
134134

135-
def run(self, env_vars, run_unitrace: bool = False) -> list[Result]:
135+
def run(
136+
self,
137+
env_vars,
138+
run_trace: TracingType = TracingType.NONE,
139+
force_trace: bool = False,
140+
) -> list[Result]:
141+
# Determine extra trace options based on tracing type
142+
if run_trace == TracingType.UNITRACE:
143+
extra_trace_opt = ["--chrome-dnn-logging"]
144+
else:
145+
extra_trace_opt = None
146+
136147
command = [
137148
str(self.bench_bin),
138149
*self.bench_args.split(),
@@ -151,8 +162,9 @@ def run(self, env_vars, run_unitrace: bool = False) -> list[Result]:
151162
add_sycl=True,
152163
ld_library=ld_library,
153164
use_stdout=True,
154-
run_unitrace=run_unitrace,
155-
extra_unitrace_opt=["--chrome-dnn-logging"],
165+
run_trace=run_trace,
166+
extra_trace_opt=extra_trace_opt,
167+
force_trace=force_trace,
156168
)
157169
result_value = self._extract_time(output)
158170

0 commit comments

Comments
 (0)