Skip to content

Commit eb728ff

Browse files
committed
Merge branch 'sycl' into msan-always-reserve-origin-mem
2 parents 4369958 + ac9dcf3 commit eb728ff

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+2194
-677
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# This workflow checks for ready-to-merge PRs - if a PR is open, not a draft,
2+
# passed all checks, and has been approved, it will ping @intel/llvm-gatekeepers
3+
# if this group has not already been mentioned or if the last mention was more
4+
# than $days days ago.
5+
6+
name: Check ready-to-merge PRs
7+
8+
on:
9+
schedule:
10+
- cron: '0 * * * *' # every hour
11+
workflow_dispatch:
12+
13+
permissions: read-all
14+
15+
jobs:
16+
notify-ready-prs:
17+
permissions:
18+
pull-requests: write
19+
runs-on: ubuntu-latest
20+
steps:
21+
- name: Check
22+
env:
23+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
24+
run: |
25+
# Number of days before repeating the gatekeepers ping
26+
days=3
27+
days_in_seconds=$((days*24*60*60))
28+
29+
# Function to ping gatekeepers and print debug info
30+
# Posts a comment mentioning @intel/llvm-gatekeepers on the given PR and
# echoes the PR URL to the job log.
#   $1 - number of the pull request in intel/llvm
ping_gatekeepers() {
31+
# Keep the variable function-local so it cannot leak into the caller's scope.
local pr_number="$1"
32+
gh pr comment "$pr_number" --repo intel/llvm --body "@intel/llvm-gatekeepers please consider merging"
33+
echo "Pinged @intel/llvm-gatekeepers for https://github.com/intel/llvm/pull/$pr_number"
34+
}
35+
36+
# Get the list of suitable PRs: open, approved, not a draft, checks passed.
# `gh pr list` returns at most 30 items unless --limit is given, so raise the
# limit to avoid silently skipping ready PRs beyond the first page.
37+
prs=$(gh pr list --search "is:open review:approved draft:no status:success" --repo intel/llvm --limit 1000 --json number --jq '.[].number')
38+
now=$(date -u +%s)
39+
for pr in $prs; do
40+
# Get the timestamp of the latest comment mentioning @intel/llvm-gatekeepers
41+
latest_ts=$(gh pr view "$pr" --repo intel/llvm --json comments \
42+
--jq '[.comments[] | select(.body | test("@intel/llvm-gatekeepers")) | .createdAt] | last')
43+
# If there is no previous mention, ping the gatekeepers
44+
if [[ -z "$latest_ts" ]]; then
45+
ping_gatekeepers "$pr"
46+
# If the latest mention is older than $days, ping the gatekeepers again
47+
else
48+
# Convert the comment's createdAt timestamp to epoch seconds so it can be
# compared against $now.
comment_time=$(date -u -d "$latest_ts" +%s)
49+
age=$((now - comment_time))
50+
if (( age >= days_in_seconds )); then
51+
ping_gatekeepers "$pr"
52+
fi
53+
fi
54+
done

buildbot/configure.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ def do_configure(args, passthrough_args):
8282
if libclc_enabled:
8383
llvm_enable_projects += ";libclc"
8484

85+
# DeviceRTL uses -fuse-ld=lld, so enable lld.
86+
if args.offload:
87+
llvm_enable_projects += ";lld"
88+
sycl_enabled_backends.append("offload")
89+
8590
if args.cuda:
8691
llvm_targets_to_build += ";NVPTX"
8792
libclc_targets_to_build = libclc_nvidia_target_names
@@ -210,6 +215,12 @@ def do_configure(args, passthrough_args):
210215
"-DSYCL_ENABLE_MAJOR_RELEASE_PREVIEW_LIB={}".format(sycl_preview_lib),
211216
"-DBUG_REPORT_URL=https://github.com/intel/llvm/issues",
212217
]
218+
if args.offload:
219+
cmake_cmd.extend(
220+
[
221+
"-DUR_BUILD_ADAPTER_OFFLOAD=ON",
222+
]
223+
)
213224

214225
if libclc_enabled:
215226
cmake_cmd.extend(
@@ -340,6 +351,11 @@ def main():
340351
default="AMD",
341352
help="choose hardware platform for HIP backend",
342353
)
354+
parser.add_argument(
355+
"--offload",
356+
action="store_true",
357+
help="Enable UR liboffload adapter (experimental)",
358+
)
343359
parser.add_argument(
344360
"--level_zero_adapter_version",
345361
type=str,

devops/dependencies-igc-dev.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"linux": {
33
"igc_dev": {
4-
"github_tag": "igc-dev-47c1f6e",
5-
"version": "47c1f6e",
6-
"updated_at": "2025-08-20T15:46:36Z",
7-
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/3810490997/zip",
4+
"github_tag": "igc-dev-46629d9",
5+
"version": "46629d9",
6+
"updated_at": "2025-08-30T10:44:04Z",
7+
"url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/3889106305/zip",
88
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
99
}
1010
}

devops/dependencies.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
2020
},
2121
"level_zero": {
22-
"github_tag": "v1.22.4",
23-
"version": "v1.22.4",
24-
"url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.22.4",
22+
"github_tag": "v1.24.2",
23+
"version": "v1.24.2",
24+
"url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.24.2",
2525
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
2626
},
2727
"tbb": {

devops/scripts/benchmarks/PERFORMANCE_TUNING.md

Lines changed: 11 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,38 +6,16 @@ For framework-specific information, see [README.md](README.md) and [CONTRIB.md](
66
## Table of Contents
77

88
- [Overview](#overview)
9-
- [System Configuration](#system-configuration)
109
- [CPU Tuning](#cpu-tuning)
1110
- [GPU Configuration](#gpu-configuration)
11+
- [Driver Version](#driver-version)
1212
- [Perf Configuration](#perf-configuration)
1313
- [Environment Variables](#environment-variables)
1414

1515
## Overview
1616

1717
Performance benchmarking requires a stable and optimized system environment to produce reliable and reproducible results. This guide covers essential system tuning steps for reducing run-to-run variance in benchmark results.
1818

19-
## System Configuration
20-
21-
### Kernel Parameters
22-
23-
Add the following to `/etc/default/grub` in `GRUB_CMDLINE_LINUX`:
24-
```
25-
# Disable CPU frequency scaling
26-
# intel_pstate=disable
27-
28-
# Isolate CPUs for benchmark workloads (example: reserve cores 2-7), preventing other processes
29-
# from using them.
30-
# isolcpus=2-7
31-
32-
GRUB_CMDLINE_LINUX="intel_pstate=disable isolcpus=2-7 <other_options>"
33-
```
34-
35-
Update GRUB and reboot:
36-
```bash
37-
sudo update-grub
38-
sudo reboot
39-
```
40-
4119
## CPU Tuning
4220

4321
### CPU Frequency Scaling
@@ -66,7 +44,7 @@ After=multi-user.target
6644
6745
[Service]
6846
Type=oneshot
69-
ExecStart=/usr/bin/cpupower frequency-set --governor performance && sysctl --system
47+
ExecStart=/usr/bin/cpupower frequency-set --governor performance
7048
7149
[Install]
7250
WantedBy=multi-user.target
@@ -99,7 +77,12 @@ cat /sys/class/drm/card1/device/vendor # Should be 0x8086 for Intel
9977
cat /sys/class/drm/card1/device/device # Device ID
10078
```
10179

102-
Verify the max frequency is set to the true max. For Arc B580, the maximum frequency is 2850 MHz. To see this value, run “cat /sys/class/drm/card1/device/tile0/gt0/freq0/max_freq”. If the above value is not equal to the max frequency, set it as such:
80+
Verify that the maximum frequency limit is set to the hardware maximum. For Arc B580, the maximum frequency is 2850 MHz. To see the currently configured limit, run:
81+
```bash
82+
cat /sys/class/drm/card1/device/tile0/gt0/freq0/max_freq
83+
```
84+
85+
If the reported value does not match the hardware maximum, set it explicitly:
10386
```bash
10487
# Arc B580 (Battlemage)
10588
echo 2850 | sudo tee /sys/class/drm/card1/device/tile0/gt0/freq0/max_freq
@@ -118,9 +101,9 @@ max_freq=$(cat /sys/class/drm/card1/gt_max_freq_mhz)
118101
echo $max_freq | sudo tee /sys/class/drm/card1/gt_min_freq_mhz
119102
```
120103

121-
The result can be verified using tools such as oneprof or unitrace to track frequency over time for some arbitrary benchmark (many iterations of a small problem size is recommended). The frequency should remain fixed assuming thermal throttling does not occur.
104+
The result can be verified using tools such as `oneprof` or `unitrace` to track frequency over time for some arbitrary benchmark (many iterations of a small problem size is recommended). The frequency should remain fixed assuming thermal throttling does not occur.
122105

123-
## Driver version
106+
## Driver Version
124107
Make sure you are using the latest driver. On Ubuntu:
125108
```bash
126109
sudo apt update && sudo apt upgrade
@@ -145,6 +128,7 @@ Make the setting persistent across reboots by adding it to sysctl configuration:
145128
echo 'kernel.perf_event_paranoid = -1' | sudo tee -a /etc/sysctl.d/99-perf.conf
146129

147130
# Apply immediately
131+
sudo sysctl kernel.perf_event_paranoid=-1
148132
sudo sysctl --system
149133
```
150134

devops/scripts/benchmarks/benches/base.py

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,24 @@
77
import shutil
88
import subprocess
99
from pathlib import Path
10+
from enum import Enum
1011
from utils.result import BenchmarkMetadata, BenchmarkTag, Result
1112
from options import options
1213
from utils.utils import download, run
1314
from abc import ABC, abstractmethod
1415
from utils.unitrace import get_unitrace
16+
from utils.flamegraph import get_flamegraph
1517
from utils.logger import log
1618

19+
20+
class TracingType(Enum):
21+
"""Enumeration of available tracing types."""
22+
23+
# Default value of the `run_trace` parameters in this file: no tracing requested.
NONE = ""
24+
# Selects the Unitrace path in run_bench(), which wraps the command via get_unitrace().
UNITRACE = "unitrace"
25+
# Selects the FlameGraph path in run_bench(), which wraps the command via get_flamegraph().
FLAMEGRAPH = "flamegraph"
26+
27+
1728
benchmark_tags = [
1829
BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"),
1930
BenchmarkTag("UR", "Benchmark uses Unified Runtime API"),
@@ -62,12 +73,17 @@ def enabled(self) -> bool:
6273
By default, it returns True, but can be overridden to disable a benchmark."""
6374
return True
6475

65-
def traceable(self) -> bool:
66-
"""Returns whether this benchmark should be traced by Unitrace.
67-
By default, it returns True, but can be overridden to disable tracing for a benchmark.
76+
def traceable(self, tracing_type: TracingType) -> bool:
77+
"""Returns whether this benchmark should be traced by the specified tracing method.
78+
By default, it returns True for all tracing types, but can be overridden
79+
to disable specific tracing methods for a benchmark.
6880
"""
6981
# The base implementation ignores tracing_type and opts in unconditionally;
# tracing_enabled() consults this result unless force_trace is set.
return True
7082

83+
def tracing_enabled(self, run_trace, force_trace, tr_type: TracingType):
84+
"""Returns whether tracing is enabled for the given type."""
85+
# True only when the requested tracer (run_trace) is tr_type AND the benchmark
# either allows that tracer via traceable() or force_trace overrides its opt-out.
return (self.traceable(tr_type) or force_trace) and run_trace == tr_type
86+
7187
@abstractmethod
7288
def setup(self):
7389
pass
@@ -77,12 +93,18 @@ def teardown(self):
7793
pass
7894

7995
@abstractmethod
80-
def run(self, env_vars, run_unitrace: bool = False) -> list[Result]:
96+
def run(
97+
self,
98+
env_vars,
99+
run_trace: TracingType = TracingType.NONE,
100+
force_trace: bool = False,
101+
) -> list[Result]:
81102
"""Execute the benchmark with the given environment variables.
82103
83104
Args:
84105
env_vars: Environment variables to use when running the benchmark.
85-
run_unitrace: Whether to run benchmark under Unitrace.
106+
run_trace: The type of tracing to run (NONE, UNITRACE, or FLAMEGRAPH).
107+
force_trace: If True, ignore the traceable() method and force tracing.
86108
87109
Returns:
88110
A list of Result objects with the benchmark results.
@@ -111,8 +133,9 @@ def run_bench(
111133
ld_library=[],
112134
add_sycl=True,
113135
use_stdout=True,
114-
run_unitrace=False,
115-
extra_unitrace_opt=None,
136+
run_trace: TracingType = TracingType.NONE,
137+
extra_trace_opt=None,
138+
force_trace: bool = False,
116139
):
117140
env_vars = env_vars.copy()
118141
if options.ur is not None:
@@ -125,15 +148,26 @@ def run_bench(
125148
ld_libraries = options.extra_ld_libraries.copy()
126149
ld_libraries.extend(ld_library)
127150

128-
if self.traceable() and run_unitrace:
129-
if extra_unitrace_opt is None:
130-
extra_unitrace_opt = []
151+
unitrace_output = None
152+
if self.tracing_enabled(run_trace, force_trace, TracingType.UNITRACE):
153+
if extra_trace_opt is None:
154+
extra_trace_opt = []
131155
unitrace_output, command = get_unitrace().setup(
132-
self.name(), command, extra_unitrace_opt
156+
self.name(), command, extra_trace_opt
133157
)
134158
log.debug(f"Unitrace output: {unitrace_output}")
135159
log.debug(f"Unitrace command: {' '.join(command)}")
136160

161+
# flamegraph run
162+
163+
perf_data_file = None
164+
if self.tracing_enabled(run_trace, force_trace, TracingType.FLAMEGRAPH):
165+
perf_data_file, command = get_flamegraph().setup(
166+
self.name(), self.get_suite_name(), command
167+
)
168+
log.debug(f"FlameGraph perf data: {perf_data_file}")
169+
log.debug(f"FlameGraph command: {' '.join(command)}")
170+
137171
try:
138172
result = run(
139173
command=command,
@@ -143,13 +177,27 @@ def run_bench(
143177
ld_library=ld_libraries,
144178
)
145179
except subprocess.CalledProcessError:
146-
if run_unitrace:
180+
if run_trace == TracingType.UNITRACE and unitrace_output:
147181
get_unitrace().cleanup(options.benchmark_cwd, unitrace_output)
182+
if run_trace == TracingType.FLAMEGRAPH and perf_data_file:
183+
get_flamegraph().cleanup(perf_data_file)
148184
raise
149185

150-
if self.traceable() and run_unitrace:
186+
if (
187+
self.tracing_enabled(run_trace, force_trace, TracingType.UNITRACE)
188+
and unitrace_output
189+
):
151190
get_unitrace().handle_output(unitrace_output)
152191

192+
if (
193+
self.tracing_enabled(run_trace, force_trace, TracingType.FLAMEGRAPH)
194+
and perf_data_file
195+
):
196+
svg_file = get_flamegraph().handle_output(
197+
self.name(), perf_data_file, self.get_suite_name()
198+
)
199+
log.info(f"FlameGraph generated: {svg_file}")
200+
153201
if use_stdout:
154202
return result.stdout.decode()
155203
else:

devops/scripts/benchmarks/benches/benchdnn.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from pathlib import Path
88

9-
from .base import Suite, Benchmark
9+
from .base import Suite, Benchmark, TracingType
1010
from options import options
1111
from utils.utils import git_clone, run, create_build_path
1212
from utils.result import Result
@@ -132,7 +132,18 @@ def setup(self):
132132
if not self.bench_bin.exists():
133133
raise FileNotFoundError(f"Benchmark binary not found: {self.bench_bin}")
134134

135-
def run(self, env_vars, run_unitrace: bool = False) -> list[Result]:
135+
def run(
136+
self,
137+
env_vars,
138+
run_trace: TracingType = TracingType.NONE,
139+
force_trace: bool = False,
140+
) -> list[Result]:
141+
# Determine extra trace options based on tracing type
142+
if run_trace == TracingType.UNITRACE:
143+
extra_trace_opt = ["--chrome-dnn-logging"]
144+
else:
145+
extra_trace_opt = None
146+
136147
command = [
137148
str(self.bench_bin),
138149
*self.bench_args.split(),
@@ -151,8 +162,9 @@ def run(self, env_vars, run_unitrace: bool = False) -> list[Result]:
151162
add_sycl=True,
152163
ld_library=ld_library,
153164
use_stdout=True,
154-
run_unitrace=run_unitrace,
155-
extra_unitrace_opt=["--chrome-dnn-logging"],
165+
run_trace=run_trace,
166+
extra_trace_opt=extra_trace_opt,
167+
force_trace=force_trace,
156168
)
157169
result_value = self._extract_time(output)
158170

0 commit comments

Comments
 (0)