intel
diff --git a/‎.github/workflows/sycl-check-ready-to-merge-prs.yml‎
Lines changed: 54 additions & 0 deletions b/‎.github/workflows/sycl-check-ready-to-merge-prs.yml‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎buildbot/configure.py‎
Lines changed: 16 additions & 0 deletions b/‎buildbot/configure.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎devops/dependencies-igc-dev.json‎
Lines changed: 4 additions & 4 deletions b/‎devops/dependencies-igc-dev.json‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎devops/dependencies.json‎
Lines changed: 3 additions & 3 deletions b/‎devops/dependencies.json‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎devops/scripts/benchmarks/PERFORMANCE_TUNING.md‎
Lines changed: 11 additions & 27 deletions b/‎devops/scripts/benchmarks/PERFORMANCE_TUNING.md‎
Lines changed: 11 additions & 27 deletions
diff --git a/‎devops/scripts/benchmarks/benches/base.py‎
Lines changed: 61 additions & 13 deletions b/‎devops/scripts/benchmarks/benches/base.py‎
Lines changed: 61 additions & 13 deletions
diff --git a/‎devops/scripts/benchmarks/benches/benchdnn.py‎
Lines changed: 16 additions & 4 deletions b/‎devops/scripts/benchmarks/benches/benchdnn.py‎
Lines changed: 16 additions & 4 deletions
@@ -0,0 +1,54 @@
+# This workflow checks for ready-to-merge PRs - if a PR is open, not a draft,
+# has passed all checks, and has been approved, it will ping
+# @intel/llvm-gatekeepers if this group has not already been mentioned or if
+# the last mention was more than $days days ago.
+
+name: Check ready-to-merge PRs
+
+on:
+  schedule:
+    - cron: '0 * * * *' # every hour
+  workflow_dispatch:
+
+permissions: read-all
+
+jobs:
+  notify-ready-prs:
+    # Scheduled workflows can also fire in forks, where GITHUB_TOKEN is scoped
+    # to the fork and cannot comment on intel/llvm PRs, so skip everywhere else.
+    if: github.repository == 'intel/llvm'
+    permissions:
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Number of days before repeating the gatekeepers ping
+          days=3
+          days_in_seconds=$((days*24*60*60))
+
+          # Function to ping gatekeepers and print debug info
+          ping_gatekeepers() {
+            local pr_number=$1
+            gh pr comment "$pr_number" --repo intel/llvm --body "@intel/llvm-gatekeepers please consider merging"
+            echo "Pinged @intel/llvm-gatekeepers for https://github.com/intel/llvm/pull/$pr_number"
+          }
+
+          # Get the list of suitable PRs. `gh pr list` returns at most 30
+          # results unless --limit is given, so raise the cap explicitly to
+          # avoid silently skipping PRs beyond the first page.
+          prs=$(gh pr list --limit 200 --search "is:open review:approved draft:no status:success" --repo intel/llvm --json number --jq '.[].number')
+          now=$(date -u +%s)
+          for pr in $prs; do
+            # Get the timestamp of the latest comment mentioning @intel/llvm-gatekeepers
+            latest_ts=$(gh pr view "$pr" --repo intel/llvm --json comments \
+              --jq '[.comments[] | select(.body | test("@intel/llvm-gatekeepers")) | .createdAt] | last')
+            # If there is no previous mention, ping the gatekeepers
+            if [[ -z "$latest_ts" ]]; then
+              ping_gatekeepers "$pr"
+            # If the latest mention is older than $days, ping the gatekeepers again
+            else
+              comment_time=$(date -u -d "$latest_ts" +%s)
+              age=$((now - comment_time))
+              if (( age >= days_in_seconds )); then
+                ping_gatekeepers "$pr"
+              fi
+            fi
+          done
@@ -82,6 +82,11 @@ def do_configure(args, passthrough_args):
     if libclc_enabled:
         llvm_enable_projects += ";libclc"
 
+    # DeviceRTL uses -fuse-ld=lld, so enable lld.
+    if args.offload:
+        llvm_enable_projects += ";lld"
+        sycl_enabled_backends.append("offload")
+
     if args.cuda:
         llvm_targets_to_build += ";NVPTX"
         libclc_targets_to_build = libclc_nvidia_target_names
@@ -210,6 +215,12 @@ def do_configure(args, passthrough_args):
         "-DSYCL_ENABLE_MAJOR_RELEASE_PREVIEW_LIB={}".format(sycl_preview_lib),
         "-DBUG_REPORT_URL=https://github.com/intel/llvm/issues",
     ]
+    if args.offload:
+        cmake_cmd.extend(
+            [
+                "-DUR_BUILD_ADAPTER_OFFLOAD=ON",
+            ]
+        )
 
     if libclc_enabled:
         cmake_cmd.extend(
@@ -340,6 +351,11 @@ def main():
         default="AMD",
         help="choose hardware platform for HIP backend",
     )
+    parser.add_argument(
+        "--offload",
+        action="store_true",
+        help="Enable UR liboffload adapter (experimental)",
+    )
     parser.add_argument(
         "--level_zero_adapter_version",
         type=str,
 
@@ -1,10 +1,10 @@
 {
   "linux": {
     "igc_dev": {
-      "github_tag": "igc-dev-47c1f6e",
-      "version": "47c1f6e",
-      "updated_at": "2025-08-20T15:46:36Z",
-      "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/3810490997/zip",
+      "github_tag": "igc-dev-46629d9",
+      "version": "46629d9",
+      "updated_at": "2025-08-30T10:44:04Z",
+      "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/3889106305/zip",
       "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
     }
   }
 
@@ -19,9 +19,9 @@
       "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
     },
     "level_zero": {
-      "github_tag": "v1.22.4",
-      "version": "v1.22.4",
-      "url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.22.4",
+      "github_tag": "v1.24.2",
+      "version": "v1.24.2",
+      "url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.24.2",
       "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
     },
     "tbb": {
 
@@ -6,38 +6,16 @@ For framework-specific information, see [README.md](README.md) and [CONTRIB.md](
 ## Table of Contents
 
 - [Overview](#overview)
-- [System Configuration](#system-configuration)
 - [CPU Tuning](#cpu-tuning)
 - [GPU Configuration](#gpu-configuration)
+- [Driver Version](#driver-version)
 - [Perf Configuration](#perf-configuration)
 - [Environment Variables](#environment-variables)
 
 ## Overview
 
 Performance benchmarking requires a stable and optimized system environment to produce reliable and reproducible results. This guide covers essential system tuning steps for reducing run-to-run variance in benchmark results.
 
-## System Configuration
-
-### Kernel Parameters
-
-Add the following to `/etc/default/grub` in `GRUB_CMDLINE_LINUX`:
-```
-# Disable CPU frequency scaling
-# intel_pstate=disable
-
-# Isolate CPUs for benchmark workloads (example: reserve cores 2-7), preventing other processes
-# from using them.
-# isolcpus=2-7
-
-GRUB_CMDLINE_LINUX="intel_pstate=disable isolcpus=2-7 <other_options>"
-```
-
-Update GRUB and reboot:
-```bash
-sudo update-grub
-sudo reboot
-```
-
 ## CPU Tuning
 
 ### CPU Frequency Scaling
@@ -66,7 +44,7 @@ After=multi-user.target
 
 [Service]
 Type=oneshot
-ExecStart=/usr/bin/cpupower frequency-set --governor performance && sysctl --system
+ExecStart=/usr/bin/cpupower frequency-set --governor performance
 
 [Install]
 WantedBy=multi-user.target
@@ -99,7 +77,12 @@ cat /sys/class/drm/card1/device/vendor  # Should be 0x8086 for Intel
 cat /sys/class/drm/card1/device/device  # Device ID
 ```
 
-Verify the max frequency is set to the true max. For Arc B580, the maximum frequency is 2850 MHz. To see this value, run “cat /sys/class/drm/card1/device/tile0/gt0/freq0/max_freq”. If the above value is not equal to the max frequency, set it as such:
+Verify the max frequency is set to the true max. For Arc B580, the maximum frequency is 2850 MHz. To see this value, run:
+```bash
+cat /sys/class/drm/card1/device/tile0/gt0/freq0/max_freq
+```
+
+If the above value is not equal to the max frequency, set it as such:
 ```bash
 # Arc B580 (Battlemage)
 echo 2850 > /sys/class/drm/card1/device/tile0/gt0/freq0/max_freq
@@ -118,9 +101,9 @@ max_freq=$(cat /sys/class/drm/card1/gt_max_freq_mhz)
 echo $max_freq | sudo tee /sys/class/drm/card1/gt_min_freq_mhz
 ```
 
-The result can be verified using tools such as oneprof or unitrace to track frequency over time for some arbitrary benchmark (many iterations of a small problem size is recommended). The frequency should remain fixed assuming thermal throttling does not occur.
+The result can be verified using tools such as `oneprof` or `unitrace` to track frequency over time for some arbitrary benchmark (many iterations of a small problem size is recommended). The frequency should remain fixed assuming thermal throttling does not occur.
 
-## Driver version
+## Driver Version
 Make sure you are using the latest driver (Ubuntu):
 ```bash
 sudo apt update && sudo apt upgrade
@@ -145,6 +128,7 @@ Make the setting persistent across reboots by adding it to sysctl configuration:
 echo 'kernel.perf_event_paranoid = -1' | sudo tee -a /etc/sysctl.d/99-perf.conf
 
 # Apply immediately
+sudo sysctl kernel.perf_event_paranoid=-1
 sudo sysctl -p /etc/sysctl.d/99-perf.conf
 ```
 
 
@@ -7,13 +7,24 @@
 import shutil
 import subprocess
 from pathlib import Path
+from enum import Enum
 from utils.result import BenchmarkMetadata, BenchmarkTag, Result
 from options import options
 from utils.utils import download, run
 from abc import ABC, abstractmethod
 from utils.unitrace import get_unitrace
+from utils.flamegraph import get_flamegraph
 from utils.logger import log
 
+
+class TracingType(Enum):
+    """Enumeration of available tracing types."""
+
+    NONE = ""  # default for run_trace: no tracer is set up
+    UNITRACE = "unitrace"  # command is wrapped via get_unitrace()
+    FLAMEGRAPH = "flamegraph"  # command is wrapped via get_flamegraph()
+
+
 benchmark_tags = [
     BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"),
     BenchmarkTag("UR", "Benchmark uses Unified Runtime API"),
@@ -62,12 +73,17 @@ def enabled(self) -> bool:
         By default, it returns True, but can be overridden to disable a benchmark."""
         return True
 
-    def traceable(self) -> bool:
-        """Returns whether this benchmark should be traced by Unitrace.
-        By default, it returns True, but can be overridden to disable tracing for a benchmark.
+    def traceable(self, tracing_type: TracingType) -> bool:
+        """Returns whether this benchmark should be traced by the specified tracing method.
+        By default, it returns True for all tracing types, but can be overridden
+        to disable specific tracing methods for a benchmark.
+
+        The base implementation does not inspect tracing_type. The result is
+        combined with force_trace in tracing_enabled(), so force_trace can
+        override a False returned from here.
         """
         return True
 
+    def tracing_enabled(
+        self, run_trace: TracingType, force_trace: bool, tr_type: TracingType
+    ) -> bool:
+        """Returns whether tracing is enabled for the given type.
+
+        Tracing of kind tr_type is enabled only when it is the kind that was
+        requested (run_trace == tr_type) and either this benchmark reports
+        itself as traceable for tr_type or force_trace is set.
+
+        Args:
+            run_trace: The tracing type requested for this run.
+            force_trace: If True, the result of traceable() is ignored.
+            tr_type: The tracing type being checked.
+        """
+        return (self.traceable(tr_type) or force_trace) and run_trace == tr_type
+
     @abstractmethod
     def setup(self):
         pass
@@ -77,12 +93,18 @@ def teardown(self):
         pass
 
     @abstractmethod
-    def run(self, env_vars, run_unitrace: bool = False) -> list[Result]:
+    def run(
+        self,
+        env_vars,
+        run_trace: TracingType = TracingType.NONE,
+        force_trace: bool = False,
+    ) -> list[Result]:
         """Execute the benchmark with the given environment variables.
 
         Args:
             env_vars: Environment variables to use when running the benchmark.
-            run_unitrace: Whether to run benchmark under Unitrace.
+            run_trace: The type of tracing to run (NONE, UNITRACE, or FLAMEGRAPH).
+            force_trace: If True, ignore the traceable() method and force tracing.
 
         Returns:
             A list of Result objects with the benchmark results.
@@ -111,8 +133,9 @@ def run_bench(
         ld_library=[],
         add_sycl=True,
         use_stdout=True,
-        run_unitrace=False,
-        extra_unitrace_opt=None,
+        run_trace: TracingType = TracingType.NONE,
+        extra_trace_opt=None,
+        force_trace: bool = False,
     ):
         env_vars = env_vars.copy()
         if options.ur is not None:
@@ -125,15 +148,26 @@ def run_bench(
         ld_libraries = options.extra_ld_libraries.copy()
         ld_libraries.extend(ld_library)
 
-        if self.traceable() and run_unitrace:
-            if extra_unitrace_opt is None:
-                extra_unitrace_opt = []
+        unitrace_output = None
+        if self.tracing_enabled(run_trace, force_trace, TracingType.UNITRACE):
+            if extra_trace_opt is None:
+                extra_trace_opt = []
             unitrace_output, command = get_unitrace().setup(
-                self.name(), command, extra_unitrace_opt
+                self.name(), command, extra_trace_opt
             )
             log.debug(f"Unitrace output: {unitrace_output}")
             log.debug(f"Unitrace command: {' '.join(command)}")
 
+        # flamegraph run
+
+        perf_data_file = None
+        if self.tracing_enabled(run_trace, force_trace, TracingType.FLAMEGRAPH):
+            perf_data_file, command = get_flamegraph().setup(
+                self.name(), self.get_suite_name(), command
+            )
+            log.debug(f"FlameGraph perf data: {perf_data_file}")
+            log.debug(f"FlameGraph command: {' '.join(command)}")
+
         try:
             result = run(
                 command=command,
@@ -143,13 +177,27 @@ def run_bench(
                 ld_library=ld_libraries,
             )
         except subprocess.CalledProcessError:
-            if run_unitrace:
+            if run_trace == TracingType.UNITRACE and unitrace_output:
                 get_unitrace().cleanup(options.benchmark_cwd, unitrace_output)
+            if run_trace == TracingType.FLAMEGRAPH and perf_data_file:
+                get_flamegraph().cleanup(perf_data_file)
             raise
 
-        if self.traceable() and run_unitrace:
+        if (
+            self.tracing_enabled(run_trace, force_trace, TracingType.UNITRACE)
+            and unitrace_output
+        ):
             get_unitrace().handle_output(unitrace_output)
 
+        if (
+            self.tracing_enabled(run_trace, force_trace, TracingType.FLAMEGRAPH)
+            and perf_data_file
+        ):
+            svg_file = get_flamegraph().handle_output(
+                self.name(), perf_data_file, self.get_suite_name()
+            )
+            log.info(f"FlameGraph generated: {svg_file}")
+
         if use_stdout:
             return result.stdout.decode()
         else:
 
@@ -6,7 +6,7 @@
 
 from pathlib import Path
 
-from .base import Suite, Benchmark
+from .base import Suite, Benchmark, TracingType
 from options import options
 from utils.utils import git_clone, run, create_build_path
 from utils.result import Result
@@ -132,7 +132,18 @@ def setup(self):
         if not self.bench_bin.exists():
             raise FileNotFoundError(f"Benchmark binary not found: {self.bench_bin}")
 
-    def run(self, env_vars, run_unitrace: bool = False) -> list[Result]:
+    def run(
+        self,
+        env_vars,
+        run_trace: TracingType = TracingType.NONE,
+        force_trace: bool = False,
+    ) -> list[Result]:
+        # Determine extra trace options based on tracing type
+        if run_trace == TracingType.UNITRACE:
+            extra_trace_opt = ["--chrome-dnn-logging"]
+        else:
+            extra_trace_opt = None
+
         command = [
             str(self.bench_bin),
             *self.bench_args.split(),
@@ -151,8 +162,9 @@ def run(self, env_vars, run_unitrace: bool = False) -> list[Result]:
             add_sycl=True,
             ld_library=ld_library,
             use_stdout=True,
-            run_unitrace=run_unitrace,
-            extra_unitrace_opt=["--chrome-dnn-logging"],
+            run_trace=run_trace,
+            extra_trace_opt=extra_trace_opt,
+            force_trace=force_trace,
         )
         result_value = self._extract_time(output)
Original file line number	Diff line number	Diff line change
`@@ -1,10 +1,10 @@`
`1`	`1`	`{`
`2`	`2`	`"linux": {`
`3`	`3`	`"igc_dev": {`
`4`		`- "github_tag": "igc-dev-47c1f6e",`
`5`		`- "version": "47c1f6e",`
`6`		`- "updated_at": "2025-08-20T15:46:36Z",`
`7`		`- "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/3810490997/zip",`
	`4`	`+ "github_tag": "igc-dev-46629d9",`
	`5`	`+ "version": "46629d9",`
	`6`	`+ "updated_at": "2025-08-30T10:44:04Z",`
	`7`	`+ "url": "https://api.github.com/repos/intel/intel-graphics-compiler/actions/artifacts/3889106305/zip",`
`8`	`8`	`"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"`
`9`	`9`	`}`
`10`	`10`	`}`