diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 073501dcbb227..a52ae23695b08 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -79,25 +79,13 @@ runs: python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" [ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset echo "PRESET=$PRESET" >> $GITHUB_ENV - - name: Compute CPU core range to run benchmarks on + - name: Set NUMA node to run benchmarks on shell: bash run: | - # Compute the core range for the first NUMA node; second node is used by - # UMF. Skip the first 4 cores as the kernel is likely to schedule more - # work on these. - CORES="$(lscpu | awk ' - /NUMA node0 CPU|On-line CPU/ {line=$0} - END { - split(line, a, " ") - split(a[4], b, ",") - sub(/^0/, "4", b[1]) - print b[1] - }')" - echo "CPU core range to use: $CORES" - echo "CORES=$CORES" >> $GITHUB_ENV - - ZE_AFFINITY_MASK=0 - echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + # Set CPU and GPU affinity for the first NUMA node; second node is used by UMF + NUMA_NODE=0 + echo "ZE_AFFINITY_MASK=$NUMA_NODE" >> $GITHUB_ENV + echo "NUMA_NODE=$NUMA_NODE" >> $GITHUB_ENV # Compute-benchmarks relies on UR static libraries, cmake config files, etc. # DPC++ doesn't ship with these files. 
def taskset_cmd(self) -> list[str]:
    """Return a ``taskset`` command prefix used for core pinning.

    Compute benchmarks are pinned to a fixed set of CPU cores to ensure
    consistent results and non-zero CPU count measurements (e.g. to avoid
    E-cores). The first 4 cores of the current process's CPU affinity are
    selected so that multi-threaded benchmarks still have several cores
    available. The selected cores are assumed to share the same maximum
    frequency; a warning is logged when the values read from sysfs differ.

    Returns:
        ``["taskset", "-c", "<comma-separated core ids>"]``, intended to be
        prepended to the benchmark command line.
    """
    selected_cores = [str(core) for core in Process().cpu_affinity()[:4]]  # type: ignore

    max_frequencies = set()
    unreadable_cores = []
    for core in selected_cores:
        freq_path = f"/sys/devices/system/cpu/cpu{core}/cpufreq/cpuinfo_max_freq"
        try:
            # Context manager closes the sysfs file; this method runs for
            # every benchmark invocation, so leaked handles would pile up.
            with open(freq_path) as freq_file:
                max_frequencies.add(freq_file.read().strip())
        except OSError:
            # The frequency comparison is only advisory. If the cpufreq
            # entry cannot be read, skip the core instead of aborting the
            # whole benchmark run.
            unreadable_cores.append(core)

    if unreadable_cores:
        log.debug(
            f"Could not read max frequency for cores: {unreadable_cores}"
        )
    if len(max_frequencies) > 1:
        log.warning(
            f"Selected cores for pinning have differing max frequencies: {selected_cores}"
        )
    return ["taskset", "-c", ",".join(selected_cores)]