diff --git a/.github/workflows/sycl-benchmark-aggregate.yml b/.github/workflows/sycl-benchmark-aggregate.yml new file mode 100644 index 0000000000000..87f7ef718160a --- /dev/null +++ b/.github/workflows/sycl-benchmark-aggregate.yml @@ -0,0 +1,52 @@ +name: Aggregate compute-benchmark averages from historical data + +# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on +# how the benchmark results compare to a historical average: This historical +# average is calculated in this workflow, which aggregates historical data and +# produces measures of central tendency (median in this case) used for this +# purpose. + +on: + workflow_dispatch: + inputs: + lookback_days: + description: | + Number of days from today to look back in historical results for: + This sets the age limit of data used in average calculation: Any + benchmark results created before `lookback_days` from today is + excluded from being aggregated in the historical average. + type: number + required: true + workflow_call: + inputs: + lookback_days: + type: number + required: true + secrets: + LLVM_SYCL_BENCHMARK_TOKEN: + description: | + Github token used by the faceless account to push newly calculated + medians. + required: true + + +permissions: + contents: read + +jobs: + aggregate: + name: Aggregate average (median) value for all metrics + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + sparse-checkout: | + devops/scripts/benchmarking + devops/benchmarking + devops/actions/benchmarking + - name: Aggregate benchmark results and produce historical average + uses: ./devops/actions/benchmarking/aggregate + with: + lookback_days: ${{ inputs.lookback_days }} + env: + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 7a4dd382e8bca..710ef9f8cbb9b 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -25,7 +25,7 @@ on: required: False tests_selector: description: | - Two possible options: "e2e" and "cts". + Three possible options: "e2e", "cts", and "compute-benchmarks". 
type: string default: "e2e" @@ -152,6 +152,7 @@ on: options: - e2e - cts + - compute-benchmarks env: description: | @@ -314,3 +315,12 @@ jobs: sycl_cts_artifact: ${{ inputs.sycl_cts_artifact }} target_devices: ${{ inputs.target_devices }} retention-days: ${{ inputs.retention-days }} + + - name: Run compute-benchmarks on SYCL + if: inputs.tests_selector == 'compute-benchmarks' + uses: ./devops/actions/run-tests/benchmark + with: + target_devices: ${{ inputs.target_devices }} + env: + RUNNER_TAG: ${{ inputs.runner }} + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} diff --git a/.github/workflows/sycl-nightly.yml b/.github/workflows/sycl-nightly.yml index 572284125449f..243919404bc6a 100644 --- a/.github/workflows/sycl-nightly.yml +++ b/.github/workflows/sycl-nightly.yml @@ -243,6 +243,46 @@ jobs: sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }} sycl_cts_artifact: sycl_cts_bin + aggregate_benchmark_results: + if: always() && !cancelled() + name: Aggregate benchmark results and produce historical averages + uses: ./.github/workflows/sycl-benchmark-aggregate.yml + secrets: + LLVM_SYCL_BENCHMARK_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} + with: + lookback_days: 100 + + run-sycl-benchmarks: + needs: [ubuntu2204_build, aggregate_benchmark_results] + if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }} + strategy: + fail-fast: false + matrix: + include: + - name: Run compute-benchmarks on L0 Gen12 + runner: '["Linux", "gen12"]' + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: level_zero:gpu + reset_intel_gpu: true + - name: Run compute-benchmarks on L0 PVC + runner: '["Linux", "pvc"]' + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: level_zero:gpu + reset_intel_gpu: false + uses: ./.github/workflows/sycl-linux-run-tests.yml + secrets: inherit + with: + name: ${{ matrix.name }} + runner: ${{ matrix.runner }} + image_options: ${{ matrix.image_options }} + target_devices: ${{ matrix.target_devices }} + tests_selector: compute-benchmarks + reset_intel_gpu: ${{ matrix.reset_intel_gpu }} + ref: ${{ github.sha }} + sycl_toolchain_artifact: sycl_linux_default + sycl_toolchain_archive: ${{ needs.ubuntu2204_build.outputs.artifact_archive_name }} + sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }} + nightly_build_upload: name: Nightly Build Upload if: ${{ github.ref_name == 'sycl' }} diff --git a/devops/actions/benchmarking/aggregate/action.yml b/devops/actions/benchmarking/aggregate/action.yml new file mode 100644 index 0000000000000..c062636684b1f --- /dev/null +++ b/devops/actions/benchmarking/aggregate/action.yml @@ -0,0 +1,95 @@ +name: 'Aggregate compute-benchmark results and produce historical averages' + +# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on +# how the benchmark results compare to a historical average: This historical +# average is calculated in this composite workflow, which aggregates historical +# data and produces measures of central tendency (median in this case) used for +# this purpose. +# +# This action assumes that /devops has been checked out in ./devops. 
This action +# also assumes that GITHUB_TOKEN was properly set in env, because according to +# Github, that's apparently the recommended way to pass a secret into a github +# action: +# +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +# + +inputs: + lookback_days: + type: number + required: true + +runs: + using: "composite" + steps: + - name: Obtain oldest timestamp allowed for data in aggregation + shell: bash + run: | + # DO NOT use inputs.lookback_days directly, only use SANITIZED_TIMESTAMP. + SANITIZED_LOOKBACK_DAYS="$(echo '${{ inputs.lookback_days }}' | grep -oE '^[0-9]+$')" + if [ -z "$SANITIZED_LOOKBACK_DAYS" ]; then + echo "Please ensure inputs.lookback_days is a number." + exit 1 + fi + SANITIZED_TIMESTAMP="$(date -d "$SANITIZED_LOOKBACK_DAYS days ago" +%Y%m%d_%H%M%S)" + if [ -z "$(echo "$SANITIZED_TIMESTAMP" | grep -oE '^[0-9]{8}_[0-9]{6}$' )" ]; then + echo "Invalid timestamp generated: is inputs.lookback_days valid?" + exit 1 + fi + echo "SANITIZED_TIMESTAMP=$SANITIZED_TIMESTAMP" >> $GITHUB_ENV + - name: Load benchmarking configuration + shell: bash + run: | + $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + echo "SANITIZED_PERF_RES_GIT_REPO=$SANITIZED_PERF_RES_GIT_REPO" >> $GITHUB_ENV + echo "SANITIZED_PERF_RES_GIT_BRANCH=$SANITIZED_PERF_RES_GIT_BRANCH" >> $GITHUB_ENV + - name: Checkout historical performance results repository + shell: bash + run: | + if [ ! -d ./llvm-ci-perf-results ]; then + git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results + fi + - name: Run aggregator on historical results + shell: bash + run: | + # The current format of the historical results respository is: + # + # /// + # + # Thus, a min/max depth of 3 is used to enumerate all test cases in the + # repository. Test name is also derived from here. + find ./llvm-ci-perf-results -mindepth 3 -maxdepth 3 -type d ! -path '*.git*' | + while read -r dir; do + test_name="$(basename "$dir")" + python ./devops/scripts/benchmarking/aggregate.py ./devops "$test_name" "$dir" "$SANITIZED_TIMESTAMP" + done + - name: Upload average to the repo + shell: bash + run: | + cd ./llvm-ci-perf-results + git config user.name "SYCL Benchmarking Bot" + git config user.email "sys_sycl_benchmarks@intel.com" + git pull + # Make sure changes have been made + if git diff --quiet && git diff --cached --quiet; then + echo "No changes to median, skipping push." + else + git add . + git commit -m "[GHA] Aggregate median data from $SANITIZED_TIMESTAMP to $(date +%Y%m%d_%H%M%S)" + git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" + fi + - name: Find aggregated average results artifact here + if: always() + shell: bash + run: | + cat << EOF + # + # Artifact link for aggregated averages here: + # + EOF + - name: Archive new medians + if: always() + uses: actions/upload-artifact@v4 + with: + name: llvm-ci-perf-results new medians + path: ./llvm-ci-perf-results/**/*-median.csv diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml new file mode 100644 index 0000000000000..7f69fdf832982 --- /dev/null +++ b/devops/actions/run-tests/benchmark/action.yml @@ -0,0 +1,107 @@ +name: 'Run compute-benchmarks' + +# Run compute-benchmarks on SYCL +# +# This action assumes SYCL is in ./toolchain, and that /devops has been +# checked out in ./devops. 
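+# (For reference, sycl-linux-run-tests.yml invokes this action roughly as
+# follows; the exact values are supplied by that workflow's inputs:
+#
+#      - uses: ./devops/actions/run-tests/benchmark
+#        with:
+#          target_devices: ${{ inputs.target_devices }}
+#        env:
+#          RUNNER_TAG: ${{ inputs.runner }}
+#          GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
+# )
+#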
This action also assumes that GITHUB_TOKEN +# was properly set in env, because according to Github, that's apparently the +# recommended way to pass a secret into a github action: +# +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +# +# This action also expects a RUNNER_TAG environment variable to be set to the +# runner tag used to run this workflow: Currently, only gen12 and pvc on Linux +# are fully supported. Although this workflow won't stop you from running other +# devices, note that only gen12 and pvc has been tested to work. +# + +inputs: + target_devices: + type: string + required: True + +runs: + using: "composite" + steps: + - name: Check specified runner type / target backend + shell: bash + env: + TARGET_DEVICE: ${{ inputs.target_devices }} + run: | + case "$RUNNER_TAG" in + '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; + *) + echo "#" + echo "# WARNING: Only gen12/pvc on Linux is fully supported." + echo "# This workflow is not guaranteed to work with other runners." + echo "#" ;; + esac + + # input.target_devices is not directly used, as this allows code injection + case "$TARGET_DEVICE" in + level_zero:*) ;; + *) + echo "#" + echo "# WARNING: Only level_zero backend is fully supported." + echo "# This workflow is not guaranteed to work with other backends." + echo "#" ;; + esac + - name: Run compute-benchmarks + shell: bash + run: | + cat << EOF + # + # NOTE TO DEVELOPERS: + # + + Check latter steps of the workflow: This job produces an artifact with: + - benchmark results from passing/failing tests + - log containing all failing (too slow) benchmarks + - log containing all erroring benchmarks + + While this step in the workflow provides debugging output describing this + information, it might be easier to inspect the logs from the artifact + instead. + + EOF + export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" + export CMPLR_ROOT=./toolchain + echo "-----" + sycl-ls + echo "-----" + ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 + - name: Push compute-benchmarks results + if: always() + shell: bash + run: | + # TODO -- waiting on security clearance + # Load configuration values + $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + + cd "./llvm-ci-perf-results" + git config user.name "SYCL Benchmarking Bot" + git config user.email "sys_sycl_benchmarks@intel.com" + git pull + git add . + # Make sure changes have been made + if git diff --quiet && git diff --cached --quiet; then + echo "No new results added, skipping push." 
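+          # (An empty diff here usually means benchmark.sh cached no new
+          # passing results, i.e. every enabled test either regressed or
+          # errored; only passing results are moved into this clone.)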
+ else + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" + fi + - name: Find benchmark result artifact here + if: always() + shell: bash + run: | + cat << EOF + # + # Artifact link for benchmark results here: + # + EOF + - name: Archive compute-benchmark results + if: always() + uses: actions/upload-artifact@v4 + with: + name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) + path: ./artifact diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini new file mode 100644 index 0000000000000..c0b3ca9c31c9e --- /dev/null +++ b/devops/benchmarking/config.ini @@ -0,0 +1,44 @@ +; +; This file contains configuration options to change the behaviour of the +; benchmarking workflow in sycl-linux-run-tests.yml. +; +; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The +; contents of this file must be sanitized first before use. +; See: /devops/scripts/benchmarking/common.py +; + +; Compute-benchmark compile/run options +[compute_bench] +; Value for -j during compilation of compute-benchmarks +compile_jobs = 2 +; Number of iterations to run compute-benchmark tests +iterations = 100 + +; Options for benchmark result metrics (to record/compare against) +[metrics] +; Sets the metrics to record/aggregate in the historical average. +; Format: comma-separated list of column names in compute-benchmark results +recorded = Median,StdDev +; Sets the tolerance for each recorded metric and their allowed deviation from +; the historical average. Metrics not included here are not compared against +; when passing/failing benchmark results. +; Format: comma-separated list of : +tolerances = Median:0.5 + +; Options for computing historical averages +[average] +; Number of days (from today) to look back for results when computing historical +; average +cutoff_range = 7 +; Minimum number of samples required to compute a historical average +min_threshold = 3 + +; ONEAPI_DEVICE_SELECTOR linting/options +[device_selector] +; Backends to allow in device_selector +enabled_backends = level_zero,opencl,cuda,hip +; native_cpu is disabled + +; Devices to allow in device_selector +enabled_devices = cpu,gpu +; fpga is disabled diff --git a/devops/benchmarking/constants.ini b/devops/benchmarking/constants.ini new file mode 100644 index 0000000000000..9281ece8f4950 --- /dev/null +++ b/devops/benchmarking/constants.ini @@ -0,0 +1,48 @@ +; +; This file defines constants used throughout the benchmarking workflow in +; sycl-linux-run-tests.yml. If you're trying to change the behavior of this +; workflow, you're likely looking for /devops/benchmarking/config.ini instead. +; +; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The +; contents of this file must be sanitized first before use. +; See: /devops/scripts/benchmarking/common.py +; + +; Constants for compute-benchmarks +[compute_bench] +git_repo = intel/compute-benchmarks +git_branch = master +git_commit = 230a3db4d8d03c0e9a663988f7c3abbd1137a1e0 +; path = ./compute-benchmarks + +; Constants for git repo storing benchmark performance results +[perf_res] +git_repo = intel/llvm-ci-perf-results +git_branch = main +; Path to clone performance result repo +; path = ./llvm-ci-perf-results + +; It was decided that paths should be hardcoded throughout this workflow for +; security reasons and ease of readability. 
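+; (The non-path constants above are consumed through
+; scripts/benchmarking/load_config.py, which exports each option as a
+; sanitized environment variable named after its section and option,
+; e.g. [perf_res] git_repo becomes SANITIZED_PERF_RES_GIT_REPO.)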
Do not use paths as constants. + +; ; Constants for artifacts +; [artifact] +; ; Path to root folder storing benchmark CI artifact +; path = ./artifact +; ; Path (relative to artifact.path) to cache compute-benchmark results +; ; +; ; If a test result does not get moved out of this catch-all cache path, it is +; ; considered to have failed +; output_cache = ./artifact/failed_tests +; ; Path (relative to artifact.path) to cache passing compute-benchmark results +; passing_cache = ./artifact/passing_tests + +; [timestamp] +; ; Timestamp format used for +; format = %%Y%%m%%d_%%H%%M%%S + +; [benchmark_log] +; ; Log file for test cases that perform over the allowed variance +; slow = ./artifact/benchmarks_failed.log +; ; Log file for test cases that errored / failed to build +; error = ./artifact/benchmarks_errored.log diff --git a/devops/benchmarking/enabled_tests.conf b/devops/benchmarking/enabled_tests.conf new file mode 100644 index 0000000000000..20659cbea636d --- /dev/null +++ b/devops/benchmarking/enabled_tests.conf @@ -0,0 +1,8 @@ +# Test cases to be enabled: +api_overhead_benchmark_sycl +memory_benchmark_sycl +miscellaneous_benchmark_sycl +ulls_benchmark_sycl + +# As of January 2025, these are every compute-benchmark tests with a SYCL +# implementation. diff --git a/devops/scripts/benchmarking/aggregate.py b/devops/scripts/benchmarking/aggregate.py new file mode 100644 index 0000000000000..f62a8ffed83c5 --- /dev/null +++ b/devops/scripts/benchmarking/aggregate.py @@ -0,0 +1,205 @@ +import csv +import sys +from pathlib import Path +import heapq +import statistics +from common import Validate, SanitizedConfig +from abc import ABC, abstractmethod +import os + + +class Aggregator(ABC): + """ + Aggregator classes used to "aggregate" a pool of elements, and produce an + "average" (precisely, some "measure of central tendency") from the elements. + """ + + @staticmethod + @abstractmethod + def get_type() -> str: + """ + Return a string indicating the type of average this aggregator + produces. + """ + pass + + @abstractmethod + def add(self, n: float): + """ + Add/aggregate an element to the pool of elements used by this aggregator + to produce an average calculation. + """ + pass + + @abstractmethod + def get_avg(self) -> float: + """ + Produce an average from the pool of elements aggregated using add(). + """ + pass + + +class SimpleMedian(Aggregator): + """ + Simple median calculation: if the number of samples being generated are low, + this is the fastest median method. + """ + + def __init__(self): + self.elements = [] + + @staticmethod + def get_type() -> str: + return "median" + + def add(self, n: float): + self.elements.append(n) + + def get_avg(self) -> float: + return statistics.median(self.elements) + + +class StreamingMedian(Aggregator): + """ + Calculate medians incrementally using heaps: Theoretically the fastest way + to calculate a median from a stream of elements, but realistically is only + faster when dealing with huge numbers of samples that would be generated by + i.e. enabling this workflow in precommit and using longer periods of time. + """ + + def __init__(self): + # Gist: we keep a minheap and a maxheap, and store the median as the top + # of the minheap. When a new element comes it gets put into the heap + # based on if the element is bigger than the current median. Then, the + # heaps are heapified and the median is repopulated by heapify. 
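+        # Worked example: after add(3), add(1) and add(4), maxheap_smaller
+        # holds 3 and 1 (stored negated) while minheap_larger holds 4, so
+        # get_avg() returns 3 -- the median of the three samples.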
+ self.minheap_larger = [] + self.maxheap_smaller = [] + + @staticmethod + def get_type() -> str: + return "median" + + # Note: numbers on maxheap should be negative, as heapq + # is minheap by default + + def add(self, n: float): + if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n: + heapq.heappush(self.maxheap_smaller, -n) + else: + heapq.heappush(self.minheap_larger, n) + + # Ensure minheap has more elements than maxheap + if len(self.maxheap_smaller) > len(self.minheap_larger) + 1: + heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller)) + elif len(self.maxheap_smaller) < len(self.minheap_larger): + heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger)) + + def get_avg(self) -> float: + if len(self.maxheap_smaller) == len(self.minheap_larger): + # Equal number of elements smaller and larger than "median": + # thus, there are two median values. The median would then become + # the average of both median values. + return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0 + else: + # Otherwise, median is always in minheap, as minheap is always + # bigger + return -self.maxheap_smaller[0] + + +class Aggregate: + """ + Static class providing methods for aggregating data + """ + + @staticmethod + def hist_avg( + benchmark_name: str, res_dir: str, cutoff: str, aggregator=SimpleMedian + ): + if not os.path.isdir(res_dir): + print(f"Not a directory: {res_dir}.", file=sys.stderr) + exit(1) + + def get_csv_samples() -> list[str]: + """Get all valid .csv samples from the results folder.""" + cache_dir = Path(f"{res_dir}") + # Filter all benchmark .csv files in the result directory: + return list( + filter( + # Make sure the .csv "file" is a file: + lambda f: f.is_file() + # Make sure timestamp of .csv file is good format: + # [-19:-4] corresponds to the timestamp in the filename. + and Validate.timestamp(str(f)[-19:-4]) + # Make sure timestamp is bigger than cutoff timestamp: + and str(f)[-19:-4] > cutoff, + cache_dir.glob(f"{benchmark_name}-*_*.csv"), + ) + ) + + # Calculate median of every desired metric: + samples_aggregate = dict() + filtered_samples = get_csv_samples() + if len(filtered_samples) == 0: + print( + f"WARNING: No results for {benchmark_name} found from {cutoff} to now", + file=sys.stderr, + ) + for sample_path in filtered_samples: + with open(sample_path, "r") as sample_file: + for sample in csv.DictReader(sample_file): + test = sample["TestCase"] + # Construct entry in aggregator for test if it doesn't exist + # already: + if test not in samples_aggregate: + samples_aggregate[test] = { + metric: aggregator() + for metric in SanitizedConfig.METRICS_TOLERANCES + } + + # For each metric of concern, add to aggregator: + for metric in SanitizedConfig.METRICS_TOLERANCES: + sample_value = Validate.sanitize_stat(sample[metric]) + if not isinstance(sample_value, float): + print( + f"Malformatted statistic in {str(sample_path)}: " + + f"'{sample[metric]}' for {test}." 
+ ) + exit(1) + # Add metric from sample for current test to aggregate: + samples_aggregate[test][metric].add(sample_value) + + # Calculate + write new average (from samples_aggregate) in new .csv file: + with open( + f"{res_dir}/{benchmark_name}-{aggregator.get_type()}.csv", "w" + ) as output_csv: + writer = csv.DictWriter( + output_csv, + fieldnames=["TestCase", *SanitizedConfig.METRICS_TOLERANCES.keys()], + ) + writer.writeheader() + for test in samples_aggregate: + writer.writerow( + {"TestCase": test} + | { + metric: samples_aggregate[test][metric].get_avg() + for metric in SanitizedConfig.METRICS_TOLERANCES + } + ) + + +if __name__ == "__main__": + if len(sys.argv) != 5: + print( + f"Usage: {sys.argv[0]} " + ) + exit(1) + if not Validate.timestamp(sys.argv[4]): + print(f"Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.", file=sys.stderr) + exit(1) + if not Validate.filepath(sys.argv[1]): + print(f"Not a valid filepath: {sys.argv[1]}", file=sys.stderr) + exit(1) + # If the filepath provided passed filepath validation, then it is clean + SanitizedConfig.load(sys.argv[1]) + + Aggregate.hist_avg(sys.argv[2], sys.argv[3], sys.argv[4]) diff --git a/devops/scripts/benchmarking/benchmark.sh b/devops/scripts/benchmarking/benchmark.sh new file mode 100755 index 0000000000000..bbfd669774f9a --- /dev/null +++ b/devops/scripts/benchmarking/benchmark.sh @@ -0,0 +1,300 @@ +#!/bin/sh + +# +# benchmark.sh: Benchmark dpcpp using compute-benchmarks +# + +usage () { + >&2 echo "Usage: $0 -t [-B ] + -n Github runner name -- Required + -c Clean up working directory + -C Clean up working directory and exit + -s Cache results + +This script builds and runs benchmarks from compute-benchmarks." + exit 1 +} + +# Ensures test cases read from enabled_tests.conf contains no malicious content +_validate_testname () { + if [ -n "$(printf "%s" "$1" | sed "s/[a-zA-Z_]*//g")" ]; then + echo "Illegal characters in $TEST_CONFIG. Permitted characters: a-zA-Z_" + exit 1 + fi +} + +clone_perf_res() { + echo "### Cloning llvm-ci-perf-results ($SANITIZED_PERF_RES_GIT_REPO:$SANITIZED_PERF_RES_GIT_BRANCH) ###" + git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results + [ "$?" -ne 0 ] && exit "$?" +} + +clone_compute_bench() { + echo "### Cloning compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###" + git clone -b "$SANITIZED_COMPUTE_BENCH_GIT_BRANCH" \ + --recurse-submodules "https://github.com/$SANITIZED_COMPUTE_BENCH_GIT_REPO" \ + ./compute-benchmarks + if [ ! -d "./compute-benchmarks" ]; then + echo "Failed to clone compute-benchmarks." + exit 1 + elif [ -n "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT" ]; then + cd ./compute-benchmarks + git checkout "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT" + if [ "$?" -ne 0 ]; then + echo "Failed to get compute-benchmarks commit '$SANITIZED_COMPUTE_BENCH_GIT_COMMIT'." + exit 1 + fi + cd - + fi +} + +build_compute_bench() { + echo "### Building compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###" + mkdir ./compute-benchmarks/build && cd ./compute-benchmarks/build && + # No reason to turn on ccache, if this docker image will be disassembled later on + cmake .. -DBUILD_SYCL=ON -DBUILD_L0=OFF -DBUILD=OCL=OFF -DCCACHE_ALLOWED=FALSE + # TODO enable mechanism for opting into L0 and OCL -- the concept is to + # subtract OCL/L0 times from SYCL times in hopes of deriving SYCL runtime + # overhead, but this is mostly an idea that needs to be mulled upon. 
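+    # The loop below builds only the test binaries listed in
+    # enabled_tests.conf, e.g. "make -j2 api_overhead_benchmark_sycl" with the
+    # default compile_jobs = 2 from config.ini.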
+ + if [ "$?" -eq 0 ]; then + while IFS= read -r case; do + # Skip lines starting with '#' + [ "${case##\#*}" ] || continue + + _validate_testname "$case" + make "-j$SANITIZED_COMPUTE_BENCH_COMPILE_JOBS" "$case" + done < "$TESTS_CONFIG" + fi + cd - +} + +# Check if the number of samples for a given test case is less than a threshold +# set in benchmark-ci.conf +# +# Usage: +samples_under_threshold () { + # Directory doesn't exist, samples automatically under threshold + [ ! -d "./llvm-ci-perf-results/$1" ] && return 0 + file_count="$(find "./llvm-ci-perf-results/$1" -maxdepth 1 -type f | wc -l )" + [ "$file_count" -lt "$SANITIZED_AVERAGE_MIN_THRESHOLD" ] +} + +# Check for a regression via compare.py +# +# Usage: check_regression +check_regression() { + csv_relpath="$(dirname "$1")" + csv_name="$(basename "$1")" + if samples_under_threshold "$csv_relpath"; then + echo "Not enough samples to construct a good average, performance\ + check skipped!" + return 0 # Success status + fi + python "$DEVOPS_PATH/scripts/benchmarking/compare.py" \ + "$DEVOPS_PATH" "$csv_relpath" "$csv_name" + return $? +} + +# Move the results of our benchmark into the git repo, and save benchmark +# results to artifact archive +# +# Usage: cache +cache() { + mkdir -p "$(dirname ./artifact/passing_tests/$1)" "$(dirname ./artifact/failed_tests/$1)" + cp "./artifact/failed_tests/$1" "./artifact/passing_tests/$1" + mkdir -p "$(dirname ./llvm-ci-perf-results/$1)" + mv "./artifact/failed_tests/$1" "./llvm-ci-perf-results/$1" +} + +# Check for a regression + cache if no regression found +# +# Usage: check_and_cache +check_and_cache() { + echo "Checking $1..." + if check_regression $1; then + if [ "$CACHE_RESULTS" -eq "1" ]; then + echo "Caching $1..." + cache $1 + fi + else + [ "$CACHE_RESULTS" -eq "1" ] && echo "Regression found -- Not caching!" + fi +} + +# Run and process the results of each enabled benchmark in enabled_tests.conf +process_benchmarks() { + echo "### Running and processing selected benchmarks ###" + if [ -z "$TESTS_CONFIG" ]; then + echo "Setting tests to run via cli is not currently supported." + exit 1 + else + rm ./artifact/benchmarks_errored.log ./artifact/benchmarks_failed.log 2> /dev/null + mkdir -p ./artifact + # Loop through each line of enabled_tests.conf, but ignore lines in the + # test config starting with #'s: + grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do + _validate_testname "$testcase" + echo "# Running $testcase..." + + # The benchmark results git repo and this script's output both share + # the following directory structure: + # + # /// + # + # Instead of specifying 2 paths with a slightly different root + # folder name for every function we use, we can use a relative path + # to represent the file in both folders. + # + # Figure out the relative path of our testcase result: + test_dir_relpath="$DEVICE_SELECTOR_DIRNAME/$RUNNER/$testcase" + output_csv_relpath="$test_dir_relpath/$testcase-$TIMESTAMP.csv" + mkdir -p "./artifact/failed_tests/$test_dir_relpath" # Ensure directory exists + + # Tests are first placed in ./artifact/failed_tests, and are only + # moved to passing_tests or the performance results repo if the + # benchmark results are passing + output_csv="./artifact/failed_tests/$output_csv_relpath" + "./compute-benchmarks/build/bin/$testcase" --csv \ + --iterations="$SANITIZED_COMPUTE_BENCH_ITERATIONS" > "$output_csv" + + exit_status="$?" 
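+            # On success the raw output is trimmed to csv rows and compared
+            # against the historical median; passing results are cached in the
+            # llvm-ci-perf-results clone under the same relative path, e.g.
+            # level_zero-gpu/<runner name>/<test>/<test>-<timestamp>.csv for a
+            # level_zero:gpu run.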
+ if [ "$exit_status" -eq 0 ] && [ -s "$output_csv" ]; then + # Filter out header lines not in csv format: + tail +8 "$output_csv" > .tmp_res + mv .tmp_res "$output_csv" + check_and_cache $output_csv_relpath + else + echo "[ERROR] $testcase returned exit status $exit_status" + echo "-- $testcase: error $exit_status" >> ./artifact/benchmarks_errored.log + fi + done + fi +} + +# Handle failures + produce a report on what failed +process_results() { + fail=0 + if [ -s ./artifact/benchmarks_failed.log ]; then + printf "\n### Tests performing over acceptable range of average: ###\n" + cat ./artifact/benchmarks_failed.log + echo "" + fail=2 + fi + if [ -s ./artifact/benchmarks_errored.log ]; then + printf "\n### Tests that failed to run: ###\n" + cat ./artifact/benchmarks_errored.log + echo "" + fail=1 + fi + exit $fail +} + +cleanup() { + echo "### Cleaning up compute-benchmark builds from prior runs ###" + rm -rf ./compute-benchmarks + rm -rf ./llvm-ci-perf-results + [ ! -z "$_exit_after_cleanup" ] && exit +} + +load_configs() { + # This script needs to know where the intel/llvm "/devops" directory is, + # containing all the configuration files and the compare script. + # + # If this is not provided, this function tries to guess where the files + # are based on how the script is called, and verifies that all necessary + # configs and scripts are reachable. + + # This benchmarking script is usually at: + # + # /devops/scripts/benchmarking/benchmark.sh + # + # Derive /devops based on location of this script: + [ -z "$DEVOPS_PATH" ] && DEVOPS_PATH="$(dirname "$0")/../.." + if [ -z "$(printf '%s' "$DEVOPS_PATH" | grep -oE '^[a-zA-Z0-9._\/-]+$')" ]; then + echo "Bad DEVOPS_PATH, please specify DEVOPS_PATH variable." + exit 1 + fi + + TESTS_CONFIG="$(realpath "$DEVOPS_PATH/benchmarking/enabled_tests.conf")" + COMPARE_PATH="$(realpath "$DEVOPS_PATH/scripts/benchmarking/compare.py")" + LOAD_CONFIG_PY="$(realpath "$DEVOPS_PATH/scripts/benchmarking/load_config.py")" + + for file in \ + "$TESTS_CONFIG" "$COMPARE_PATH" "$LOAD_CONFIG_PY" + do + if [ ! -f "$file" ]; then + echo "Please provide path to /devops in DEVOPS_PATH." + exit -1 + fi + done + + $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" config) + $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" constants) +} + +##### + +load_configs + +COMPUTE_BENCH_COMPILE_FLAGS="" +CACHE_RESULTS="0" +# Timestamp format is YYYYMMDD_HHMMSS +TIMESTAMP="$(date +%Y%m%d_%H%M%S)" + +# CLI flags + overrides to configuration options: +while getopts "n:cCs" opt; do + case "$opt" in + n) + if [ -n "$(printf "%s" "$OPTARG" | sed "s/[a-zA-Z0-9_-]*//g")" ]; then + echo "Illegal characters in runner name." + exit 1 + fi + RUNNER="$OPTARG" + ;; + # Cleanup status is saved in a var to ensure all arguments are processed before + # performing cleanup + c) _cleanup=1 ;; + C) _cleanup=1 && _exit_after_cleanup=1 ;; + s) CACHE_RESULTS=1;; + \?) usage ;; + esac +done + +# Check all necessary variables exist: +if [ -z "$CMPLR_ROOT" ]; then + echo "Please set CMPLR_ROOT first; it is needed by compute-benchmarks to build." + exit 1 +elif [ -z "$ONEAPI_DEVICE_SELECTOR" ]; then + echo "Please set ONEAPI_DEVICE_SELECTOR first to specify which device to use." + exit 1 +elif [ -z "$RUNNER" ]; then + echo "Please specify runner name using -n first; it is needed for storing/comparing benchmark results." 
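+    # (For reference, the benchmark action in /devops/actions/run-tests/benchmark
+    # drives this script roughly as:
+    #    CMPLR_ROOT=./toolchain ONEAPI_DEVICE_SELECTOR=level_zero:gpu \
+    #        ./devops/scripts/benchmarking/benchmark.sh -n <runner name> -s )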
+ exit 1 +fi + +# Make sure ONEAPI_DEVICE_SELECTOR doesn't try to enable multiple devices at the +# same time, or use specific device id's +_dev_sel_backend_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS" | sed 's/,/|/g')" +_dev_sel_device_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES" | sed 's/,/|/g')" +_dev_sel_re="s/($_dev_sel_backend_re):($_dev_sel_device_re)//" +if [ -n "$(echo "$ONEAPI_DEVICE_SELECTOR" | sed -E "$_dev_sel_re")" ]; then + echo "Unsupported ONEAPI_DEVICE_SELECTOR value: please ensure only one \ +device is selected, and devices are not selected by indices." + echo "Enabled backends: $SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS" + echo "Enabled device types: $SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES" + exit 1 +fi +# ONEAPI_DEVICE_SELECTOR values are not valid directory names in unix: this +# value lets us use ONEAPI_DEVICE_SELECTOR as actual directory names +DEVICE_SELECTOR_DIRNAME="$(echo "$ONEAPI_DEVICE_SELECTOR" | sed 's/:/-/')" + +# Clean up and delete all cached files if specified: +[ ! -z "$_cleanup" ] && cleanup +# Clone and build only if they aren't already cached/deleted: +[ ! -d ./llvm-ci-perf-results ] && clone_perf_res +[ ! -d ./compute-benchmarks ] && clone_compute_bench +[ ! -d ./compute-benchmarks/build ] && build_compute_bench +# Process benchmarks: +process_benchmarks +process_results \ No newline at end of file diff --git a/devops/scripts/benchmarking/common.py b/devops/scripts/benchmarking/common.py new file mode 100644 index 0000000000000..c400b686db90f --- /dev/null +++ b/devops/scripts/benchmarking/common.py @@ -0,0 +1,196 @@ +import re +import os +import sys +import string +import configparser + + +class Validate: + """Static class containing methods for validating various fields""" + + @staticmethod + def filepath(path: str) -> bool: + """ + Returns True if path is clean (no illegal characters), otherwise False. + """ + filepath_re = re.compile(r"[a-zA-Z0-9\/\._\-]+") + return filepath_re.match(path) is not None + + @staticmethod + def timestamp(t: str) -> bool: + """ + Returns True if t is in form YYYYMMDD_HHMMSS, otherwise False. + """ + timestamp_re = re.compile( + r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" + ) + return timestamp_re.match(t) is not None + + @staticmethod + def sanitize_stat(stat: str) -> float: + """ + Sanitize statistics found in compute-benchmark output csv files. Returns + float if sanitized, None if not sanitizable. + """ + # Get rid of % + if stat[-1] == "%": + stat = stat[:-1] + + # Cast to float: If cast succeeds, the statistic is clean. + try: + return float(stat) + except ValueError: + return None + + +class SanitizedConfig: + """ + Static class for holding sanitized configuration values used within python. + + Configuration option names follow
_