diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml index 5c1e8e425111b..7bb6a568892a8 100644 --- a/.github/workflows/sycl-docs.yml +++ b/.github/workflows/sycl-docs.yml @@ -49,7 +49,13 @@ jobs: mkdir clang mv $GITHUB_WORKSPACE/build/tools/sycl/doc/html/* . mv $GITHUB_WORKSPACE/build/tools/clang/docs/html/* clang/ + cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks touch .nojekyll + # Update benchmarking dashboard configuration + cat << EOF > benchmarks/config.js + remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/data.json'; + defaultCompareNames = ["Baseline_PVC_L0"]; + EOF # Upload the generated docs as an artifact and deploy to GitHub Pages. - name: Upload artifact uses: actions/upload-pages-artifact@v3 diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 42066d4d1fee2..ac3e6341cc797 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -25,7 +25,7 @@ on: required: False tests_selector: description: | - Three possible options: "e2e", "cts", and "compute-benchmarks". + Three possible options: "e2e", "cts", and "benchmarks". type: string default: "e2e" @@ -111,6 +111,33 @@ on: default: '' required: False + benchmark_upload_results: + description: | + Set to true to upload results to git repository storing benchmarking + results. + type: string + default: 'false' + required: False + benchmark_save_name: + description: | + Save name to use for benchmark results: Save names are stored in + metadata of result file, and are used to identify benchmark results in + the same series (e.g. same configuration, same device, etc.). + + Note: Currently, benchmark result filenames are in the format of + ___YYYYMMDD_HHMMSS.json + type: string + default: '' + required: False + benchmark_preset: + description: | + Name of benchmark preset to run. + + See /devops/scripts/benchmarks/presets.py for all presets available. 
+ type: string + default: 'Minimal' + required: False + workflow_dispatch: inputs: runner: @@ -150,7 +177,7 @@ on: options: - e2e - cts - - compute-benchmarks + - benchmarks env: description: | @@ -303,11 +330,14 @@ jobs: target_devices: ${{ inputs.target_devices }} retention-days: ${{ inputs.retention-days }} - - name: Run compute-benchmarks on SYCL - if: inputs.tests_selector == 'compute-benchmarks' + - name: Run benchmarks + if: inputs.tests_selector == 'benchmarks' uses: ./devops/actions/run-tests/benchmark with: target_devices: ${{ inputs.target_devices }} + upload_results: ${{ inputs.benchmark_upload_results }} + save_name: ${{ inputs.benchmark_save_name }} + preset: ${{ inputs.benchmark_preset }} env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} diff --git a/.github/workflows/sycl-nightly.yml b/.github/workflows/sycl-nightly.yml index 1ccf4a2498e6b..c6cffa66f5fea 100644 --- a/.github/workflows/sycl-nightly.yml +++ b/.github/workflows/sycl-nightly.yml @@ -274,35 +274,30 @@ jobs: sycl_toolchain_archive: ${{ needs.build-win.outputs.artifact_archive_name }} sycl_cts_artifact: sycl_cts_bin_win - aggregate_benchmark_results: - if: github.repository == 'intel/llvm' && !cancelled() - name: Aggregate benchmark results and produce historical averages - uses: ./.github/workflows/sycl-benchmark-aggregate.yml - secrets: - LLVM_SYCL_BENCHMARK_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} - with: - lookback_days: 100 - run-sycl-benchmarks: - needs: [ubuntu2204_build, aggregate_benchmark_results] + needs: [ubuntu2204_build] if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }} strategy: - fail-fast: false matrix: include: - - name: Run compute-benchmarks on L0 PVC + - ref: ${{ github.sha }} + save_name: Baseline runner: '["PVC_PERF"]' - image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN - target_devices: level_zero:gpu + backend: 'level_zero:gpu' + preset: Minimal uses: ./.github/workflows/sycl-linux-run-tests.yml secrets: inherit with: - name: ${{ matrix.name }} + name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) runner: ${{ matrix.runner }} - image_options: ${{ matrix.image_options }} - target_devices: ${{ matrix.target_devices }} - tests_selector: compute-benchmarks - repo_ref: ${{ github.sha }} + image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: ${{ matrix.backend }} + tests_selector: benchmarks + benchmark_upload_results: true + benchmark_save_name: ${{ matrix.save_name }} + benchmark_preset: ${{ matrix.preset }} + repo_ref: ${{ matrix.ref }} sycl_toolchain_artifact: sycl_linux_default sycl_toolchain_archive: ${{ needs.ubuntu2204_build.outputs.artifact_archive_name }} sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }} diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index 23fbb1ad903b4..c8900baa78368 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -1,12 +1,138 @@ -name: Benchmarks +name: Run Benchmarks -# This workflow is a WIP: this workflow file acts as a placeholder. 
+on: + workflow_call: + inputs: + preset: + type: string + description: | + Benchmark presets to run: See /devops/scripts/benchmarks/presets.py + required: false + default: 'Minimal' # Only compute-benchmarks + pr_no: + type: string + description: | + PR no. to build SYCL from if specified: SYCL will be built from HEAD + of incoming branch used by the specified PR no. -on: [ workflow_dispatch ] + If both pr_no and commit_hash are empty, the latest SYCL nightly build + will be used. + required: false + default: '' + commit_hash: + type: string + description: | + Commit hash (within intel/llvm) to build SYCL from if specified. + + If both pr_no and commit_hash are empty, the latest commit in + deployment branch will be used. + required: false + default: '' + upload_results: + type: string # true/false: workflow_dispatch does not support booleans + required: true + runner: + type: string + required: true + backend: + type: string + required: true + + workflow_dispatch: + inputs: + preset: + type: choice + description: | + Benchmark presets to run, See /devops/scripts/benchmarks/presets.py. Hint: Minimal is compute-benchmarks only. + options: + - Full + - SYCL + - Minimal + - Normal + - Test + default: 'Minimal' # Only compute-benchmarks + pr_no: + type: string + description: | + PR no. to build SYCL from: + + SYCL will be built from HEAD of incoming branch. + required: false + default: '' + commit_hash: + type: string + description: | + Commit hash (within intel/llvm) to build SYCL from: + + Leave both pr_no and commit_hash empty to use latest commit. + required: false + default: '' + upload_results: + description: 'Save and upload results' + type: choice + options: + - false + - true + default: true + runner: + type: choice + options: + - '["PVC_PERF"]' + backend: + description: Backend to use + type: choice + options: + - 'level_zero:gpu' + - 'level_zero_v2:gpu' + # As of #17407, sycl-linux-build now builds v2 by default + +permissions: read-all jobs: - do-nothing: - runs-on: ubuntu-latest - steps: - - run: echo 'This workflow is a WIP.' 
+ build_sycl: + name: Build SYCL + uses: ./.github/workflows/sycl-linux-build.yml + with: + build_ref: | + ${{ + inputs.commit_hash != '' && inputs.commit_hash || + inputs.pr_no != '' && format('refs/pull/{0}/head', inputs.pr_no) || + github.ref + }} + build_cache_root: "/__w/" + build_artifact_suffix: "prod_noassert" + build_cache_suffix: "prod_noassert" + build_configure_extra_args: "--no-assertions" + build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest" + cc: clang + cxx: clang++ + changes: '[]' + run_benchmarks_build: + name: Run Benchmarks on Build + needs: [ build_sycl ] + strategy: + matrix: + include: + - ref: ${{ inputs.commit_hash != '' && inputs.commit_hash || format('refs/pull/{0}/head', inputs.pr_no) }} + save_name: ${{ inputs.commit_hash != '' && format('Commit{0}', inputs.commit_hash) || format('PR{0}', inputs.pr_no) }} + # Set default values if not specified: + runner: ${{ inputs.runner || '["PVC_PERF"]' }} + backend: ${{ inputs.backend || 'level_zero:gpu' }} + uses: ./.github/workflows/sycl-linux-run-tests.yml + secrets: inherit + with: + name: Run compute-benchmarks (${{ matrix.save_name }}, ${{ matrix.runner }}, ${{ matrix.backend }}) + runner: ${{ matrix.runner }} + image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: ${{ matrix.backend }} + tests_selector: benchmarks + benchmark_upload_results: ${{ inputs.upload_results }} + benchmark_save_name: ${{ matrix.save_name }} + benchmark_preset: ${{ inputs.preset }} + repo_ref: ${{ matrix.ref }} + devops_ref: ${{ github.ref }} + sycl_toolchain_artifact: sycl_linux_prod_noassert + sycl_toolchain_archive: ${{ needs.build_sycl.outputs.artifact_archive_name }} + sycl_toolchain_decompress_command: ${{ needs.build_sycl.outputs.artifact_decompress_command }} diff --git a/devops/actions/benchmarking/aggregate/action.yml b/devops/actions/benchmarking/aggregate/action.yml deleted file mode 100644 index c062636684b1f..0000000000000 --- a/devops/actions/benchmarking/aggregate/action.yml +++ /dev/null @@ -1,95 +0,0 @@ -name: 'Aggregate compute-benchmark results and produce historical averages' - -# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on -# how the benchmark results compare to a historical average: This historical -# average is calculated in this composite workflow, which aggregates historical -# data and produces measures of central tendency (median in this case) used for -# this purpose. -# -# This action assumes that /devops has been checked out in ./devops. This action -# also assumes that GITHUB_TOKEN was properly set in env, because according to -# Github, that's apparently the recommended way to pass a secret into a github -# action: -# -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets -# - -inputs: - lookback_days: - type: number - required: true - -runs: - using: "composite" - steps: - - name: Obtain oldest timestamp allowed for data in aggregation - shell: bash - run: | - # DO NOT use inputs.lookback_days directly, only use SANITIZED_TIMESTAMP. - SANITIZED_LOOKBACK_DAYS="$(echo '${{ inputs.lookback_days }}' | grep -oE '^[0-9]+$')" - if [ -z "$SANITIZED_LOOKBACK_DAYS" ]; then - echo "Please ensure inputs.lookback_days is a number." 
- exit 1 - fi - SANITIZED_TIMESTAMP="$(date -d "$SANITIZED_LOOKBACK_DAYS days ago" +%Y%m%d_%H%M%S)" - if [ -z "$(echo "$SANITIZED_TIMESTAMP" | grep -oE '^[0-9]{8}_[0-9]{6}$' )" ]; then - echo "Invalid timestamp generated: is inputs.lookback_days valid?" - exit 1 - fi - echo "SANITIZED_TIMESTAMP=$SANITIZED_TIMESTAMP" >> $GITHUB_ENV - - name: Load benchmarking configuration - shell: bash - run: | - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) - echo "SANITIZED_PERF_RES_GIT_REPO=$SANITIZED_PERF_RES_GIT_REPO" >> $GITHUB_ENV - echo "SANITIZED_PERF_RES_GIT_BRANCH=$SANITIZED_PERF_RES_GIT_BRANCH" >> $GITHUB_ENV - - name: Checkout historical performance results repository - shell: bash - run: | - if [ ! -d ./llvm-ci-perf-results ]; then - git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results - fi - - name: Run aggregator on historical results - shell: bash - run: | - # The current format of the historical results respository is: - # - # /// - # - # Thus, a min/max depth of 3 is used to enumerate all test cases in the - # repository. Test name is also derived from here. - find ./llvm-ci-perf-results -mindepth 3 -maxdepth 3 -type d ! -path '*.git*' | - while read -r dir; do - test_name="$(basename "$dir")" - python ./devops/scripts/benchmarking/aggregate.py ./devops "$test_name" "$dir" "$SANITIZED_TIMESTAMP" - done - - name: Upload average to the repo - shell: bash - run: | - cd ./llvm-ci-perf-results - git config user.name "SYCL Benchmarking Bot" - git config user.email "sys_sycl_benchmarks@intel.com" - git pull - # Make sure changes have been made - if git diff --quiet && git diff --cached --quiet; then - echo "No changes to median, skipping push." - else - git add . - git commit -m "[GHA] Aggregate median data from $SANITIZED_TIMESTAMP to $(date +%Y%m%d_%H%M%S)" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" - fi - - name: Find aggregated average results artifact here - if: always() - shell: bash - run: | - cat << EOF - # - # Artifact link for aggregated averages here: - # - EOF - - name: Archive new medians - if: always() - uses: actions/upload-artifact@v4 - with: - name: llvm-ci-perf-results new medians - path: ./llvm-ci-perf-results/**/*-median.csv diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index e357e2bddec30..182e08422b9dd 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -1,24 +1,30 @@ -name: 'Run compute-benchmarks' - -# Run compute-benchmarks on SYCL -# -# This action assumes SYCL is in ./toolchain, and that /devops has been -# checked out in ./devops. This action also assumes that GITHUB_TOKEN -# was properly set in env, because according to Github, that's apparently the -# recommended way to pass a secret into a github action: -# -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +name: 'Run benchmarks' + +# This action assumes the following prerequisites: # -# This action also expects a RUNNER_TAG environment variable to be set to the -# runner tag used to run this workflow: Currently, only gen12 and pvc on Linux -# are fully supported. Although this workflow won't stop you from running other -# devices, note that only gen12 and pvc has been tested to work. 
+# - SYCL is placed in ./toolchain -- TODO change this +# - /devops has been checked out in ./devops. +# - env.GITHUB_TOKEN was properly set, because according to Github, that's +# apparently the recommended way to pass a secret into a github action: + +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets # +# - env.RUNNER_TAG set to the runner tag used to run this workflow: Currently, +# only specific runners are fully supported. inputs: target_devices: type: string required: True + upload_results: + type: string + required: True + save_name: + type: string + required: True + preset: + type: string + required: True runs: using: "composite" @@ -27,16 +33,24 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} + PRESET: ${{ inputs.preset }} run: | case "$RUNNER_TAG" in - '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; + '["PVC_PERF"]' ) ;; *) echo "#" - echo "# WARNING: Only gen12/pvc on Linux is fully supported." + echo "# WARNING: Only specific tuned runners are fully supported." echo "# This workflow is not guaranteed to work with other runners." echo "#" ;; esac + # Ensure runner name has nothing injected + # TODO: in terms of security, is this overkill? + if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then + echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." + exit 1 + fi + # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in level_zero:*) ;; @@ -46,11 +60,15 @@ runs: echo "# This workflow is not guaranteed to work with other backends." echo "#" ;; esac + echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + + # Make sure specified preset is a known value and is not malicious + python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" + [ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset + echo "PRESET=$PRESET" >> $GITHUB_ENV - name: Compute CPU core range to run benchmarks on shell: bash run: | - # Taken from ur-benchmark-reusable.yml: - # Compute the core range for the first NUMA node; second node is used by # UMF. Skip the first 4 cores as the kernel is likely to schedule more # work on these. @@ -67,62 +85,131 @@ runs: ZE_AFFINITY_MASK=0 echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + - name: Checkout results repo + shell: bash + run: | + git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks + env: + # Need to append "__" to save name in order to follow + # conventions: + SAVE_PREFIX: ${{ inputs.save_name }} shell: bash run: | - cat << EOF - # - # NOTE TO DEVELOPERS: - # - - Check latter steps of the workflow: This job produces an artifact with: - - benchmark results from passing/failing tests - - log containing all failing (too slow) benchmarks - - log containing all erroring benchmarks - - While this step in the workflow provides debugging output describing this - information, it might be easier to inspect the logs from the artifact - instead. 
- - EOF - export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" + # TODO generate summary + display helpful message here export CMPLR_ROOT=./toolchain echo "-----" sycl-ls echo "-----" - taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 - - name: Push compute-benchmarks results + # Using --break-system-packages because: + # - venv is not installed + # - unable to install anything via pip, as python packages in the docker + # container are managed by apt + # - apt is unable to install anything due to unresolved dpkg dependencies, + # as a result of how the sycl nightly images are created + pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt + echo "-----" + + # clang builds have git repo / commit hashes in their --version output, + # same goes for dpcpp. Obtain git repo / commit hash info this way: + + # First line of --version is formatted 'clang version ... ( )' + # thus we parse for ( ): + sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$' | tr -d '()')" + if [ -z "$sycl_git_info" ]; then + echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" + exit 1 + fi + sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" + sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" + + case "$ONEAPI_DEVICE_SELECTOR" in + level_zero:*) SAVE_SUFFIX="L0" ;; + level_zero_v2:*) SAVE_SUFFIX="L0v2" ;; + opencl:*) SAVE_SUFFIX="OCL" ;; + *) SAVE_SUFFIX="${ONEAPI_DEVICE_SELECTOR%%:*}";; + esac + # TODO accomodate for different GPUs and backends + SAVE_NAME="${SAVE_PREFIX}_PVC_${SAVE_SUFFIX}" + SAVE_TIMESTAMP="$(date -u +'%Y%m%d_%H%M%S')" # Timestamps are in UTC time + + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --sycl "$(realpath ./toolchain)" \ + --save "$SAVE_NAME" \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ + --preset "$PRESET" \ + --timestamp-override "$SAVE_TIMESTAMP" \ + --github-repo "$sycl_git_repo" \ + --git-commit "$sycl_git_commit" + echo "-----" + python3 ./devops/scripts/benchmarks/compare.py to_hist \ + --name "$SAVE_NAME" \ + --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ + --results-dir "./llvm-ci-perf-results/results/" + echo "-----" + + - name: Cache changes to benchmark folder for archival purposes if: always() shell: bash + run: | + cd "./llvm-ci-perf-results" + git add . + for diff in $(git diff HEAD --name-only); do + mkdir -p "../cached_changes/$(dirname $diff)" + cp "$diff" "../cached_changes/$diff" + done + - name: Push benchmarks results + if: inputs.upload_results == 'true' && always() + shell: bash run: | - # TODO -- waiting on security clearance - # Load configuration values - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) - cd "./llvm-ci-perf-results" git config user.name "SYCL Benchmarking Bot" git config user.email "sys_sycl_benchmarks@intel.com" - git pull - git add . - # Make sure changes have been made + results_branch="unify-ci" + if git diff --quiet && git diff --cached --quiet; then echo "No new results added, skipping push." 
- else - git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" + exit 0 fi - - name: Find benchmark result artifact here - if: always() - shell: bash - run: | - cat << EOF - # - # Artifact link for benchmark results here: - # - EOF - - name: Archive compute-benchmark results + + for attempt in 1 2 3; do + echo "Attempt $attempt to push new results" + git add . + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + results_file="$(git diff HEAD~1 --name-only -- results/ | head -n 1)" + + if git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" "$results_branch"; then + echo "Push succeeded" + break + fi + + echo "Push failed, retrying..." + if [ -n "$results_file" ]; then + cached_result="$(mktemp -d)/$(basename $results_file)" + mv "$results_file" "$cached_result" + + git reset --hard "origin/$results_branch" + git pull origin "$results_branch" + + mv "$cached_result" "$results_file" + fi + + echo "Regenerating data.json..." + cd ../ + ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ + --dry-run + cd - + done + - name: Archive benchmark results if: always() uses: actions/upload-artifact@v4 with: - name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) - path: ./artifact + name: Benchmark run ${{ github.run_id }} (${{ runner.name }}) + path: ./cached_changes diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini deleted file mode 100644 index 988d1d9f08af9..0000000000000 --- a/devops/benchmarking/config.ini +++ /dev/null @@ -1,44 +0,0 @@ -; -; This file contains configuration options to change the behaviour of the -; benchmarking workflow in sycl-linux-run-tests.yml. -; -; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The -; contents of this file must be sanitized first before use. -; See: /devops/scripts/benchmarking/common.py -; - -; Compute-benchmark compile/run options -[compute_bench] -; Value for -j during compilation of compute-benchmarks -compile_jobs = 40 -; Number of iterations to run compute-benchmark tests -iterations = 5000 - -; Options for benchmark result metrics (to record/compare against) -[metrics] -; Sets the metrics to record/aggregate in the historical average. -; Format: comma-separated list of column names in compute-benchmark results -recorded = Median,StdDev -; Sets the tolerance for each recorded metric and their allowed deviation from -; the historical average. Metrics not included here are not compared against -; when passing/failing benchmark results. 
-; Format: comma-separated list of : -tolerances = Median:0.08 - -; Options for computing historical averages -[average] -; Number of days (from today) to look back for results when computing historical -; average -cutoff_range = 7 -; Minimum number of samples required to compute a historical average -min_threshold = 10 - -; ONEAPI_DEVICE_SELECTOR linting/options -[device_selector] -; Backends to allow in device_selector -enabled_backends = level_zero,opencl,cuda,hip -; native_cpu is disabled - -; Devices to allow in device_selector -enabled_devices = cpu,gpu -; fpga is disabled diff --git a/devops/benchmarking/constants.ini b/devops/benchmarking/constants.ini deleted file mode 100644 index 9281ece8f4950..0000000000000 --- a/devops/benchmarking/constants.ini +++ /dev/null @@ -1,48 +0,0 @@ -; -; This file defines constants used throughout the benchmarking workflow in -; sycl-linux-run-tests.yml. If you're trying to change the behavior of this -; workflow, you're likely looking for /devops/benchmarking/config.ini instead. -; -; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The -; contents of this file must be sanitized first before use. -; See: /devops/scripts/benchmarking/common.py -; - -; Constants for compute-benchmarks -[compute_bench] -git_repo = intel/compute-benchmarks -git_branch = master -git_commit = 230a3db4d8d03c0e9a663988f7c3abbd1137a1e0 -; path = ./compute-benchmarks - -; Constants for git repo storing benchmark performance results -[perf_res] -git_repo = intel/llvm-ci-perf-results -git_branch = main -; Path to clone performance result repo -; path = ./llvm-ci-perf-results - -; It was decided that paths should be hardcoded throughout this workflow for -; security reasons and ease of readability. Do not use paths as constants. - -; ; Constants for artifacts -; [artifact] -; ; Path to root folder storing benchmark CI artifact -; path = ./artifact -; ; Path (relative to artifact.path) to cache compute-benchmark results -; ; -; ; If a test result does not get moved out of this catch-all cache path, it is -; ; considered to have failed -; output_cache = ./artifact/failed_tests -; ; Path (relative to artifact.path) to cache passing compute-benchmark results -; passing_cache = ./artifact/passing_tests - -; [timestamp] -; ; Timestamp format used for -; format = %%Y%%m%%d_%%H%%M%%S - -; [benchmark_log] -; ; Log file for test cases that perform over the allowed variance -; slow = ./artifact/benchmarks_failed.log -; ; Log file for test cases that errored / failed to build -; error = ./artifact/benchmarks_errored.log diff --git a/devops/benchmarking/enabled_tests.conf b/devops/benchmarking/enabled_tests.conf deleted file mode 100644 index 20659cbea636d..0000000000000 --- a/devops/benchmarking/enabled_tests.conf +++ /dev/null @@ -1,8 +0,0 @@ -# Test cases to be enabled: -api_overhead_benchmark_sycl -memory_benchmark_sycl -miscellaneous_benchmark_sycl -ulls_benchmark_sycl - -# As of January 2025, these are every compute-benchmark tests with a SYCL -# implementation. 
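The enabled_tests.conf allow-list removed above is superseded by named presets: the updated benchmark action validates its preset input by running python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" and aborts on a non-zero exit status. The real presets.py is not included in this diff, so the sketch below only illustrates that query/exit-code contract; the preset names are taken from the workflow_dispatch choices and everything else is assumed.

import sys

# Preset names mirror the workflow_dispatch choices; the real script also maps
# each preset onto a set of benchmark suites, which is omitted here.
KNOWN_PRESETS = {"Full", "SYCL", "Minimal", "Normal", "Test"}

def query(name: str) -> int:
    # Return 0 only for a known preset, so the calling shell step can reject
    # unexpected (or malicious) input before exporting PRESET to later steps.
    if name in KNOWN_PRESETS:
        print(name)
        return 0
    print(f"Unknown preset: {name}", file=sys.stderr)
    return 1

if __name__ == "__main__":
    # Usage mirroring the action: presets.py query Minimal
    sys.exit(query(sys.argv[2]) if len(sys.argv) == 3 and sys.argv[1] == "query" else 1)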
diff --git a/devops/scripts/benchmarking/aggregate.py b/devops/scripts/benchmarking/aggregate.py deleted file mode 100644 index f62a8ffed83c5..0000000000000 --- a/devops/scripts/benchmarking/aggregate.py +++ /dev/null @@ -1,205 +0,0 @@ -import csv -import sys -from pathlib import Path -import heapq -import statistics -from common import Validate, SanitizedConfig -from abc import ABC, abstractmethod -import os - - -class Aggregator(ABC): - """ - Aggregator classes used to "aggregate" a pool of elements, and produce an - "average" (precisely, some "measure of central tendency") from the elements. - """ - - @staticmethod - @abstractmethod - def get_type() -> str: - """ - Return a string indicating the type of average this aggregator - produces. - """ - pass - - @abstractmethod - def add(self, n: float): - """ - Add/aggregate an element to the pool of elements used by this aggregator - to produce an average calculation. - """ - pass - - @abstractmethod - def get_avg(self) -> float: - """ - Produce an average from the pool of elements aggregated using add(). - """ - pass - - -class SimpleMedian(Aggregator): - """ - Simple median calculation: if the number of samples being generated are low, - this is the fastest median method. - """ - - def __init__(self): - self.elements = [] - - @staticmethod - def get_type() -> str: - return "median" - - def add(self, n: float): - self.elements.append(n) - - def get_avg(self) -> float: - return statistics.median(self.elements) - - -class StreamingMedian(Aggregator): - """ - Calculate medians incrementally using heaps: Theoretically the fastest way - to calculate a median from a stream of elements, but realistically is only - faster when dealing with huge numbers of samples that would be generated by - i.e. enabling this workflow in precommit and using longer periods of time. - """ - - def __init__(self): - # Gist: we keep a minheap and a maxheap, and store the median as the top - # of the minheap. When a new element comes it gets put into the heap - # based on if the element is bigger than the current median. Then, the - # heaps are heapified and the median is repopulated by heapify. - self.minheap_larger = [] - self.maxheap_smaller = [] - - @staticmethod - def get_type() -> str: - return "median" - - # Note: numbers on maxheap should be negative, as heapq - # is minheap by default - - def add(self, n: float): - if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n: - heapq.heappush(self.maxheap_smaller, -n) - else: - heapq.heappush(self.minheap_larger, n) - - # Ensure minheap has more elements than maxheap - if len(self.maxheap_smaller) > len(self.minheap_larger) + 1: - heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller)) - elif len(self.maxheap_smaller) < len(self.minheap_larger): - heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger)) - - def get_avg(self) -> float: - if len(self.maxheap_smaller) == len(self.minheap_larger): - # Equal number of elements smaller and larger than "median": - # thus, there are two median values. The median would then become - # the average of both median values. 
- return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0 - else: - # Otherwise, median is always in minheap, as minheap is always - # bigger - return -self.maxheap_smaller[0] - - -class Aggregate: - """ - Static class providing methods for aggregating data - """ - - @staticmethod - def hist_avg( - benchmark_name: str, res_dir: str, cutoff: str, aggregator=SimpleMedian - ): - if not os.path.isdir(res_dir): - print(f"Not a directory: {res_dir}.", file=sys.stderr) - exit(1) - - def get_csv_samples() -> list[str]: - """Get all valid .csv samples from the results folder.""" - cache_dir = Path(f"{res_dir}") - # Filter all benchmark .csv files in the result directory: - return list( - filter( - # Make sure the .csv "file" is a file: - lambda f: f.is_file() - # Make sure timestamp of .csv file is good format: - # [-19:-4] corresponds to the timestamp in the filename. - and Validate.timestamp(str(f)[-19:-4]) - # Make sure timestamp is bigger than cutoff timestamp: - and str(f)[-19:-4] > cutoff, - cache_dir.glob(f"{benchmark_name}-*_*.csv"), - ) - ) - - # Calculate median of every desired metric: - samples_aggregate = dict() - filtered_samples = get_csv_samples() - if len(filtered_samples) == 0: - print( - f"WARNING: No results for {benchmark_name} found from {cutoff} to now", - file=sys.stderr, - ) - for sample_path in filtered_samples: - with open(sample_path, "r") as sample_file: - for sample in csv.DictReader(sample_file): - test = sample["TestCase"] - # Construct entry in aggregator for test if it doesn't exist - # already: - if test not in samples_aggregate: - samples_aggregate[test] = { - metric: aggregator() - for metric in SanitizedConfig.METRICS_TOLERANCES - } - - # For each metric of concern, add to aggregator: - for metric in SanitizedConfig.METRICS_TOLERANCES: - sample_value = Validate.sanitize_stat(sample[metric]) - if not isinstance(sample_value, float): - print( - f"Malformatted statistic in {str(sample_path)}: " - + f"'{sample[metric]}' for {test}." 
- ) - exit(1) - # Add metric from sample for current test to aggregate: - samples_aggregate[test][metric].add(sample_value) - - # Calculate + write new average (from samples_aggregate) in new .csv file: - with open( - f"{res_dir}/{benchmark_name}-{aggregator.get_type()}.csv", "w" - ) as output_csv: - writer = csv.DictWriter( - output_csv, - fieldnames=["TestCase", *SanitizedConfig.METRICS_TOLERANCES.keys()], - ) - writer.writeheader() - for test in samples_aggregate: - writer.writerow( - {"TestCase": test} - | { - metric: samples_aggregate[test][metric].get_avg() - for metric in SanitizedConfig.METRICS_TOLERANCES - } - ) - - -if __name__ == "__main__": - if len(sys.argv) != 5: - print( - f"Usage: {sys.argv[0]} " - ) - exit(1) - if not Validate.timestamp(sys.argv[4]): - print(f"Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.", file=sys.stderr) - exit(1) - if not Validate.filepath(sys.argv[1]): - print(f"Not a valid filepath: {sys.argv[1]}", file=sys.stderr) - exit(1) - # If the filepath provided passed filepath validation, then it is clean - SanitizedConfig.load(sys.argv[1]) - - Aggregate.hist_avg(sys.argv[2], sys.argv[3], sys.argv[4]) diff --git a/devops/scripts/benchmarking/benchmark.sh b/devops/scripts/benchmarking/benchmark.sh deleted file mode 100755 index bbfd669774f9a..0000000000000 --- a/devops/scripts/benchmarking/benchmark.sh +++ /dev/null @@ -1,300 +0,0 @@ -#!/bin/sh - -# -# benchmark.sh: Benchmark dpcpp using compute-benchmarks -# - -usage () { - >&2 echo "Usage: $0 -t [-B ] - -n Github runner name -- Required - -c Clean up working directory - -C Clean up working directory and exit - -s Cache results - -This script builds and runs benchmarks from compute-benchmarks." - exit 1 -} - -# Ensures test cases read from enabled_tests.conf contains no malicious content -_validate_testname () { - if [ -n "$(printf "%s" "$1" | sed "s/[a-zA-Z_]*//g")" ]; then - echo "Illegal characters in $TEST_CONFIG. Permitted characters: a-zA-Z_" - exit 1 - fi -} - -clone_perf_res() { - echo "### Cloning llvm-ci-perf-results ($SANITIZED_PERF_RES_GIT_REPO:$SANITIZED_PERF_RES_GIT_BRANCH) ###" - git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results - [ "$?" -ne 0 ] && exit "$?" -} - -clone_compute_bench() { - echo "### Cloning compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###" - git clone -b "$SANITIZED_COMPUTE_BENCH_GIT_BRANCH" \ - --recurse-submodules "https://github.com/$SANITIZED_COMPUTE_BENCH_GIT_REPO" \ - ./compute-benchmarks - if [ ! -d "./compute-benchmarks" ]; then - echo "Failed to clone compute-benchmarks." - exit 1 - elif [ -n "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT" ]; then - cd ./compute-benchmarks - git checkout "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT" - if [ "$?" -ne 0 ]; then - echo "Failed to get compute-benchmarks commit '$SANITIZED_COMPUTE_BENCH_GIT_COMMIT'." - exit 1 - fi - cd - - fi -} - -build_compute_bench() { - echo "### Building compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###" - mkdir ./compute-benchmarks/build && cd ./compute-benchmarks/build && - # No reason to turn on ccache, if this docker image will be disassembled later on - cmake .. -DBUILD_SYCL=ON -DBUILD_L0=OFF -DBUILD=OCL=OFF -DCCACHE_ALLOWED=FALSE - # TODO enable mechanism for opting into L0 and OCL -- the concept is to - # subtract OCL/L0 times from SYCL times in hopes of deriving SYCL runtime - # overhead, but this is mostly an idea that needs to be mulled upon. 
- - if [ "$?" -eq 0 ]; then - while IFS= read -r case; do - # Skip lines starting with '#' - [ "${case##\#*}" ] || continue - - _validate_testname "$case" - make "-j$SANITIZED_COMPUTE_BENCH_COMPILE_JOBS" "$case" - done < "$TESTS_CONFIG" - fi - cd - -} - -# Check if the number of samples for a given test case is less than a threshold -# set in benchmark-ci.conf -# -# Usage: -samples_under_threshold () { - # Directory doesn't exist, samples automatically under threshold - [ ! -d "./llvm-ci-perf-results/$1" ] && return 0 - file_count="$(find "./llvm-ci-perf-results/$1" -maxdepth 1 -type f | wc -l )" - [ "$file_count" -lt "$SANITIZED_AVERAGE_MIN_THRESHOLD" ] -} - -# Check for a regression via compare.py -# -# Usage: check_regression -check_regression() { - csv_relpath="$(dirname "$1")" - csv_name="$(basename "$1")" - if samples_under_threshold "$csv_relpath"; then - echo "Not enough samples to construct a good average, performance\ - check skipped!" - return 0 # Success status - fi - python "$DEVOPS_PATH/scripts/benchmarking/compare.py" \ - "$DEVOPS_PATH" "$csv_relpath" "$csv_name" - return $? -} - -# Move the results of our benchmark into the git repo, and save benchmark -# results to artifact archive -# -# Usage: cache -cache() { - mkdir -p "$(dirname ./artifact/passing_tests/$1)" "$(dirname ./artifact/failed_tests/$1)" - cp "./artifact/failed_tests/$1" "./artifact/passing_tests/$1" - mkdir -p "$(dirname ./llvm-ci-perf-results/$1)" - mv "./artifact/failed_tests/$1" "./llvm-ci-perf-results/$1" -} - -# Check for a regression + cache if no regression found -# -# Usage: check_and_cache -check_and_cache() { - echo "Checking $1..." - if check_regression $1; then - if [ "$CACHE_RESULTS" -eq "1" ]; then - echo "Caching $1..." - cache $1 - fi - else - [ "$CACHE_RESULTS" -eq "1" ] && echo "Regression found -- Not caching!" - fi -} - -# Run and process the results of each enabled benchmark in enabled_tests.conf -process_benchmarks() { - echo "### Running and processing selected benchmarks ###" - if [ -z "$TESTS_CONFIG" ]; then - echo "Setting tests to run via cli is not currently supported." - exit 1 - else - rm ./artifact/benchmarks_errored.log ./artifact/benchmarks_failed.log 2> /dev/null - mkdir -p ./artifact - # Loop through each line of enabled_tests.conf, but ignore lines in the - # test config starting with #'s: - grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do - _validate_testname "$testcase" - echo "# Running $testcase..." - - # The benchmark results git repo and this script's output both share - # the following directory structure: - # - # /// - # - # Instead of specifying 2 paths with a slightly different root - # folder name for every function we use, we can use a relative path - # to represent the file in both folders. - # - # Figure out the relative path of our testcase result: - test_dir_relpath="$DEVICE_SELECTOR_DIRNAME/$RUNNER/$testcase" - output_csv_relpath="$test_dir_relpath/$testcase-$TIMESTAMP.csv" - mkdir -p "./artifact/failed_tests/$test_dir_relpath" # Ensure directory exists - - # Tests are first placed in ./artifact/failed_tests, and are only - # moved to passing_tests or the performance results repo if the - # benchmark results are passing - output_csv="./artifact/failed_tests/$output_csv_relpath" - "./compute-benchmarks/build/bin/$testcase" --csv \ - --iterations="$SANITIZED_COMPUTE_BENCH_ITERATIONS" > "$output_csv" - - exit_status="$?" 
- if [ "$exit_status" -eq 0 ] && [ -s "$output_csv" ]; then - # Filter out header lines not in csv format: - tail +8 "$output_csv" > .tmp_res - mv .tmp_res "$output_csv" - check_and_cache $output_csv_relpath - else - echo "[ERROR] $testcase returned exit status $exit_status" - echo "-- $testcase: error $exit_status" >> ./artifact/benchmarks_errored.log - fi - done - fi -} - -# Handle failures + produce a report on what failed -process_results() { - fail=0 - if [ -s ./artifact/benchmarks_failed.log ]; then - printf "\n### Tests performing over acceptable range of average: ###\n" - cat ./artifact/benchmarks_failed.log - echo "" - fail=2 - fi - if [ -s ./artifact/benchmarks_errored.log ]; then - printf "\n### Tests that failed to run: ###\n" - cat ./artifact/benchmarks_errored.log - echo "" - fail=1 - fi - exit $fail -} - -cleanup() { - echo "### Cleaning up compute-benchmark builds from prior runs ###" - rm -rf ./compute-benchmarks - rm -rf ./llvm-ci-perf-results - [ ! -z "$_exit_after_cleanup" ] && exit -} - -load_configs() { - # This script needs to know where the intel/llvm "/devops" directory is, - # containing all the configuration files and the compare script. - # - # If this is not provided, this function tries to guess where the files - # are based on how the script is called, and verifies that all necessary - # configs and scripts are reachable. - - # This benchmarking script is usually at: - # - # /devops/scripts/benchmarking/benchmark.sh - # - # Derive /devops based on location of this script: - [ -z "$DEVOPS_PATH" ] && DEVOPS_PATH="$(dirname "$0")/../.." - if [ -z "$(printf '%s' "$DEVOPS_PATH" | grep -oE '^[a-zA-Z0-9._\/-]+$')" ]; then - echo "Bad DEVOPS_PATH, please specify DEVOPS_PATH variable." - exit 1 - fi - - TESTS_CONFIG="$(realpath "$DEVOPS_PATH/benchmarking/enabled_tests.conf")" - COMPARE_PATH="$(realpath "$DEVOPS_PATH/scripts/benchmarking/compare.py")" - LOAD_CONFIG_PY="$(realpath "$DEVOPS_PATH/scripts/benchmarking/load_config.py")" - - for file in \ - "$TESTS_CONFIG" "$COMPARE_PATH" "$LOAD_CONFIG_PY" - do - if [ ! -f "$file" ]; then - echo "Please provide path to /devops in DEVOPS_PATH." - exit -1 - fi - done - - $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" config) - $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" constants) -} - -##### - -load_configs - -COMPUTE_BENCH_COMPILE_FLAGS="" -CACHE_RESULTS="0" -# Timestamp format is YYYYMMDD_HHMMSS -TIMESTAMP="$(date +%Y%m%d_%H%M%S)" - -# CLI flags + overrides to configuration options: -while getopts "n:cCs" opt; do - case "$opt" in - n) - if [ -n "$(printf "%s" "$OPTARG" | sed "s/[a-zA-Z0-9_-]*//g")" ]; then - echo "Illegal characters in runner name." - exit 1 - fi - RUNNER="$OPTARG" - ;; - # Cleanup status is saved in a var to ensure all arguments are processed before - # performing cleanup - c) _cleanup=1 ;; - C) _cleanup=1 && _exit_after_cleanup=1 ;; - s) CACHE_RESULTS=1;; - \?) usage ;; - esac -done - -# Check all necessary variables exist: -if [ -z "$CMPLR_ROOT" ]; then - echo "Please set CMPLR_ROOT first; it is needed by compute-benchmarks to build." - exit 1 -elif [ -z "$ONEAPI_DEVICE_SELECTOR" ]; then - echo "Please set ONEAPI_DEVICE_SELECTOR first to specify which device to use." - exit 1 -elif [ -z "$RUNNER" ]; then - echo "Please specify runner name using -n first; it is needed for storing/comparing benchmark results." 
- exit 1 -fi - -# Make sure ONEAPI_DEVICE_SELECTOR doesn't try to enable multiple devices at the -# same time, or use specific device id's -_dev_sel_backend_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS" | sed 's/,/|/g')" -_dev_sel_device_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES" | sed 's/,/|/g')" -_dev_sel_re="s/($_dev_sel_backend_re):($_dev_sel_device_re)//" -if [ -n "$(echo "$ONEAPI_DEVICE_SELECTOR" | sed -E "$_dev_sel_re")" ]; then - echo "Unsupported ONEAPI_DEVICE_SELECTOR value: please ensure only one \ -device is selected, and devices are not selected by indices." - echo "Enabled backends: $SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS" - echo "Enabled device types: $SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES" - exit 1 -fi -# ONEAPI_DEVICE_SELECTOR values are not valid directory names in unix: this -# value lets us use ONEAPI_DEVICE_SELECTOR as actual directory names -DEVICE_SELECTOR_DIRNAME="$(echo "$ONEAPI_DEVICE_SELECTOR" | sed 's/:/-/')" - -# Clean up and delete all cached files if specified: -[ ! -z "$_cleanup" ] && cleanup -# Clone and build only if they aren't already cached/deleted: -[ ! -d ./llvm-ci-perf-results ] && clone_perf_res -[ ! -d ./compute-benchmarks ] && clone_compute_bench -[ ! -d ./compute-benchmarks/build ] && build_compute_bench -# Process benchmarks: -process_benchmarks -process_results \ No newline at end of file diff --git a/devops/scripts/benchmarking/common.py b/devops/scripts/benchmarking/common.py deleted file mode 100644 index c400b686db90f..0000000000000 --- a/devops/scripts/benchmarking/common.py +++ /dev/null @@ -1,196 +0,0 @@ -import re -import os -import sys -import string -import configparser - - -class Validate: - """Static class containing methods for validating various fields""" - - @staticmethod - def filepath(path: str) -> bool: - """ - Returns True if path is clean (no illegal characters), otherwise False. - """ - filepath_re = re.compile(r"[a-zA-Z0-9\/\._\-]+") - return filepath_re.match(path) is not None - - @staticmethod - def timestamp(t: str) -> bool: - """ - Returns True if t is in form YYYYMMDD_HHMMSS, otherwise False. - """ - timestamp_re = re.compile( - r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" - ) - return timestamp_re.match(t) is not None - - @staticmethod - def sanitize_stat(stat: str) -> float: - """ - Sanitize statistics found in compute-benchmark output csv files. Returns - float if sanitized, None if not sanitizable. - """ - # Get rid of % - if stat[-1] == "%": - stat = stat[:-1] - - # Cast to float: If cast succeeds, the statistic is clean. - try: - return float(stat) - except ValueError: - return None - - -class SanitizedConfig: - """ - Static class for holding sanitized configuration values used within python. - - Configuration option names follow
_
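One convention carries over from the deleted scripts into the new flow: results are keyed by a UTC YYYYMMDD_HHMMSS timestamp. The deleted Validate.timestamp enforces that shape with a regex, and the new action stamps result files as ${SAVE_NAME}_${SAVE_TIMESTAMP}.json using date -u +'%Y%m%d_%H%M%S'. Below is a minimal sketch of that shared convention; the helper names are illustrative and not taken from either script.

import re
from datetime import datetime, timezone

# Same pattern the deleted Validate.timestamp used for YYYYMMDD_HHMMSS values.
TIMESTAMP_RE = re.compile(
    r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$"
)

def make_timestamp() -> str:
    # Mirrors the action's date -u +'%Y%m%d_%H%M%S' (timestamps are in UTC).
    return datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")

def result_filename(save_name: str, timestamp: str) -> str:
    # New results land in results/ as <save_name>_<timestamp>.json,
    # e.g. Baseline_PVC_L0_20250101_120000.json.
    assert TIMESTAMP_RE.match(timestamp), "expected YYYYMMDD_HHMMSS"
    return f"{save_name}_{timestamp}.json"

print(result_filename("Baseline_PVC_L0", make_timestamp()))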