Amend config loading to python, split aggregate workflow

ianayl · ianayl · commit 93456e087bd7 · 2025-02-03T09:01:33.000-08:00
diff --git a/.github/workflows/sycl-benchmark-aggregate.yml b/.github/workflows/sycl-benchmark-aggregate.yml
@@ -11,19 +11,16 @@ on:
     inputs:
       cutoff_timestamp:
         description: |
-          Timestamp indicating the age limit of data used in average calculation:
-          Any benchmark results created before this timestamp is excluded from
-          being aggregated. 
-          
-          Any valid date string supported by GNU coreutils is valid here:
-          https://www.gnu.org/software/coreutils/manual/html_node/Date-input-formats.html
+          Timestamp (YYYYMMDD_HHMMSS) indicating the age limit of data used in
+          average calculation: Any benchmark results created before this
+          timestamp are excluded from being aggregated.
         type: string
-        required: false
+        required: true
   workflow_call:
     inputs:
       cutoff_timestamp:
         type: string
-        required: false
+        required: true
 
 permissions:
   contents: read
@@ -35,74 +32,13 @@ jobs:
     steps:
     - uses: actions/checkout@v4
       with:
-        path: llvm
         sparse-checkout: |
           devops/scripts/benchmarking
           devops/benchmarking
+          devops/actions/benchmarking
-    - name: Load benchmarking configuration
-      run: |
-        CONFIG_FILE="$PWD/llvm/devops/benchmarking/benchmark-ci.conf"
-
-        # Load default values from configuration file
-        . "$PWD/llvm/devops/scripts/benchmarking/utils.sh"
-        # utils.sh contains functions to sanitize config file settings
-        load_config_constants "$CONFIG_FILE"
-        echo "PERF_RES_GIT_REPO=$PERF_RES_GIT_REPO" >> $GITHUB_ENV
-        echo "PERF_RES_BRANCH=$PERF_RES_BRANCH" >> $GITHUB_ENV
-        echo "PERF_RES_PATH=$PERF_RES_PATH" >> $GITHUB_ENV
-
-        # Determine a "cutoff timestamp" used by the aggregator script
-        #
-        # This timestamp controls which historical results are used to compute
-        # measures of central tendency: Any files timestamped *before* this time
-        # will be *excluded* from the central tendency calculation.
-
-        echo "TIMESTAMP_FORMAT=$TIMESTAMP_FORMAT" >> $GITHUB_ENV
-        if [ -z '${{ inputs.cutoff_timestamp }}' ]; then
-          # No time given, use default time period from config file:
-          echo "CUTOFF_TIMESTAMP=$(date --date="$AVERAGE_CUTOFF_RANGE" +"$TIMESTAMP_FORMAT")" >> $GITHUB_ENV
-        else
-          # If the provided time is a valid GNU coreutils date string, convert
-          # the time to our format:
-          _converted_timestamp="$(date --date '${{ inputs.cutoff_timestamp }}' +"$TIMESTAMP_FORMAT" 2> /dev/null)"
-          if [ -n "$_converted_timestamp" ]; then
-            echo "CUTOFF_TIMESTAMP=$_converted_timestamp" >> $GITHUB_ENV
-          else
-            # If not a valid GNU date string, it could be in our timestamp format already.
-            # aggregate.py will ensure the timestamp is in the proper format, so we can pass the
-            # time forward regardless: 
-            echo 'CUTOFF_TIMESTAMP=${{ inputs.cutoff_timestamp }}' >> $GITHUB_ENV
-          fi
-        fi
-    - name: Checkout historical performance results repository
-      run: |
-        git clone -b $PERF_RES_BRANCH https://github.com/$PERF_RES_GIT_REPO $PERF_RES_PATH
-    - name: Run aggregator on historical results
-      run: |
-        # The current format of the historical results respository is:
-        #
-        # /<ONEAPI_DEVICE_SELECTOR>/<runner>/<test name>
-        #
-        # Thus, a min/max depth of 3 is used to enumerate all test cases in the
-        # repository. Test name is also derived from here.
-        for dir in $(find "$PERF_RES_PATH" -mindepth 3 -maxdepth 3 -type d ! -path '*.git*'); do
-          test_name="$(basename $dir)"
-          python llvm/devops/scripts/benchmarking/aggregate.py "$test_name" "$dir" "$CUTOFF_TIMESTAMP"
-        done
-    - name: Upload average to the repo
-      env:
-        GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
-      run: |
-        # TODO -- waiting on security clearance
-        cd "$PERF_RES_PATH"
-        git config user.name "SYCL Benchmarking Bot"
-        git config user.email "sys_sycl_benchmarks@intel.com"
-        git add .
-        git commit -m "[GHA] Aggregate median data from $CUTOFF_TIMESTAMP to $(date +"$TIMESTAMP_FORMAT")"
-        git push "https://$GITHUB_TOKEN@github.com/$PERF_RES_GIT_REPO.git" "$PERF_RES_BRANCH"
-    - name: Archive new medians
-      if: always()
-      uses: actions/upload-artifact@v4
+    - name: Aggregate benchmark results and produce historical average
+      uses: ./devops/actions/benchmarking/aggregate
       with:
-        name: llvm-ci-perf-results new medians
-        path: ${{ env.PERF_RES_PATH }}/**/*-median.csv
+        cutoff_timestamp: ${{ inputs.cutoff_timestamp }}
+      env:
+        GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
diff --git a/devops/actions/benchmarking/aggregate/action.yml b/devops/actions/benchmarking/aggregate/action.yml
@@ -0,0 +1,106 @@
+name: 'Aggregate compute-benchmark results and produce historical averages'
+
+# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on
+# how the benchmark results compare to a historical average: This historical
+# average is calculated in this action, which aggregates historical data and
+# produces measures of central tendency (median in this case) used for this
+# purpose.
+#
+# This action assumes that /devops has been checked out in ./devops. This action
+# also assumes that GITHUB_TOKEN was properly set in env, because according to
+# Github, that's apparently the recommended way to pass a secret into a github
+# action:
+#
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets
+#
+
+inputs:
+  cutoff_timestamp:
+    description: >-
+      Timestamp (YYYYMMDD_HHMMSS) marking the age limit of data used in the
+      average calculation: results created before it are excluded.
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+  - name: Sanitize provided timestamp
+    shell: bash
+    env:
+      # Hand the input to the script as data through the environment: pasting
+      # the inputs context into the script text would let a crafted value (e.g.
+      # one containing a single quote) break out of any shell quoting.
+      RAW_CUTOFF_TIMESTAMP: ${{ inputs.cutoff_timestamp }}
+    run: |
+      # Sanitize the cutoff timestamp provided
+      #
+      # This timestamp controls which historical results are used to compute
+      # measures of central tendency: Any files timestamped *before* this time
+      # will be *excluded* from the central tendency calculation.
+
+      # Regex matches exactly YYYYMMDD_HHMMSS
+      TIMESTAMP_RE='^[0-9]{4}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])_([01][0-9]|2[0-3])([0-5][0-9])([0-5][0-9])$'
+
+      # Note:
+      # - The regex is anchored and [[ =~ ]] tests the whole string, so any
+      #   extra text around (or on lines after) a timestamp is rejected.
+      # - DO NOT use inputs.cutoff_timestamp directly in later steps, only use
+      #   SANITIZED_TIMESTAMP
+      if [[ ! "$RAW_CUTOFF_TIMESTAMP" =~ $TIMESTAMP_RE ]]; then
+        echo "Please ensure input in inputs.cutoff_timestamp is exactly YYYYMMDD_HHMMSS."
+        exit 1  # Terminate workflow
+      fi
+      echo "SANITIZED_TIMESTAMP=$RAW_CUTOFF_TIMESTAMP" >> "$GITHUB_ENV"
+  - name: Load benchmarking configuration
+    shell: bash
+    run: |
+      # NOTE(review): this executes whatever load_config.py prints, so it
+      # presumably emits shell assignments for the SANITIZED_* values used
+      # below -- confirm against load_config.py.
+      $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
+      echo "SANITIZED_PERF_RES_GIT_REPO=$SANITIZED_PERF_RES_GIT_REPO" >> "$GITHUB_ENV"
+      echo "SANITIZED_PERF_RES_GIT_BRANCH=$SANITIZED_PERF_RES_GIT_BRANCH" >> "$GITHUB_ENV"
+      echo "SANITIZED_PERF_RES_PATH=$SANITIZED_PERF_RES_PATH" >> "$GITHUB_ENV"
+  - name: Checkout historical performance results repository
+    shell: bash
+    run: |
+      git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" "$SANITIZED_PERF_RES_PATH"
+  - name: Run aggregator on historical results
+    shell: bash
+    run: |
+      # The current format of the historical results repository is:
+      #
+      # /<ONEAPI_DEVICE_SELECTOR>/<runner>/<test name>
+      #
+      # Thus, a min/max depth of 3 is used to enumerate all test cases in the
+      # repository. Test name is also derived from here.
+      #
+      # find's output is streamed NUL-delimited into the loop so that every
+      # directory gets its own iteration.
+      find "$SANITIZED_PERF_RES_PATH" -mindepth 3 -maxdepth 3 -type d ! -path '*.git*' -print0 |
+        while IFS= read -r -d '' dir; do
+          test_name="$(basename "$dir")"
+          python ./devops/scripts/benchmarking/aggregate.py "$test_name" "$dir" "$SANITIZED_TIMESTAMP"
+        done
+  - name: Upload average to the repo
+    shell: bash
+    run: |
+      # TODO -- waiting on security clearance
+      cd "$SANITIZED_PERF_RES_PATH"
+      git config user.name "SYCL Benchmarking Bot"
+      git config user.email "sys_sycl_benchmarks@intel.com"
+      git add .
+      # Nothing staged means the medians are unchanged: `git commit` would
+      # fail in that case, so stop here successfully instead.
+      if git diff --cached --quiet; then
+        echo "No new median data to upload."
+        exit 0
+      fi
+      git commit -m "[GHA] Aggregate median data from $SANITIZED_TIMESTAMP to $(date +%Y%m%d_%H%M%S)"
+      git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH"
+  - name: Archive new medians
+    if: always()
+    uses: actions/upload-artifact@v4
+    with:
+      name: llvm-ci-perf-results new medians
+      path: ${{ env.SANITIZED_PERF_RES_PATH }}/**/*-median.csv
diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml
@@ -2,8 +2,8 @@ name: 'Run compute-benchmarks'
 
 # Run compute-benchmarks on SYCL
 # 
-# This action assumes SYCL is in $PWD/toolchain, and that /devops has been
-# checked out in $PWD/devops. This action also assumes that GITHUB_TOKEN
+# This action assumes SYCL is in ./toolchain, and that /devops has been
+# checked out in ./devops. This action also assumes that GITHUB_TOKEN
 # was properly set in env, because according to Github, that's apparently the
 # recommended way to pass a secret into a github action:
 #
@@ -17,6 +17,12 @@ inputs:
 runs:
   using: "composite"
   steps:
+  - name: (Test) aggregate benchmark results and produce historical average
+    uses: ./devops/actions/benchmarking/aggregate
+    with:
+      cutoff_timestamp: '20240101_000000'  # quoted: must stay a string, not be read as a number
+    env:
+      GITHUB_TOKEN: ${{ env.GITHUB_TOKEN }}
   - name: Run compute-benchmarks
     shell: bash
     run: |
@@ -36,7 +42,8 @@ runs:
 
       EOF
       export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}"
-      export CMPLR_ROOT=$PWD/toolchain
+      export CMPLR_ROOT="$PWD/toolchain"  # absolute: a relative path goes stale once a script changes directory
+      echo "-----"
       sycl-ls
       echo "-----"
       ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s
@@ -45,19 +52,17 @@ runs:
     run: |
       # TODO -- waiting on security clearance
       # Load configuration values
-      . "$PWD/devops/scripts/benchmarking/utils.sh"
-      CONFIG_FILE="$PWD/devops/benchmarking/benchmark-ci.conf"
-      load_config_constants "$CONFIG_FILE"
+      $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
 
-      cd "$PERF_RES_PATH"
+      cd "$SANITIZED_PERF_RES_PATH"
       git config user.name "SYCL Benchmarking Bot"
       git config user.email "sys_sycl_benchmarks@intel.com"
       git add .
       git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
-      git push "https://$GITHUB_TOKEN@github.com/$PERF_RES_GIT_REPO.git" "$PERF_RES_BRANCH"
+      git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH"
   - name: Archive compute-benchmark results
     if: always()
     uses: actions/upload-artifact@v4
     with:
-      name: Compute-benchmark results (${{ runner.name }})
+      name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }})
       path: ./artifact
diff --git a/devops/scripts/benchmarking/benchmark.sh b/devops/scripts/benchmarking/benchmark.sh
@@ -23,22 +23,22 @@ This script builds and runs benchmarks from compute-benchmarks."
 clone_perf_res() {
     echo "### Cloning llvm-ci-perf-res ($SANITIZED_PERF_RES_GIT_REPO:$SANITIZED_PERF_RES_GIT_BRANCH) ###"
     mkdir -p "$(dirname "$SANITIZED_PERF_RES_PATH")"
-    git clone -b $SANITIZED_PERF_RES_GIT_BRANCH https://github.com/$SANITIZED_PERF_RES_GIT_REPO $SANITIZED_PERF_RES_PATH
-    [ "$?" -ne 0 ] && exit $? 
+    # Exit with git's own status: after a `[ ... ]` test, "$?" would be the test's status.
+    git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" "$SANITIZED_PERF_RES_PATH" || exit "$?"
 }
 
 clone_compute_bench() {
     echo "### Cloning compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###"
     mkdir -p "$(dirname "$SANITIZED_COMPUTE_BENCH_PATH")"
-    git clone -b $SANITIZED_COMPUTE_BENCH_GIT_BRANCH \
-              --recurse-submodules https://github.com/$SANITIZED_COMPUTE_BENCH_GIT_REPO \
-              $SANITIZED_COMPUTE_BENCH_PATH
-    [ "$?" -ne 0 ] && exit $? 
+    # Exit with git's own status: after a `[ ... ]` test, "$?" would be the test's status.
+    git clone -b "$SANITIZED_COMPUTE_BENCH_GIT_BRANCH" \
+              --recurse-submodules "https://github.com/$SANITIZED_COMPUTE_BENCH_GIT_REPO" \
+              "$SANITIZED_COMPUTE_BENCH_PATH" || exit "$?"
 }
 
 build_compute_bench() {
     echo "### Building compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###"
-    mkdir $SANITIZED_COMPUTE_BENCH_PATH/build && cd $SANITIZED_COMPUTE_BENCH_PATH/build &&
+    mkdir "$SANITIZED_COMPUTE_BENCH_PATH/build" && cd "$SANITIZED_COMPUTE_BENCH_PATH/build" &&
     # No reason to turn on ccache, if this docker image will be disassembled later on
-    cmake .. -DBUILD_SYCL=ON -DBUILD_L0=OFF -DBUILD=OCL=OFF -DCCACHE_ALLOWED=FALSE
+    cmake .. -DBUILD_SYCL=ON -DBUILD_L0=OFF -DBUILD_OCL=OFF -DCCACHE_ALLOWED=FALSE
     # TODO enable mechanism for opting into L0 and OCL -- the concept is to
@@ -58,38 +58,9 @@ build_compute_bench() {
             make "-j$SANITIZED_COMPUTE_BENCH_COMPILE_JOBS" "$case"
         done < "$TESTS_CONFIG"
     fi
-    #compute_bench_build_stat=$?
     cd -
-    #[ "$compute_bench_build_stat" -ne 0 ] && exit $compute_bench_build_stat 
 }
 
-# print_bench_res() {
-#     # Usage: print_bench_res <benchmark output .csv file> <benchmark status code> <summary file>
-#     if [ ! -s $1 ]; then
-#         printf "NO OUTPUT! (Status $2)\n" | tee -a $3
-#         return  # Do not proceed if file is empty
-#     fi
-#     
-#     get_csv_col_index $1 run-time-mean
-#     tmp_run_time_mean_i=$tmp_csv_col_i
-#     get_csv_col_index $1 run-time-median
-#     tmp_run_time_median_i=$tmp_csv_col_i
-#     get_csv_col_index $1 run-time-throughput
-#     tmp_run_time_throughput_i=$tmp_csv_col_i
-# 
-#     # `sycl-bench` output seems to like inserting the header multiple times.
-#     # Here we cache the header to make sure it prints only once:
-#     tmp_header_title="$(cat $1 | head -n 1 | sed 's/^\# Benchmark name/benchmark/')"
-#     tmp_result="$(cat $1 | grep '^[^\#]')"
-# 
-#     printf "%s\n%s" "$tmp_header_title" "$tmp_result"                  \
-#         | awk -F',' -v me="$tmp_run_time_mean_i"                       \
-#                     -v md="$tmp_run_time_median_i"                     \
-#                     -v th="$tmp_run_time_throughput_i"                 \
-#             '{printf "%-57s %-13s %-15s %-20s\n", $1, $me, $md, $th }' \
-#         | tee -a $3   # Print to summary file
-# }
-
 # Check if the number of samples for a given test case is less than a threshold
 # set in benchmark-ci.conf
 #
@@ -155,6 +126,7 @@ process_benchmarks() {
         # Loop through each line of enabled_tests.conf, but ignore lines in the
         # test config starting with #'s:
         grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do
+            # Make sure testcase is clean:
             if [ -n "$(printf "%s" "$testcase" | sed "s/[a-zA-Z_]*//g")" ]; then
                 echo "Illegal characters in $TESTS_CONFIG."
                 exit 1
@@ -214,8 +186,8 @@ process_results() {
 
 cleanup() {
     echo "### Cleaning up compute-benchmark builds from prior runs ###"
-    rm -rf $SANITIZED_COMPUTE_BENCH_PATH
-    rm -rf $SANITIZED_PERF_RES_PATH
+    rm -rf "$SANITIZED_COMPUTE_BENCH_PATH"
+    rm -rf "$SANITIZED_PERF_RES_PATH"
     [ ! -z "$_exit_after_cleanup" ] && exit
 }
 
@@ -234,9 +206,9 @@ load_configs() {
     # Derive /devops based on location of this script:
     [ -z "$DEVOPS_PATH" ] && DEVOPS_PATH="$(dirname "$0")/../.."
 
-    TESTS_CONFIG="$(realpath $DEVOPS_PATH/benchmarking/enabled_tests.conf)"
-    COMPARE_PATH="$(realpath $DEVOPS_PATH/scripts/benchmarking/compare.py)"
-    LOAD_CONFIG_PY="$(realpath $DEVOPS_PATH/scripts/benchmarking/load_config.py)"
+    TESTS_CONFIG="$(realpath "$DEVOPS_PATH/benchmarking/enabled_tests.conf")"
+    COMPARE_PATH="$(realpath "$DEVOPS_PATH/scripts/benchmarking/compare.py")"
+    LOAD_CONFIG_PY="$(realpath "$DEVOPS_PATH/scripts/benchmarking/load_config.py")"
 
     for file in \
         "$TESTS_CONFIG" "$COMPARE_PATH" "$LOAD_CONFIG_PY"
@@ -261,17 +233,17 @@ TIMESTAMP="$(date +"$SANITIZED_TIMESTAMP_FORMAT")"
 
 # CLI flags + overrides to configuration options:
 while getopts "p:b:r:f:n:cCs" opt; do
-    case $opt in
-        p) COMPUTE_BENCH_PATH=$OPTARG ;;
-        r) COMPUTE_BENCH_GIT_REPO=$OPTARG ;;
-        b) COMPUTE_BENCH_BRANCH=$OPTARG ;;
-        f) COMPUTE_BENCH_COMPILE_FLAGS=$OPTARG ;;
-		n) RUNNER=$OPTARG ;;
+    case "$opt" in
+        p) COMPUTE_BENCH_PATH="$OPTARG" ;;
+        r) COMPUTE_BENCH_GIT_REPO="$OPTARG" ;;
+        b) COMPUTE_BENCH_BRANCH="$OPTARG" ;;
+        f) COMPUTE_BENCH_COMPILE_FLAGS="$OPTARG" ;;
+        n) RUNNER="$OPTARG" ;;
         # Cleanup status is saved in a var to ensure all arguments are processed before
         # performing cleanup
         c) _cleanup=1 ;;
         C) _cleanup=1 && _exit_after_cleanup=1 ;;
+        s) CACHE_RESULTS=1 ;;
         \?) usage ;;
     esac
 done