
Commit fcbbe52: Change folder structure
Parent: 6193059

7 files changed: +190 -140 lines

.github/workflows/sycl-benchmark-aggregate.yml

Lines changed: 8 additions & 8 deletions
@@ -83,14 +83,14 @@ jobs:
       - name: Run aggregator on historical results
         run: |
           # The current format of the historical results repository is:
-          # /<runner type>/<test case name>
-          # Thus, a min/max depth of 2 is used to enumerate all test cases in the
-          # repository. Runner type and testcase name are also extracted from this
-          # path.
-          for dir in $(find "$PERF_RES_PATH" -mindepth 2 -maxdepth 2 -type d ! -path '*.git*'); do
-            _runner="$(basename $(dirname $dir))"
-            _testcase="$(basename $dir)"
-            python llvm/devops/scripts/benchmarking/aggregate.py "$_runner" "$_testcase" "$CUTOFF_TIMESTAMP"
+          #
+          # /<ONEAPI_DEVICE_SELECTOR>/<runner>/<test name>
+          #
+          # Thus, a min/max depth of 3 is used to enumerate all test cases in the
+          # repository. The test name is also derived from this path.
+          for dir in $(find "$PERF_RES_PATH" -mindepth 3 -maxdepth 3 -type d ! -path '*.git*'); do
+            test_name="$(basename $dir)"
+            python llvm/devops/scripts/benchmarking/aggregate.py "$test_name" "$dir" "$CUTOFF_TIMESTAMP"
           done
       - name: Upload average to the repo
         env:
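The new layout can also be walked in a few lines of Python. This is an illustrative sketch only (the "perf-results" checkout path is an assumption; the layout follows the comment in the diff above): it yields each depth-3 test-case directory together with the test name that the workflow passes to aggregate.py.

from pathlib import Path

PERF_RES_PATH = Path("perf-results")  # assumed local checkout of the results repo

def test_case_dirs(root: Path):
    """Yield every /<ONEAPI_DEVICE_SELECTOR>/<runner>/<test name> directory,
    i.e. directories exactly three levels deep, skipping the .git tree."""
    for selector in root.iterdir():
        if not selector.is_dir() or selector.name == ".git":
            continue
        for runner in selector.iterdir():
            if not runner.is_dir():
                continue
            yield from (test for test in runner.iterdir() if test.is_dir())

for d in test_case_dirs(PERF_RES_PATH):
    test_name = d.name  # mirrors: test_name="$(basename $dir)"
    print(test_name, d)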

devops/scripts/benchmarking/aggregate.py

Lines changed: 7 additions & 7 deletions
@@ -63,18 +63,18 @@ def get_median(self) -> float:
         return -self.maxheap_smaller[0]


-def aggregate_median(runner: str, benchmark: str, cutoff: str):
+def aggregate_median(test_name: str, test_dir: str, cutoff: str):

-    # Get all .csv benchmark samples for the requested runner + benchmark
+    # Get all .csv samples for the requested test folder
     def csv_samples() -> list[str]:
         # TODO check that the path below is a valid directory
-        cache_dir = Path(f"{common.PERF_RES_PATH}/{runner}/{benchmark}")
+        cache_dir = Path(f"{test_dir}")
         # TODO check for time range; what time range do I want?
         return filter(
             lambda f: f.is_file()
             and common.valid_timestamp(str(f)[-19:-4])
             and str(f)[-19:-4] > cutoff,
-            cache_dir.glob(f"{benchmark}-*_*.csv"),
+            cache_dir.glob(f"{test_name}-*_*.csv"),
         )

     # Calculate median of every desired metric:
@@ -95,7 +95,7 @@ def csv_samples() -> list[str]:

     # Write calculated median (aggregate_s) as a new .csv file:
     with open(
-        f"{common.PERF_RES_PATH}/{runner}/{benchmark}/{benchmark}-median.csv", "w"
+        f"{test_dir}/{test_name}-median.csv", "w"
     ) as output_csv:
         writer = csv.DictWriter(
             output_csv, fieldnames=["TestCase", *common.metrics_variance.keys()]
@@ -114,13 +114,13 @@ def csv_samples() -> list[str]:
 if __name__ == "__main__":
     if len(sys.argv) < 4:
         print(
-            f"Usage: {sys.argv[0]} <runner name> <test case name> <cutoff date YYYYMMDD_HHMMSS>"
+            f"Usage: {sys.argv[0]} <test name> <absolute path to test directory> <cutoff timestamp YYYYMMDD_HHMMSS>"
         )
         exit(1)
     if not common.valid_timestamp(sys.argv[3]):
         print(sys.argv)
         print(f"Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.")
         exit(1)
     common.load_configs()
-    # <runner>, <test case>, <cutoff>
+
     aggregate_median(sys.argv[1], sys.argv[2], sys.argv[3])
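Two details above merit a quick illustration. First, sample files are named <test name>-<YYYYMMDD_HHMMSS>.csv, so str(f)[-19:-4] slices the 15-character timestamp out of the last 19 characters (dropping the 4-character ".csv" suffix); because the format is fixed-width, plain string comparison against the cutoff is also a chronological comparison. Second, the get_median/maxheap_smaller context at the top of the hunk suggests the standard two-heap running-median technique; the sketch below shows that technique in general form (hypothetical sample values, and not necessarily the module's exact class):

import heapq

# Filename slice (hypothetical file name following <test name>-<timestamp>.csv):
f = "api_overhead_benchmark_sycl-20240131_235959.csv"
assert f[-19:-4] == "20240131_235959"
assert "20240131_235959" > "20231231_000000"  # fixed-width timestamps sort chronologically

# Two-heap running median: a max-heap (stored negated) holds the lower half,
# a min-heap holds the upper half, balanced to within one element.
class RunningMedian:
    def __init__(self):
        self.maxheap_smaller = []  # negated lower half
        self.minheap_larger = []   # upper half

    def add(self, x: float):
        heapq.heappush(self.maxheap_smaller, -x)
        # Move the largest of the lower half up, then rebalance sizes.
        heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller))
        if len(self.minheap_larger) > len(self.maxheap_smaller):
            heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger))

    def get_median(self) -> float:
        if len(self.maxheap_smaller) > len(self.minheap_larger):
            return -self.maxheap_smaller[0]
        return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2

m = RunningMedian()
for sample in [3.0, 1.0, 4.0, 1.5, 9.0]:
    m.add(sample)
assert m.get_median() == 3.0  # median of [1, 1.5, 3, 4, 9]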

devops/scripts/benchmarking/benchmark-ci.conf

Lines changed: 3 additions & 3 deletions
@@ -45,6 +45,6 @@ BENCHMARK_SLOW_LOG="./benchmarks-over_tolerance.log"
 # Log file for test cases that errored / failed to build
 BENCHMARK_ERROR_LOG="./benchmarks-errored.log"

-# Runner types to parse from runner tag
-RUNNER_TYPES="arc,pvc,amdgpu,cuda,gen12"
-# These type names will be used to separate benchmark results based on hardware
+# Enabled ONEAPI_DEVICE_SELECTOR backends
+DEVICE_SELECTOR_ENABLED_BACKENDS="level_zero,opencl,cuda,hip" # Disabled: native_cpu
+DEVICE_SELECTOR_ENABLED_DEVICES="cpu,gpu" # Disabled: fpga
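benchmark.sh (further down in this commit) turns these comma-separated lists into an extended regex and rejects any ONEAPI_DEVICE_SELECTOR that is not exactly one enabled <backend>:<device type> pair. A rough Python equivalent of that check, for illustration only (the function name is made up):

import re

# Values from benchmark-ci.conf above
backends = "level_zero,opencl,cuda,hip"
devices = "cpu,gpu"

def selector_is_supported(selector: str) -> bool:
    # Mirrors the sed expression in benchmark.sh: delete one enabled
    # "<backend>:<device>" pair and reject if anything is left over
    # (extra devices, device indices, unknown backends, ...).
    pair_re = f"({backends.replace(',', '|')}):({devices.replace(',', '|')})"
    return re.sub(pair_re, "", selector, count=1) == ""

assert selector_is_supported("level_zero:gpu")
assert not selector_is_supported("opencl:cpu,cuda:gpu")  # multiple devices
assert not selector_is_supported("hip:gpu:0")            # device chosen by index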

devops/scripts/benchmarking/benchmark.sh

Lines changed: 85 additions & 68 deletions
@@ -6,7 +6,7 @@

 usage () {
     >&2 echo "Usage: $0 <compute-benchmarks git repo> -t <runner type> [-B <compute-benchmarks build path>]
-    -t Specify runner type -- Required
+    -n Github runner name -- Required
     -B Path to clone and build compute-benchmarks on
     -p Path to compute-benchmarks (or directory to build compute-benchmarks in)
     -r Github repo to use for compute-benchmarks origin, in format <org>/<name>
@@ -84,45 +84,51 @@ build_compute_bench() {
 # | tee -a $3 # Print to summary file
 # }

-###
-STATUS_SUCCESS=0
-STATUS_ERROR=1
-###
-
 # Check if the number of samples for a given test case is less than a threshold
 # set in benchmark-ci.conf
+#
+# Usage: samples_under_threshold <relative path of directory containing test case results>
 samples_under_threshold () {
-    mkdir -p $1
-    file_count="$(find $1 -maxdepth 1 -type f | wc -l )"
+    [ ! -d "$PERF_RES_PATH/$1" ] && return 1 # Directory doesn't exist
+    file_count="$(find "$PERF_RES_PATH/$1" -maxdepth 1 -type f | wc -l )"
     [ "$file_count" -lt "$AVERAGE_THRESHOLD" ]
 }

+# Check for a regression via compare.py
+#
+# Usage: check_regression <relative path of output csv>
 check_regression() {
-    if samples_under_threshold "$PERF_RES_PATH/$RUNNER/$1"; then
-        echo "Not enough samples to construct an average, performance check skipped!"
-        return $STATUS_SUCCESS
+    csv_relpath="$(dirname $1)"
+    csv_name="$(basename $1)"
+    if samples_under_threshold "$csv_relpath"; then
+        echo "Not enough samples to construct a good average, performance \
+check skipped!"
+        return 0 # Success status
     fi
-    BENCHMARKING_ROOT="$BENCHMARKING_ROOT" python "$BENCHMARKING_ROOT/compare.py" "$RUNNER" "$1" "$2"
+    BENCHMARKING_ROOT="$BENCHMARKING_ROOT" \
+        python "$BENCHMARKING_ROOT/compare.py" "$csv_relpath" "$csv_name"
     return $?
 }

 # Move the results of our benchmark into the git repo
+#
+# Usage: cache <relative path of output csv>
 cache() {
-    mv "$2" "$PERF_RES_PATH/$RUNNER/$1/"
+    mv "$OUTPUT_PATH/$1" "$PERF_RES_PATH/$1"
 }

-# Check for a regression, and cache if no regression found
+# Check for a regression + cache if no regression found
+#
+# Usage: check_and_cache <relative path of output csv>
 check_and_cache() {
-    echo "Checking $testcase..."
-    if check_regression $1 $2; then
+    echo "Checking $1..."
+    if check_regression $1; then
         if [ "$CACHE_RESULTS" -eq "1" ]; then
-            echo "Caching $testcase..."
-            cache $1 $2
+            echo "Caching $1..."
+            cache $1
         fi
     else
-        if [ "$CACHE_RESULTS" -eq "1" ]; then
-            echo "Not caching!"
-        fi
+        [ "$CACHE_RESULTS" -eq "1" ] && echo "Regression found -- Not caching!"
     fi
 }
@@ -133,24 +139,39 @@ process_benchmarks() {
     echo "### Running and processing selected benchmarks ###"
     if [ -z "$TESTS_CONFIG" ]; then
         echo "Setting tests to run via cli is not currently supported."
-        exit $STATUS_ERROR
+        exit 1
     else
         rm "$BENCHMARK_ERROR_LOG" "$BENCHMARK_SLOW_LOG" 2> /dev/null
-        # Ignore lines in the test config starting with #'s
+        # Loop through each line of enabled_tests.conf, but ignore lines in the
+        # test config starting with #'s:
        grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do
             echo "# Running $testcase..."

-            test_csv_output="$OUTPUT_PATH/$RUNNER/$testcase-$TIMESTAMP.csv"
-            mkdir -p "$OUTPUT_PATH/$RUNNER/"
-            $COMPUTE_BENCH_PATH/build/bin/$testcase --csv --iterations="$COMPUTE_BENCH_ITERATIONS" | tail +8 > "$test_csv_output"
-            # The tail +8 filters out initial debug prints not in csv format
+            # The benchmark results git repo and this script's output both share
+            # the following directory structure:
+            #
+            # /<device selector>/<runner>/<test name>
+            #
+            # Figure out the relative path of our testcase result in both
+            # directories:
+            test_dir_relpath="$DEVICE_SELECTOR_DIRNAME/$RUNNER/$testcase"
+            mkdir -p "$OUTPUT_PATH/$test_dir_relpath" # Ensure directory exists
+            # TODO generate runner config txt if not exist
+            output_csv_relpath="$test_dir_relpath/$testcase-$TIMESTAMP.csv"
+
+            output_csv="$OUTPUT_PATH/$output_csv_relpath" # Real output path
+            $COMPUTE_BENCH_PATH/build/bin/$testcase --csv \
+                --iterations="$COMPUTE_BENCH_ITERATIONS" \
+                | tail +8 > "$output_csv"
+            # The tail +8 filters out header lines not in csv format

-            if [ "$?" -eq 0 ] && [ -s "$test_csv_output" ]; then
-                check_and_cache $testcase $test_csv_output
+            exit_status="$?"
+            if [ "$exit_status" -eq 0 ] && [ -s "$output_csv" ]; then
+                check_and_cache $output_csv_relpath
             else
-                # TODO consider capturing error for logging
-                echo "ERROR @ $test_case"
-                echo "-- $testcase: error $?" >> "$BENCHMARK_ERROR_LOG"
+                # TODO consider capturing stderr for logging
+                echo "[ERROR] $testcase returned exit status $exit_status"
+                echo "-- $testcase: error $exit_status" >> "$BENCHMARK_ERROR_LOG"
             fi
         done
     fi
@@ -163,13 +184,13 @@ process_results() {
         printf "\n### Tests performing over acceptable range of average: ###\n"
         cat "$BENCHMARK_SLOW_LOG"
         echo ""
-        fail=1
+        fail=2
     fi
     if [ -s "$BENCHMARK_ERROR_LOG" ]; then
         printf "\n### Tests that failed to run: ###\n"
         cat "$BENCHMARK_ERROR_LOG"
         echo ""
-        fail=2
+        fail=1
     fi
     exit $fail
 }
@@ -203,40 +224,24 @@ load_configs() {

     . $BENCHMARKING_ROOT/utils.sh
     load_all_configs "$BENCHMARK_CI_CONFIG"
-
-    # Debug
-    # echo "PERF_RES_GIT_REPO: $PERF_RES_GIT_REPO"
-    # echo "PERF_RES_BRANCH: $PERF_RES_BRANCH"
-    # echo "PERF_RES_PATH: $PERF_RES_PATH"
-    # echo "COMPUTE_BENCH_GIT_REPO: $COMPUTE_BENCH_GIT_REPO"
-    # echo "COMPUTE_BENCH_BRANCH: $COMPUTE_BENCH_BRANCH"
-    # echo "COMPUTE_BENCH_PATH: $COMPUTE_BENCH_PATH"
-    # echo "COMPUTE_BENCH_COMPILE_FLAGS: $COMPUTE_BENCH_COMPILE_FLAGS"
-    # echo "OUTPUT_PATH: $OUTPUT_PATH"
-    # echo "METRICS_VARIANCE: $METRICS_VARIANCE"
-    # echo "METRICS_RECORDED: $METRICS_RECORDED"
-    # echo "AVERAGE_THRESHOLD: $AVERAGE_THRESHOLD"
-    # echo "AVERAGE_CUTOFF_RANGE: $AVERAGE_CUTOFF_RANGE"
-    # echo "TIMESTAMP_FORMAT: $TIMESTAMP_FORMAT"
-    # echo "BENCHMARK_SLOW_LOG: $BENCHMARK_SLOW_LOG"
-    # echo "BENCHMARK_ERROR_LOG: $BENCHMARK_ERROR_LOG"
-    echo "Configured runner types: $RUNNER_TYPES"
 }

-load_configs
+#####

 COMPUTE_BENCH_COMPILE_FLAGS=""
 CACHE_RESULTS="0"
 TIMESTAMP="$(date +"$TIMESTAMP_FORMAT")"

-# CLI overrides to configuration options
-while getopts "p:b:r:f:t:cCs" opt; do
+load_configs
+
+# CLI flags + overrides to configuration options:
+while getopts "p:b:r:f:n:cCs" opt; do
     case $opt in
         p) COMPUTE_BENCH_PATH=$OPTARG ;;
         r) COMPUTE_BENCH_GIT_REPO=$OPTARG ;;
         b) COMPUTE_BENCH_BRANCH=$OPTARG ;;
         f) COMPUTE_BENCH_COMPILE_FLAGS=$OPTARG ;;
-        t) RUNNER_TYPE=$OPTARG ;;
+        n) RUNNER=$OPTARG ;;
         # Cleanup status is saved in a var to ensure all arguments are processed before
         # performing cleanup
         c) _cleanup=1 ;;
@@ -246,28 +251,40 @@ while getopts "p:b:r:f:t:cCs" opt; do
     esac
 done

+# Check all necessary variables exist:
 if [ -z "$CMPLR_ROOT" ]; then
     echo "Please set \$CMPLR_ROOT first; it is needed by compute-benchmarks to build."
     exit 1
+elif [ -z "$ONEAPI_DEVICE_SELECTOR" ]; then
+    echo "Please set \$ONEAPI_DEVICE_SELECTOR first to specify which device to use."
+    exit 1
+elif [ -z "$RUNNER" ]; then
+    echo "Please specify runner name using -n first; it is needed for storing/comparing benchmark results."
+    exit 1
 fi
-if [ -z "$RUNNER_TYPE" ]; then
-    echo "Please specify runner type using -t first; it is needed for comparing benchmark results"
+
+# Make sure ONEAPI_DEVICE_SELECTOR doesn't try to enable multiple devices at the
+# same time, or use specific device ids
+_dev_sel_backend_re="$(sed 's/,/|/g' <<< "$DEVICE_SELECTOR_ENABLED_BACKENDS")"
+_dev_sel_device_re="$(sed 's/,/|/g' <<< "$DEVICE_SELECTOR_ENABLED_DEVICES")"
+_dev_sel_re="s/($_dev_sel_backend_re):($_dev_sel_device_re)//"
+if [ -n "$(sed -E "$_dev_sel_re" <<< "$ONEAPI_DEVICE_SELECTOR")" ]; then
+    echo "Unsupported \$ONEAPI_DEVICE_SELECTOR value: please ensure only one \
+device is selected, and devices are not selected by indices."
+    echo "Enabled backends: $DEVICE_SELECTOR_ENABLED_BACKENDS"
+    echo "Enabled device types: $DEVICE_SELECTOR_ENABLED_DEVICES"
     exit 1
-else
-    # Identify runner being used
-    runner_regex="$(printf "$RUNNER_TYPES" | sed 's/,/|/g')"
-    RUNNER="$(printf "$RUNNER_TYPE" | grep -o -E "\b($runner_regex)\b")"
-    if [ -z "$RUNNER" ]; then
-        echo "Unknown runner type! Configured runners: $RUNNER_TYPES"
-        exit 1
-    fi
-    echo "Chosen runner: $RUNNER"
 fi
+# ONEAPI_DEVICE_SELECTOR values are not valid directory names in unix: this
+# value lets us use ONEAPI_DEVICE_SELECTOR as actual directory names
+DEVICE_SELECTOR_DIRNAME="$(sed 's/:/-/' <<< "$ONEAPI_DEVICE_SELECTOR")"

+# Clean up and delete all cached files if specified:
 [ ! -z "$_cleanup" ] && cleanup
-
+# Clone and build only if they aren't already cached/deleted:
 [ ! -d "$PERF_RES_PATH" ] && clone_perf_res
 [ ! -d "$COMPUTE_BENCH_PATH" ] && clone_compute_bench
 [ ! -d "$COMPUTE_BENCH_PATH/build" ] && build_compute_bench
+# Process benchmarks:
 process_benchmarks
 process_results
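The directory mapping at the heart of this restructure is small enough to show in full: the colon in ONEAPI_DEVICE_SELECTOR becomes a hyphen, and the result is joined with the runner and test case names to form the relative path shared by the output directory and the results repo. A short Python illustration (the runner and test names here are made up):

# Mirrors: DEVICE_SELECTOR_DIRNAME="$(sed 's/:/-/' <<< "$ONEAPI_DEVICE_SELECTOR")"
# and:     test_dir_relpath="$DEVICE_SELECTOR_DIRNAME/$RUNNER/$testcase"
selector = "level_zero:gpu"                    # example ONEAPI_DEVICE_SELECTOR value
runner = "my-runner"                           # hypothetical -n argument
testcase = "api_overhead_benchmark_sycl"       # hypothetical test case
timestamp = "20240131_235959"

dirname = selector.replace(":", "-", 1)        # sed 's/:/-/' replaces the first colon
test_dir_relpath = f"{dirname}/{runner}/{testcase}"
output_csv_relpath = f"{test_dir_relpath}/{testcase}-{timestamp}.csv"
print(output_csv_relpath)
# level_zero-gpu/my-runner/api_overhead_benchmark_sycl/api_overhead_benchmark_sycl-20240131_235959.csv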

devops/scripts/benchmarking/common.py

Lines changed: 13 additions & 9 deletions
@@ -9,7 +9,7 @@

 def sanitize(stat: str) -> float:
     # Get rid of %
-    if stat[-1] == '%':
+    if stat[-1] == "%":
         stat = stat[:-1]
     return float(stat)

@@ -24,15 +24,16 @@ def load_configs():
     if not os.path.isfile(benchmarking_ci_conf_path):
         raise Exception(f"Please provide path to a valid BENCHMARKING_ROOT.")

-    global PERF_RES_PATH, metrics_variance, metrics_recorded
+    global PERF_RES_PATH, OUTPUT_PATH, metrics_variance, metrics_recorded
     global BENCHMARK_ERROR_LOG, BENCHMARK_SLOW_LOG
-    perf_res_re = re.compile(r'^PERF_RES_PATH=(.*)$', re.M)
-    m_variance_re = re.compile(r'^METRICS_VARIANCE=(.*)$', re.M)
-    m_recorded_re = re.compile(r'^METRICS_RECORDED=(.*)$', re.M)
-    b_slow_re = re.compile(r'^BENCHMARK_SLOW_LOG=(.*)$', re.M)
-    b_error_re = re.compile(r'^BENCHMARK_ERROR_LOG=(.*)$', re.M)
+    perf_res_re = re.compile(r"^PERF_RES_PATH=(.*)$", re.M)
+    output_path_re = re.compile(r"^OUTPUT_PATH=(.*)$", re.M)
+    m_variance_re = re.compile(r"^METRICS_VARIANCE=(.*)$", re.M)
+    m_recorded_re = re.compile(r"^METRICS_RECORDED=(.*)$", re.M)
+    b_slow_re = re.compile(r"^BENCHMARK_SLOW_LOG=(.*)$", re.M)
+    b_error_re = re.compile(r"^BENCHMARK_ERROR_LOG=(.*)$", re.M)

-    with open(benchmarking_ci_conf_path, 'r') as configs_file:
+    with open(benchmarking_ci_conf_path, "r") as configs_file:
         configs_str = configs_file.read()

     for m_variance in m_variance_re.findall(configs_str):
@@ -48,6 +49,9 @@ def load_configs():
     for perf_res in perf_res_re.findall(configs_str):
         PERF_RES_PATH = str(perf_res[1:-1])

+    for output_path in output_path_re.findall(configs_str):
+        OUTPUT_PATH = str(output_path[1:-1])
+
     for b_slow_log in b_slow_re.findall(configs_str):
         BENCHMARK_SLOW_LOG = str(b_slow_log[1:-1])

@@ -58,6 +62,6 @@ def load_configs():
 def valid_timestamp(timestamp: str) -> bool:
     timestamp_re = re.compile(
         # YYYYMMDD_HHMMSS
-        r'^\d{4}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])_(0[0-9]|1[0-9]|2[0-3])[0-5][0-9][0-5][0-9]$'
+        r"^\d{4}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])_(0[0-9]|1[0-9]|2[0-3])[0-5][0-9][0-5][0-9]$"
     )
     return timestamp_re.match(timestamp) is not None
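A quick usage sketch of the validator above, plus the quoting convention the config parser relies on: values in benchmark-ci.conf are stored quoted (e.g. BENCHMARK_ERROR_LOG="./benchmarks-errored.log"), which is why load_configs strips the first and last characters with [1:-1]. The conf line in the example is hypothetical:

import re

# Same pattern as valid_timestamp above: YYYYMMDD_HHMMSS
timestamp_re = re.compile(
    r"^\d{4}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])_(0[0-9]|1[0-9]|2[0-3])[0-5][0-9][0-5][0-9]$"
)
assert timestamp_re.match("20240131_235959")      # valid
assert not timestamp_re.match("20241301_000000")  # month 13 rejected
assert not timestamp_re.match("20240131-235959")  # wrong separator

# Quote stripping, mirroring OUTPUT_PATH = str(output_path[1:-1]):
conf_line = 'OUTPUT_PATH="./artifact"'  # hypothetical conf line
match = re.compile(r"^OUTPUT_PATH=(.*)$", re.M).search(conf_line)
assert match and match.group(1)[1:-1] == "./artifact"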
