Skip to content

Commit 35177d6

Browse files
authored
[CI] Improve error handling and validation in Jenkins pipeline, tuna-script and tuningRunner (#1913)
Improve CI robustness and error visibility by enhancing error handling in the Jenkins pipeline, validating output files from tuna-script and guarding against unbound variables. (Together with the changes from ROCm/MITuna#999) --------- Signed-off-by: Djordje Antic <[email protected]>
1 parent e5bad4d commit 35177d6

File tree

3 files changed

+51
-11
lines changed

3 files changed

+51
-11
lines changed

mlir/utils/jenkins/Jenkinsfile

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -966,30 +966,45 @@ pipeline {
966966
git branch: "pf-tuna-rocmlir-3", poll: false, url: 'https://github.com/ROCm/MITuna.git'
967967
}
968968
dir('build') {
969+
def tuningLog = "tune_rocmlir_${CHIP}.log"
970+
sh """echo "=== Tuning rocMLIR for ${CHIP} ===" | tee ${tuningLog}"""
969971
// Tune gemms with default datatypes, fail if the DB is not created
970972
// (Includes int8xint8->int8 for performance comparisons against CK.)
971973
sh """../mlir/utils/tuna/tuna-script.sh -o gemm \
972974
-c ../mlir/utils/performance/configs/tier1-gemm-configs \
973-
-t ${WORKSPACE}/MITuna -f mlir_tuning_${CHIP}.tsv
974-
[ -f mlir_tuning_${CHIP}.tsv ]"""
975+
-t ${WORKSPACE}/MITuna -f mlir_tuning_${CHIP}.tsv 2>&1 | tee -a ${tuningLog}
976+
[ -f mlir_tuning_${CHIP}.tsv ]"""
975977
// Tune resnet50 and unet configs
976978
sh """../mlir/utils/tuna/tuna-script.sh -o convolution \
977979
-c ../mlir/utils/performance/configs/tier1-conv-configs \
978-
-t ${WORKSPACE}/MITuna -f mlir_tuning_${CHIP}.tsv"""
980+
-t ${WORKSPACE}/MITuna -f mlir_tuning_${CHIP}.tsv 2>&1 | tee -a ${tuningLog}"""
979981
// Tune attention configs
980982
sh """../mlir/utils/tuna/tuna-script.sh -o attention \
981983
-c ../mlir/utils/performance/configs/tier1-attention-configs \
982-
-t ${WORKSPACE}/MITuna -f mlir_tuning_${CHIP}.tsv"""
984+
-t ${WORKSPACE}/MITuna -f mlir_tuning_${CHIP}.tsv 2>&1 | tee -a ${tuningLog}"""
983985
// Tune gemms with default datatypes, fail if the DB is not created (quick tuning)
984986
// (Includes int8xint8->int8 for performance comparisons against CK.)
985987
sh """../mlir/utils/tuna/tuna-script.sh -o gemm \
986988
-c ../mlir/utils/performance/configs/tier1-gemm-configs -s quick \
987-
-t ${WORKSPACE}/MITuna -f mlir_quick_tuning_${CHIP}.tsv
989+
-t ${WORKSPACE}/MITuna -f mlir_quick_tuning_${CHIP}.tsv 2>&1 | tee -a ${tuningLog}
988990
[ -f mlir_quick_tuning_${CHIP}.tsv ]"""
989991
// Tune resnet50 and unet configs (quick tuning)
990992
sh """../mlir/utils/tuna/tuna-script.sh -o convolution \
991993
-c ../mlir/utils/performance/configs/tier1-conv-configs -s quick \
992-
-t ${WORKSPACE}/MITuna -f mlir_quick_tuning_${CHIP}.tsv"""
994+
-t ${WORKSPACE}/MITuna -f mlir_quick_tuning_${CHIP}.tsv 2>&1 | tee -a ${tuningLog}"""
995+
sh """echo "=== Tuning rocMLIR for ${CHIP} completed ===" | tee -a ${tuningLog}"""
996+
// Check for errors in the tuning log
997+
script {
    // Read the accumulated tuning log and keep every line that contains the
    // standalone word "error" (case-insensitive).
    def logLines = readFile(tuningLog).split('\n')
    def errorLines = logLines.findAll { it =~ /(?i)\berror\b/ }

    if (errorLines) {
        // Surface the offending lines so the failure can be diagnosed
        // straight from the console output.
        echo "Lines flagged in ${tuningLog}:\n${errorLines.join('\n')}"
        // error() throws and aborts the stage, so the build result must be
        // recorded before it is called; a statement placed after it never runs.
        currentBuild.result = 'FAILURE'
        error("Tuning failed: Detected errors in tuning log")
    } else {
        echo "No errors found in tuning log"
    }
}
9931008
}
9941009
}
9951010
}

mlir/utils/performance/tuningRunner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,8 @@ def main(args=None):
401401

402402
if winners is None:
403403
# Tuning aborted, bail
404-
return
404+
print("Tuning aborted")
405+
return 1
405406

406407
if parsed_args.debug:
407408
print(allData, file=sys.stderr)

mlir/utils/tuna/tuna-script.sh

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#!/bin/bash
22

3+
# Exit immediately when a command fails (-e), treat unset variables as errors (-u),
# and make a pipeline return the status of the last command in it that failed (-o pipefail).
4+
set -euo pipefail
5+
36
# For installing mysql 8.0 for testing, or for running with an isolated database.
47
function mysql_setup_generic
58
{
@@ -42,6 +45,28 @@ function clear_tables
4245
mysql --user root --database tuna -e "delete from rocmlir_${tablekind}_config;"
4346
}
4447

48+
#######################################
# Sanity-check a tuning results file.
# The first line of the file is taken to be the header.
# Arguments:
#   $1 - path of the file to check
# Outputs:
#   An "ERROR: ..." message on stdout when a check fails (preceded by the
#   repeated header line in the duplicate-header case).
# Returns:
#   0 when the file passes every check; otherwise the shell EXITS with
#   status 1 (the function does not return).
#######################################
validate_tuning_file() {
  local f="$1"
  local data_line

  # The file must exist and be non-empty.
  if [ ! -s "$f" ]; then
    echo "ERROR: $f is missing or empty!"
    exit 1
  fi

  # Fail if two consecutive lines both equal the header. The header is
  # captured inside awk (NR == 1) instead of being passed with -v, because
  # -v assignments process backslash escapes and could alter the value.
  if ! awk 'NR == 1 { hdr = $0 }
            NR > 1 && prev == hdr && $0 == hdr { print; exit 1 }
            { prev = $0 }' "$f"; then
    echo "ERROR: $f has duplicate header lines next to each other!"
    exit 1
  fi

  # Look for the first line after the header that is neither blank nor a
  # repeat of the header. An explicit bracket expression is used because
  # "\s" is a GNU awk extension that other awk implementations do not
  # treat as whitespace.
  data_line=$(awk 'NR == 1 { hdr = $0; next }
                   $0 !~ /^[ \t\r\f\v]*$/ && $0 != hdr { print; exit }' "$f")
  if [ -z "$data_line" ]; then
    echo "ERROR: $f has no data after header!"
    exit 1
  fi
}
69+
4570
function tuna_run
4671
{
4772
kind=$1
@@ -60,10 +85,9 @@ function tuna_run
6085
fi
6186
(cd "${ROCMLIR_DIR}"/build/ || exit 1 ; ${TUNA_DIR}/tuna/go_fish.py rocmlir --execute --session_id "$session" $factor)
6287
${TUNA_DIR}/tuna/rocmlir/export_configs.py --session_id "$session" --append -f "$OUT_FILE"
88+
validate_tuning_file "$OUT_FILE"
6389
}
6490

65-
66-
6791
# Print a "<script> usage:" banner, then every line of this script that
# matches the grep pattern below, and exit with status 0.
# NOTE(review): the pattern presumably targets the commented option arms of
# the argument parser, which is outside this view; confirm.
usage() {
  echo "$0 usage:" && grep " .)\ #" "$0"
  exit 0
}
6892
[ $# -eq 0 ] && usage
6993

@@ -118,7 +142,7 @@ export TUNA_DB_USER_NAME=root
118142
export TUNA_DB_USER_PASSWORD=
119143
export TUNA_DB_HOSTNAME=127.0.0.1
120144
export TUNA_DB_NAME=tuna
121-
export PYTHONPATH=$TUNA_DIR:$PYTHONPATH
145+
export PYTHONPATH=$TUNA_DIR:${PYTHONPATH:-}
122146

123147
# If no mysqld running, assume it and Tuna need to be set up.
124148
# Otherwise, assume the usual setup.
@@ -130,7 +154,7 @@ else
130154
TUNA_DB_HOSTNAME=localhost
131155
fi
132156

133-
if [ "$VIRTUAL_ENV" = "" ]; then
157+
if [ "${VIRTUAL_ENV:-}" = "" ]; then
134158
source /tuna-venv/bin/activate
135159
fi
136160

0 commit comments

Comments
 (0)