|
#!/bin/bash
#
# Launches one P3B1 model run through p3b1_runner.py.
#
# Positional arguments:
#   $1  parameter_string    parameters handed to the Python runner
#   $2  emews_root          root directory of the project (the directory that
#                           contains the scripts, swift, etc. directories)
#   $3  instance_directory  per-run working directory; the script cd's into it
#   $4  framework           passed through to the Python runner
#   $5  exp_id              passed through to the Python runner
#   $6  run_id              passed through to the Python runner
#   $7  timeout (optional)  limit in seconds enforced with timeout(1)
#
# A failing or timed-out model run is reported on stdout; this script does not
# propagate the model's exit status.
# NOTE(review): presumably deliberate so the calling workflow keeps going
# after a failed run -- confirm before changing.

set -eu

# Check for an optional timeout threshold in seconds. If the model run
# executed below takes longer than this threshold, the run is aborted.
# Note that the "timeout" command must be supported by the executing OS.
#
# The timeout argument is optional. This script reads six positional
# arguments; if exactly one more (the TIMEOUT_ARG_INDEX-th) argument is
# given, it is used as the timeout value.
#
# !!! IF YOU CHANGE THE NUMBER OF ARGUMENTS PASSED TO THIS SCRIPT, YOU MUST
# CHANGE THE TIMEOUT_ARG_INDEX !!!
readonly TIMEOUT_ARG_INDEX=7
readonly REQUIRED_ARG_COUNT=$(( TIMEOUT_ARG_INDEX - 1 ))

# Fail with a readable message instead of the bare "unbound variable" error
# that set -u would otherwise raise on the first missing argument.
if (( $# < REQUIRED_ARG_COUNT )); then
  echo "Usage: ${0##*/} parameter_string emews_root instance_directory" \
       "framework exp_id run_id [timeout_seconds]" >&2
  exit 1
fi

TIMEOUT=""
if (( $# == TIMEOUT_ARG_INDEX )); then
  # Indirect expansion: the value of the positional parameter whose number
  # is stored in TIMEOUT_ARG_INDEX.
  TIMEOUT=${!TIMEOUT_ARG_INDEX}
fi

# Command prefix for the model run; stays empty when no timeout was given.
TIMEOUT_CMD=""
if [[ -n "$TIMEOUT" ]]; then
  TIMEOUT_CMD="timeout $TIMEOUT"
fi

parameter_string=$1

# Set emews_root to the root directory of the project (i.e. the directory
# that contains the scripts, swift, etc. directories and files)
emews_root=$2

# Each model run runs in its own "instance" directory.
# Set instance_directory to that and cd into it. Quoted so that paths
# containing whitespace or glob characters work; set -e aborts on failure.
instance_directory=$3
cd -- "$instance_directory"

framework=$4
exp_id=$5
run_id=$6

# Theta / Tensorflow env vars
export KMP_BLOCKTIME=30
export KMP_SETTINGS=1
export KMP_AFFINITY=granularity=fine,verbose,compact,1,0
export OMP_NUM_THREADS=144

export PYTHONHOME="/sw/xk6/deeplearning/1.0/sles11.3_gnu4.9.3"
# Not referenced again in this script: the model is started as plain "python",
# which is resolved through the PATH set below.
PYTHON="$PYTHONHOME/bin/python"

# Any inherited LD_LIBRARY_PATH is replaced, not extended.
LD_LIBRARY_PATH="$PYTHONHOME/lib"
LD_LIBRARY_PATH+=":/sw/xk6/deeplearning/1.0/sles11.3_gnu4.9.3/lib"
LD_LIBRARY_PATH+=":/sw/xk6/deeplearning/1.0/sles11.3_gnu4.9.3/cuda/lib64"
LD_LIBRARY_PATH+=":/opt/gcc/4.9.3/snos/lib64"
LD_LIBRARY_PATH+=":/sw/xk6/r/3.3.2/sles11.3_gnu4.9.3x/lib64/R/lib"
export LD_LIBRARY_PATH
export PATH="$PYTHONHOME/bin:$PATH"

# Colon-separated pair of benchmark directories, both relative to emews_root.
BENCHMARK_DIR="$emews_root/../../../Benchmarks/common:$emews_root/../../../Benchmarks/Pilot3/P3B1"
COMMON_DIR="$emews_root/../common/python"
# NOTE(review): this mixes a python3.6 library directory with python2.7
# site-packages -- confirm that both entries are intended.
PYTHONPATH="$PYTHONHOME/lib/python3.6:"
PYTHONPATH+="$BENCHMARK_DIR:$COMMON_DIR:"
PYTHONPATH+="$PYTHONHOME/lib/python2.7/site-packages"
export PYTHONPATH

# Arguments for the Python runner, kept in an array so every value reaches
# python as exactly one argument regardless of its content.
arg_array=(
  "$emews_root/python/p3b1_runner.py"
  "$parameter_string"
  "$instance_directory"
  "$framework"
  "$exp_id"
  "$run_id"
)
# Human-readable form of the command, used only in log messages.
MODEL_CMD="python ${arg_array[*]}"

# Turn bash error checking off. This is required to properly handle the
# model execution return value and the optional timeout.
set +e
echo "$MODEL_CMD"
# TIMEOUT_CMD is intentionally unquoted: it must expand to nothing when no
# timeout is set and to the two words "timeout <seconds>" otherwise.
# shellcheck disable=SC2086
$TIMEOUT_CMD python "${arg_array[@]}"
# $? is the exit status of the most recently executed command (i.e. the
# line above)
RES=$?
# 124 is the status timeout(1) returns when the time limit was reached.
if (( RES == 124 )); then
  echo "---> Timeout error in $MODEL_CMD"
elif (( RES != 0 )); then
  echo "---> Error in $MODEL_CMD"
fi
0 commit comments