ECP-CANDLE
diff --git a/‎workflows/nt3_mlrMBO/python/nt3_tc1_runner.py‎
Lines changed: 2 additions & 0 deletions b/‎workflows/nt3_mlrMBO/python/nt3_tc1_runner.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎workflows/nt3_mlrMBO/scripts/run_model.sh‎
Lines changed: 3 additions & 2 deletions b/‎workflows/nt3_mlrMBO/scripts/run_model.sh‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎workflows/nt3_mlrMBO/scripts/theta_run_model.sh‎
Lines changed: 3 additions & 2 deletions b/‎workflows/nt3_mlrMBO/scripts/theta_run_model.sh‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎workflows/nt3_mlrMBO/swift/ai_workflow.sh‎
Lines changed: 5 additions & 1 deletion b/‎workflows/nt3_mlrMBO/swift/ai_workflow.sh‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎workflows/nt3_mlrMBO/swift/ai_workflow3.swift‎
Lines changed: 2 additions & 1 deletion b/‎workflows/nt3_mlrMBO/swift/ai_workflow3.swift‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎workflows/nt3_mlrMBO/swift/cori_workflow3.sh‎
Lines changed: 5 additions & 7 deletions b/‎workflows/nt3_mlrMBO/swift/cori_workflow3.sh‎
Lines changed: 5 additions & 7 deletions
diff --git a/‎workflows/nt3_mlrMBO/swift/theta_workflow.sh‎
Lines changed: 6 additions & 7 deletions b/‎workflows/nt3_mlrMBO/swift/theta_workflow.sh‎
Lines changed: 6 additions & 7 deletions
diff --git a/‎workflows/nt3_mlrMBO/swift/workflow.sh‎
Lines changed: 9 additions & 7 deletions b/‎workflows/nt3_mlrMBO/swift/workflow.sh‎
Lines changed: 9 additions & 7 deletions
diff --git a/‎workflows/nt3_mlrMBO/swift/workflow3.swift‎
Lines changed: 3 additions & 1 deletion b/‎workflows/nt3_mlrMBO/swift/workflow3.swift‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎workflows/p2b1_mlrMBO/python/p2b1_runner.py‎
Lines changed: 2 additions & 0 deletions b/‎workflows/p2b1_mlrMBO/python/p2b1_runner.py‎
Lines changed: 2 additions & 0 deletions
@@ -61,10 +61,12 @@ def run(hyper_parameter_map):
     framework = sys.argv[4]
     exp_id = sys.argv[5]
     run_id = sys.argv[6]
+    benchmark_timeout = int(sys.argv[7])
     hyper_parameter_map = runner_utils.init(param_string, instance_directory, framework, 'save')
     hyper_parameter_map['model_name'] = model_name
     hyper_parameter_map['experiment_id'] = exp_id
     hyper_parameter_map['run_id'] = run_id
+    hyper_parameter_map['timeout'] = benchmark_timeout
     # clear sys.argv so that argparse doesn't object
     sys.argv = ['nt3_tc1_runner']
     result = run(hyper_parameter_map)
 
@@ -13,7 +13,7 @@ set -eu
 
 # !!! IF YOU CHANGE THE NUMBER OF ARGUMENTS PASSED TO THIS SCRIPT, YOU MUST
 # CHANGE THE TIMEOUT_ARG_INDEX !!!
-TIMEOUT_ARG_INDEX=8
+TIMEOUT_ARG_INDEX=9
 TIMEOUT=""
 if [[ $# ==  $TIMEOUT_ARG_INDEX ]]
 then
@@ -42,12 +42,13 @@ model_name=$4
 framework=$5
 exp_id=$6
 run_id=$7
+benchmark_timeout=$8
 
 BENCHMARK_DIR=$emews_root/../../../Benchmarks/common:$emews_root/../../../Benchmarks/Pilot1/NT3:$emews_root/../../../Benchmarks/Pilot1/TC1
 COMMON_DIR=$emews_root/../common/python
 export PYTHONPATH="$PYTHONPATH:$BENCHMARK_DIR:$COMMON_DIR"
 
-arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework" "$exp_id" "$run_id")
+arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework" "$exp_id" "$run_id" "$benchmark_timeout")
 MODEL_CMD="python ${arg_array[@]}"
 # Turn bash error checking off. This is
 # required to properly handle the model execution return value
 
@@ -13,7 +13,7 @@ set -eu
 
 # !!! IF YOU CHANGE THE NUMBER OF ARGUMENTS PASSED TO THIS SCRIPT, YOU MUST
 # CHANGE THE TIMEOUT_ARG_INDEX !!!
-TIMEOUT_ARG_INDEX=8
+TIMEOUT_ARG_INDEX=9
 TIMEOUT=""
 if [[ $# ==  $TIMEOUT_ARG_INDEX ]]
 then
@@ -41,6 +41,7 @@ model_name=$4
 framework=$5
 exp_id=$6
 run_id=$7
+benchmark_timeout=$8
 
 # Theta / Tensorflow env vars
 export KMP_BLOCKTIME=30
@@ -60,7 +61,7 @@ PYTHONPATH+="$BENCHMARK_DIR:$COMMON_DIR:"
 PYTHONPATH+="$PYTHONHOME/lib/python2.7/site-packages"
 export PYTHONPATH
 
-arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework"  "$exp_id" "$run_id")
+arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework"  "$exp_id" "$run_id" "$benchmark_timeout")
 MODEL_CMD="python ${arg_array[@]}"
 
 # Turn bash error checking off. This is
 
@@ -26,12 +26,15 @@ export PPN=${PPN:-1}
 export QUEUE=${QUEUE:-debug}
 export WALLTIME=${WALLTIME:-00:30:00}
 
+# Benchmark run timeout: benchmark run will time out
+# after the specified number of seconds. -1 is no timeout.
+BENCHMARK_TIMEOUT=${BENCHMARK_TIMEOUT:-3600}
+
 # set machine to your scheduler type (e.g. pbs, slurm, cobalt etc.),
 # or empty for an immediate non-queued unscheduled run
 MACHINE=""
 
 # mlrMBO settings
-# How many to runs evaluate per iteration
 MAX_BUDGET=${MAX_BUDGET:-110}
 # Total iterations
 MAX_ITERATIONS=${MAX_ITERATIONS:-4}
@@ -83,6 +86,7 @@ MODEL_NAME="nt3"
 CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
 CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -script_file=$SCRIPT_FILE -model_name=$MODEL_NAME "
 CMD_LINE_ARGS+="-exp_id=$EXPID -log_script=$LOG_SCRIPT_FILE "
+CMD_LINE_ARGS+="-benchmark_timeout=$BENCHMARK_TIMEOUT"
 
 if [ -n "$MACHINE" ]; then
   MACHINE="-m $MACHINE"
 
@@ -21,6 +21,7 @@ string model_name = argv("model_name");
 file model_script = input(argv("script_file"));
 file log_script = input(argv("log_script"));
 string exp_id = argv("exp_id");
+int benchmark_timeout = toint(argv("benchmark_timeout", "-1"));
 
 string FRAMEWORK = "keras";
 
@@ -31,7 +32,7 @@ max.budget = %d, max.iterations = %d, design.size=%d, propose.points=%d, param.s
 
 app (file out, file err) run_model (file shfile, string params_string, string instance, string run_id)
 {
-    "bash" shfile params_string emews_root instance model_name FRAMEWORK exp_id run_id @stdout=out @stderr=err;
+    "bash" shfile params_string emews_root instance model_name FRAMEWORK exp_id run_id benchmark_timeout @stdout=out @stderr=err;
 }
 
 
 
@@ -1,9 +1,6 @@
 #! /usr/bin/env bash
 set -eu
 
-# CORI WORKFLOW
-# Main entry point for P1B3 mlrMBO workflow
-
 # Autodetect this workflow directory
 export EMEWS_PROJECT_ROOT=$( cd $( dirname $0 )/.. ; /bin/pwd )
 
@@ -28,10 +25,11 @@ export PPN=${PPN:-1}
 export QUEUE=${QUEUE:-regular}
 export WALLTIME=${WALLTIME:-01:00:00}
 
-# mlrMBO settings
-# How many to runs evaluate per iteration
-
+# Benchmark run timeout: benchmark run will time out
+# after the specified number of seconds. -1 is no timeout.
+BENCHMARK_TIMEOUT=${BENCHMARK_TIMEOUT:-3600}
 
+# mlrMBO settings
 MAX_BUDGET=${MAX_BUDGET:-1000}
 # Total iterations
 MAX_ITERATIONS=${MAX_ITERATIONS:-4}
@@ -88,6 +86,7 @@ MODEL_NAME="nt3"
 CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
 CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -model_name=$MODEL_NAME "
 CMD_LINE_ARGS+="-exp_id=$EXPID "
+CMD_LINE_ARGS+="-benchmark_timeout=$BENCHMARK_TIMEOUT"
 
 # set machine to your scheduler type (e.g. pbs, slurm, cobalt etc.),
 # or empty for an immediate non-queued unscheduled run
@@ -113,4 +112,3 @@ WORKFLOW_SWIFT=workflow3.swift
 swift-t -n $PROCS $MACHINE -p -I $EQR -r $EQR \
         -e LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$R_LIB:$GCC_LIB \
         $EMEWS_PROJECT_ROOT/swift/$WORKFLOW_SWIFT $CMD_LINE_ARGS
-
@@ -1,9 +1,6 @@
 #! /usr/bin/env bash
 set -eu
 
-# CORI WORKFLOW
-# Main entry point for P1B3 mlrMBO workflow
-
 # Autodetect this workflow directory
 export EMEWS_PROJECT_ROOT=$( cd $( dirname $0 )/.. ; /bin/pwd )
 
@@ -26,11 +23,12 @@ export PPN=${PPN:-1}
 export QUEUE=${QUEUE:-default}
 export WALLTIME=${WALLTIME:-05:00:00}
 
-
-# mlrMBO settings
-# How many to runs evaluate per iteration
+# Benchmark run timeout: benchmark run will time out
+# after the specified number of seconds. -1 is no timeout.
+BENCHMARK_TIMEOUT=${BENCHMARK_TIMEOUT:-3600}
 
 
+# mlrMBO settings
 MAX_BUDGET=${MAX_BUDGET:-1000}
 # Total iterations
 MAX_ITERATIONS=${MAX_ITERATIONS:-3}
@@ -91,7 +89,8 @@ EQR=$EMEWS_PROJECT_ROOT/ext/EQ-R
 CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
 CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -script_file=$EMEWS_PROJECT_ROOT/scripts/theta_run_model.sh "
 CMD_LINE_ARGS+="-model_name=$MODEL_NAME "
-CMD_LINE_ARGS+="-exp_id=$EXPID -log_script=$EMEWS_PROJECT_ROOT/../common/sh/theta_run_logger.sh"
+CMD_LINE_ARGS+="-exp_id=$EXPID -log_script=$EMEWS_PROJECT_ROOT/../common/sh/theta_run_logger.sh "
+CMD_LINE_ARGS+="-benchmark_timeout=$BENCHMARK_TIMEOUT"
 
 TURBINE_DIR=/home/wozniak/Public/sfw/theta/swift-t-pyr/turbine/lib
 
 
@@ -2,18 +2,16 @@
 #! /usr/bin/env bash
 set -eu
 
-# CORI WORKFLOW
-# Main entry point for P1B3 mlrMBO workflow
-
 # Autodetect this workflow directory
 export EMEWS_PROJECT_ROOT=$( cd $( dirname $0 )/.. ; /bin/pwd )
 
 # USER SETTINGS START
 
 # See README.md for more information
 
-# The directory in the Benchmarks repo containing P1B3
-BENCHMARK_DIR="$EMEWS_PROJECT_ROOT/../../../Benchmarks/Pilot1/NT3"
+# The directories in the Benchmarks repo containing the common code and NT3
+BENCHMARK_DIR="$EMEWS_PROJECT_ROOT/../../../Benchmarks/common"
+BENCHMARK_DIR="$BENCHMARK_DIR:$EMEWS_PROJECT_ROOT/../../../Benchmarks/Pilot1/NT3"
 
 # The number of MPI processes
 # Note that 2 processes are reserved for Swift/EMEWS
@@ -28,12 +26,15 @@ export PPN=${PPN:-1}
 export QUEUE=${QUEUE:-debug}
 export WALLTIME=${WALLTIME:-00:30:00}
 
+# Benchmark run timeout: benchmark run will time out
+# after the specified number of seconds. -1 is no timeout.
+BENCHMARK_TIMEOUT=${BENCHMARK_TIMEOUT:-3600}
+
 # set machine to your scheduler type (e.g. pbs, slurm, cobalt etc.),
 # or empty for an immediate non-queued unscheduled run
 MACHINE=""
 
 # mlrMBO settings
-# How many to runs evaluate per iteration
 MAX_BUDGET=${MAX_BUDGET:-110}
 # Total iterations
 MAX_ITERATIONS=${MAX_ITERATIONS:-4}
@@ -81,7 +82,8 @@ export RESIDENT_WORK_RANKS=$(( PROCS - 2 ))
 EQR=$EMEWS_PROJECT_ROOT/ext/EQ-R
 
 CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
-CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -model_name=$MODEL_NAME -script_file=$SCRIPT_FILE -exp_id=$EXPID"
+CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -model_name=$MODEL_NAME -exp_id=$EXPID "
+CMD_LINE_ARGS+="-benchmark_timeout=$BENCHMARK_TIMEOUT"
 
 if [ -n "$MACHINE" ]; then
   MACHINE="-m $MACHINE"
 
@@ -19,6 +19,7 @@ int design_size = toint(argv("ds", "10"));
 string param_set = argv("param_set_file");
 string model_name = argv("model_name");
 string exp_id = argv("exp_id");
+int benchmark_timeout = toint(argv("benchmark_timeout", "-1"));
 
 string code_template =
 """
@@ -37,6 +38,7 @@ hyper_parameter_map['instance_directory'] = outdir
 hyper_parameter_map['model_name'] = '%s'
 hyper_parameter_map['experiment_id'] = '%s'
 hyper_parameter_map['run_id'] = '%s'
+hyper_parameter_map['timeout'] = %d
 
 validation_loss = nt3_tc1_runner.run(hyper_parameter_map)
 """;
@@ -73,7 +75,7 @@ max.budget = %d, max.iterations = %d, design.size=%d, propose.points=%d, param.s
 
 (string obj_result) obj(string params, string iter_indiv_id) {
   string outdir = "%s/run_%s" % (turbine_output, iter_indiv_id);
-  string code = code_template % (outdir, params, model_name, exp_id, iter_indiv_id);
+  string code = code_template % (outdir, params, model_name, exp_id, iter_indiv_id, benchmark_timeout);
   obj_result = python_persist(code, "str(validation_loss)");
   printf(obj_result);
 }
 
@@ -46,10 +46,12 @@ def run(hyper_parameter_map):
     framework = sys.argv[3]
     exp_id = sys.argv[4]
     run_id = sys.argv[5]
+    benchmark_timeout = int(sys.argv[6])
     hyper_parameter_map = runner_utils.init(param_string, instance_directory,
                                             framework, 'save_path')
     hyper_parameter_map['experiment_id'] = exp_id
     hyper_parameter_map['run_id'] = run_id
+    hyper_parameter_map['timeout'] = benchmark_timeout
     # clear sys.argv so that argparse doesn't object
     sys.argv = ['p2b1_runner']
     result = run(hyper_parameter_map)
Original file line number	Diff line number	Diff line change
`@@ -21,6 +21,7 @@ string model_name = argv("model_name");`
`21`	`21`	`file model_script = input(argv("script_file"));`
`22`	`22`	`file log_script = input(argv("log_script"));`
`23`	`23`	`string exp_id = argv("exp_id");`
	`24`	`+int benchmark_timeout = toint(argv("benchmark_timeout", "-1"));`
`24`	`25`
`25`	`26`	`string FRAMEWORK = "keras";`
`26`	`27`
`@@ -31,7 +32,7 @@ max.budget = %d, max.iterations = %d, design.size=%d, propose.points=%d, param.s`
`31`	`32`
`32`	`33`	`app (file out, file err) run_model (file shfile, string params_string, string instance, string run_id)`
`33`	`34`	`{`
`34`		`- "bash" shfile params_string emews_root instance model_name FRAMEWORK exp_id run_id @stdout=out @stderr=err;`
	`35`	`+ "bash" shfile params_string emews_root instance model_name FRAMEWORK exp_id run_id benchmark_timeout @stdout=out @stderr=err;`
`35`	`36`	`}`
`36`	`37`
`37`	`38`