Skip to content

Commit 6d271d1

Browse files
committed
Merge branch 'adding_timeout'
2 parents f9f9619 + 67b9fbc commit 6d271d1

27 files changed

+110
-47
lines changed

workflows/nt3_mlrMBO/python/nt3_tc1_runner.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,12 @@ def run(hyper_parameter_map):
6161
framework = sys.argv[4]
6262
exp_id = sys.argv[5]
6363
run_id = sys.argv[6]
64+
benchmark_timeout = int(sys.argv[7])
6465
hyper_parameter_map = runner_utils.init(param_string, instance_directory, framework, 'save')
6566
hyper_parameter_map['model_name'] = model_name
6667
hyper_parameter_map['experiment_id'] = exp_id
6768
hyper_parameter_map['run_id'] = run_id
69+
hyper_parameter_map['timeout'] = benchmark_timeout
6870
# clear sys.argv so that argparse doesn't object
6971
sys.argv = ['nt3_tc1_runner']
7072
result = run(hyper_parameter_map)

workflows/nt3_mlrMBO/scripts/run_model.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ set -eu
1313

1414
# !!! IF YOU CHANGE THE NUMBER OF ARGUMENTS PASSED TO THIS SCRIPT, YOU MUST
1515
# CHANGE THE TIMEOUT_ARG_INDEX !!!
16-
TIMEOUT_ARG_INDEX=8
16+
TIMEOUT_ARG_INDEX=9
1717
TIMEOUT=""
1818
if [[ $# == $TIMEOUT_ARG_INDEX ]]
1919
then
@@ -42,12 +42,13 @@ model_name=$4
4242
framework=$5
4343
exp_id=$6
4444
run_id=$7
45+
benchmark_timeout=$8
4546

4647
BENCHMARK_DIR=$emews_root/../../../Benchmarks/common:$emews_root/../../../Benchmarks/Pilot1/NT3:$emews_root/../../../Benchmarks/Pilot1/TC1
4748
COMMON_DIR=$emews_root/../common/python
4849
export PYTHONPATH="$PYTHONPATH:$BENCHMARK_DIR:$COMMON_DIR"
4950

50-
arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework" "$exp_id" "$run_id")
51+
arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework" "$exp_id" "$run_id" "$benchmark_timeout")
5152
MODEL_CMD="python ${arg_array[@]}"
5253
# Turn bash error checking off. This is
5354
# required to properly handle the model execution return value

workflows/nt3_mlrMBO/scripts/theta_run_model.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ set -eu
1313

1414
# !!! IF YOU CHANGE THE NUMBER OF ARGUMENTS PASSED TO THIS SCRIPT, YOU MUST
1515
# CHANGE THE TIMEOUT_ARG_INDEX !!!
16-
TIMEOUT_ARG_INDEX=8
16+
TIMEOUT_ARG_INDEX=9
1717
TIMEOUT=""
1818
if [[ $# == $TIMEOUT_ARG_INDEX ]]
1919
then
@@ -41,6 +41,7 @@ model_name=$4
4141
framework=$5
4242
exp_id=$6
4343
run_id=$7
44+
benchmark_timeout=$8
4445

4546
# Theta / Tensorflow env vars
4647
export KMP_BLOCKTIME=30
@@ -60,7 +61,7 @@ PYTHONPATH+="$BENCHMARK_DIR:$COMMON_DIR:"
6061
PYTHONPATH+="$PYTHONHOME/lib/python2.7/site-packages"
6162
export PYTHONPATH
6263

63-
arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework" "$exp_id" "$run_id")
64+
arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework" "$exp_id" "$run_id" "$benchmark_timeout")
6465
MODEL_CMD="python ${arg_array[@]}"
6566

6667
# Turn bash error checking off. This is

workflows/nt3_mlrMBO/swift/ai_workflow.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,15 @@ export PPN=${PPN:-1}
2626
export QUEUE=${QUEUE:-debug}
2727
export WALLTIME=${WALLTIME:-00:30:00}
2828

29+
# Benchmark run timeout: benchmark run will time out
30+
# after the specified number of seconds. -1 is no timeout.
31+
BENCHMARK_TIMEOUT=${BENCHMARK_TIMEOUT:-3600}
32+
2933
# set machine to your scheduler type (e.g. pbs, slurm, cobalt etc.),
3034
# or empty for an immediate non-queued unscheduled run
3135
MACHINE=""
3236

3337
# mlrMBO settings
34-
# How many to runs evaluate per iteration
3538
MAX_BUDGET=${MAX_BUDGET:-110}
3639
# Total iterations
3740
MAX_ITERATIONS=${MAX_ITERATIONS:-4}
@@ -83,6 +86,7 @@ MODEL_NAME="nt3"
8386
CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
8487
CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -script_file=$SCRIPT_FILE -model_name=$MODEL_NAME "
8588
CMD_LINE_ARGS+="-exp_id=$EXPID -log_script=$LOG_SCRIPT_FILE "
89+
CMD_LINE_ARGS+="-benchmark_timeout=$BENCHMARK_TIMEOUT"
8690

8791
if [ -n "$MACHINE" ]; then
8892
MACHINE="-m $MACHINE"

workflows/nt3_mlrMBO/swift/ai_workflow3.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ string model_name = argv("model_name");
2121
file model_script = input(argv("script_file"));
2222
file log_script = input(argv("log_script"));
2323
string exp_id = argv("exp_id");
24+
int benchmark_timeout = toint(argv("benchmark_timeout", "-1"));
2425

2526
string FRAMEWORK = "keras";
2627

@@ -31,7 +32,7 @@ max.budget = %d, max.iterations = %d, design.size=%d, propose.points=%d, param.s
3132

3233
app (file out, file err) run_model (file shfile, string params_string, string instance, string run_id)
3334
{
34-
"bash" shfile params_string emews_root instance model_name FRAMEWORK exp_id run_id @stdout=out @stderr=err;
35+
"bash" shfile params_string emews_root instance model_name FRAMEWORK exp_id run_id benchmark_timeout @stdout=out @stderr=err;
3536
}
3637

3738

workflows/nt3_mlrMBO/swift/cori_workflow3.sh

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
#! /usr/bin/env bash
22
set -eu
33

4-
# CORI WORKFLOW
5-
# Main entry point for P1B3 mlrMBO workflow
6-
74
# Autodetect this workflow directory
85
export EMEWS_PROJECT_ROOT=$( cd $( dirname $0 )/.. ; /bin/pwd )
96

@@ -28,10 +25,11 @@ export PPN=${PPN:-1}
2825
export QUEUE=${QUEUE:-regular}
2926
export WALLTIME=${WALLTIME:-01:00:00}
3027

31-
# mlrMBO settings
32-
# How many to runs evaluate per iteration
33-
28+
# Benchmark run timeout: benchmark run will time out
29+
# after the specified number of seconds. -1 is no timeout.
30+
BENCHMARK_TIMEOUT=${BENCHMARK_TIMEOUT:-3600}
3431

32+
# mlrMBO settings
3533
MAX_BUDGET=${MAX_BUDGET:-1000}
3634
# Total iterations
3735
MAX_ITERATIONS=${MAX_ITERATIONS:-4}
@@ -88,6 +86,7 @@ MODEL_NAME="nt3"
8886
CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
8987
CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -model_name=$MODEL_NAME "
9088
CMD_LINE_ARGS+="-exp_id=$EXPID "
89+
CMD_LINE_ARGS+="-benchmark_timeout=$BENCHMARK_TIMEOUT"
9190

9291
# set machine to your scheduler type (e.g. pbs, slurm, cobalt etc.),
9392
# or empty for an immediate non-queued unscheduled run
@@ -113,4 +112,3 @@ WORKFLOW_SWIFT=workflow3.swift
113112
swift-t -n $PROCS $MACHINE -p -I $EQR -r $EQR \
114113
-e LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$R_LIB:$GCC_LIB \
115114
$EMEWS_PROJECT_ROOT/swift/$WORKFLOW_SWIFT $CMD_LINE_ARGS
116-

workflows/nt3_mlrMBO/swift/theta_workflow.sh

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
#! /usr/bin/env bash
22
set -eu
33

4-
# CORI WORKFLOW
5-
# Main entry point for P1B3 mlrMBO workflow
6-
74
# Autodetect this workflow directory
85
export EMEWS_PROJECT_ROOT=$( cd $( dirname $0 )/.. ; /bin/pwd )
96

@@ -26,11 +23,12 @@ export PPN=${PPN:-1}
2623
export QUEUE=${QUEUE:-default}
2724
export WALLTIME=${WALLTIME:-05:00:00}
2825

29-
30-
# mlrMBO settings
31-
# How many to runs evaluate per iteration
26+
# Benchmark run timeout: benchmark run will time out
27+
# after the specified number of seconds. -1 is no timeout.
28+
BENCHMARK_TIMEOUT=${BENCHMARK_TIMEOUT:-3600}
3229

3330

31+
# mlrMBO settings
3432
MAX_BUDGET=${MAX_BUDGET:-1000}
3533
# Total iterations
3634
MAX_ITERATIONS=${MAX_ITERATIONS:-3}
@@ -91,7 +89,8 @@ EQR=$EMEWS_PROJECT_ROOT/ext/EQ-R
9189
CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
9290
CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -script_file=$EMEWS_PROJECT_ROOT/scripts/theta_run_model.sh "
9391
CMD_LINE_ARGS+="-model_name=$MODEL_NAME "
94-
CMD_LINE_ARGS+="-exp_id=$EXPID -log_script=$EMEWS_PROJECT_ROOT/../common/sh/theta_run_logger.sh"
92+
CMD_LINE_ARGS+="-exp_id=$EXPID -log_script=$EMEWS_PROJECT_ROOT/../common/sh/theta_run_logger.sh "
93+
CMD_LINE_ARGS+="-benchmark_timeout=$BENCHMARK_TIMEOUT"
9594

9695
TURBINE_DIR=/home/wozniak/Public/sfw/theta/swift-t-pyr/turbine/lib
9796

workflows/nt3_mlrMBO/swift/workflow.sh

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,16 @@
22
#! /usr/bin/env bash
33
set -eu
44

5-
# CORI WORKFLOW
6-
# Main entry point for P1B3 mlrMBO workflow
7-
85
# Autodetect this workflow directory
96
export EMEWS_PROJECT_ROOT=$( cd $( dirname $0 )/.. ; /bin/pwd )
107

118
# USER SETTINGS START
129

1310
# See README.md for more information
1411

15-
# The directory in the Benchmarks repo containing P1B3
16-
BENCHMARK_DIR="$EMEWS_PROJECT_ROOT/../../../Benchmarks/Pilot1/NT3"
12+
# The directory in the Benchmarks repo containing NT3
13+
BENCHMARK_DIR="$EMEWS_PROJECT_ROOT/../../../Benchmarks/common"
14+
BENCHMARK_DIR="$BENCHMARK_DIR:$EMEWS_PROJECT_ROOT/../../../Benchmarks/Pilot1/NT3"
1715

1816
# The number of MPI processes
1917
# Note that 2 processes are reserved for Swift/EMEWS
@@ -28,12 +26,15 @@ export PPN=${PPN:-1}
2826
export QUEUE=${QUEUE:-debug}
2927
export WALLTIME=${WALLTIME:-00:30:00}
3028

29+
# Benchmark run timeout: benchmark run will time out
30+
# after the specified number of seconds. -1 is no timeout.
31+
BENCHMARK_TIMEOUT=${BENCHMARK_TIMEOUT:-3600}
32+
3133
# set machine to your scheduler type (e.g. pbs, slurm, cobalt etc.),
3234
# or empty for an immediate non-queued unscheduled run
3335
MACHINE=""
3436

3537
# mlrMBO settings
36-
# How many to runs evaluate per iteration
3738
MAX_BUDGET=${MAX_BUDGET:-110}
3839
# Total iterations
3940
MAX_ITERATIONS=${MAX_ITERATIONS:-4}
@@ -81,7 +82,8 @@ export RESIDENT_WORK_RANKS=$(( PROCS - 2 ))
8182
EQR=$EMEWS_PROJECT_ROOT/ext/EQ-R
8283

8384
CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
84-
CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -model_name=$MODEL_NAME -script_file=$SCRIPT_FILE -exp_id=$EXPID"
85+
CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -model_name=$MODEL_NAME -exp_id=$EXPID "
86+
CMD_LINE_ARGS+="-benchmark_timeout=$BENCHMARK_TIMEOUT"
8587

8688
if [ -n "$MACHINE" ]; then
8789
MACHINE="-m $MACHINE"

workflows/nt3_mlrMBO/swift/workflow3.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ int design_size = toint(argv("ds", "10"));
1919
string param_set = argv("param_set_file");
2020
string model_name = argv("model_name");
2121
string exp_id = argv("exp_id");
22+
int benchmark_timeout = toint(argv("benchmark_timeout", "-1"));
2223

2324
string code_template =
2425
"""
@@ -37,6 +38,7 @@ hyper_parameter_map['instance_directory'] = outdir
3738
hyper_parameter_map['model_name'] = '%s'
3839
hyper_parameter_map['experiment_id'] = '%s'
3940
hyper_parameter_map['run_id'] = '%s'
41+
hyper_parameter_map['timeout'] = %d
4042
4143
validation_loss = nt3_tc1_runner.run(hyper_parameter_map)
4244
""";
@@ -73,7 +75,7 @@ max.budget = %d, max.iterations = %d, design.size=%d, propose.points=%d, param.s
7375

7476
(string obj_result) obj(string params, string iter_indiv_id) {
7577
string outdir = "%s/run_%s" % (turbine_output, iter_indiv_id);
76-
string code = code_template % (outdir, params, model_name, exp_id, iter_indiv_id);
78+
string code = code_template % (outdir, params, model_name, exp_id, iter_indiv_id, benchmark_timeout);
7779
obj_result = python_persist(code, "str(validation_loss)");
7880
printf(obj_result);
7981
}

workflows/p2b1_mlrMBO/python/p2b1_runner.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,12 @@ def run(hyper_parameter_map):
4646
framework = sys.argv[3]
4747
exp_id = sys.argv[4]
4848
run_id = sys.argv[5]
49+
benchmark_timeout = int(sys.argv[6])
4950
hyper_parameter_map = runner_utils.init(param_string, instance_directory,
5051
framework, 'save_path')
5152
hyper_parameter_map['experiment_id'] = exp_id
5253
hyper_parameter_map['run_id'] = run_id
54+
hyper_parameter_map['timeout'] = benchmark_timeout
5355
# clear sys.argv so that argparse doesn't object
5456
sys.argv = ['p2b1_runner']
5557
result = run(hyper_parameter_map)

0 commit comments

Comments
 (0)