Skip to content

Commit af6b639

Browse files
committed
Merge branch 'master' of https://github.com/ECP-CANDLE/Supervisor into rajeeja/rnd_grid
2 parents e4889b8 + 6d271d1 commit af6b639

39 files changed

+299
-125
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
# Python environment setup: points the shell at one specific Python 2.7
# installation and rebuilds PYTHONPATH around it.
# Reads $emews_root and $BENCHMARK_DIR, neither of which is set in this fragment.
1+
2+
# Root of the Python installation used by every setting below.
export PYTHONHOME="/lus/theta-fs0/projects/Candle_ECP/ncollier/py2_tf_gcc6.3_eigen3_native"
3+
# Path to the interpreter inside that installation (assigned, not exported, on this line).
PYTHON="$PYTHONHOME/bin/python"
4+
# Overwrites LD_LIBRARY_PATH: any value already in the environment is dropped.
export LD_LIBRARY_PATH="$PYTHONHOME/lib"
5+
# Prepend the installation's bin/ so its executables are found first.
export PATH="$PYTHONHOME/bin:$PATH"
6+
7+
8+
# Shared Python directory, resolved relative to $emews_root.
COMMON_DIR=$emews_root/../common/python
9+
# PYTHONPATH is built from scratch in three pieces (previous value is discarded):
#   1. the installation's python2.7 library directory
PYTHONPATH="$PYTHONHOME/lib/python2.7:"
10+
#   2. $BENCHMARK_DIR and $COMMON_DIR
PYTHONPATH+="$BENCHMARK_DIR:$COMMON_DIR:"
11+
#   3. the installation's site-packages directory
PYTHONPATH+="$PYTHONHOME/lib/python2.7/site-packages"
12+
export PYTHONPATH

workflows/common/sh/langs-theta.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# LANGS Theta
2+
# Language settings for Theta (Swift, Python, R, Tcl, etc.)
# Reads $EMEWS_PROJECT_ROOT and $BENCHMARK_DIR, neither of which is set in this fragment.
3+
4+
# Tcl installation prefix; only $TCL/bin is used below (added to PATH).
TCL=/home/wozniak/Public/sfw/theta/tcl-8.6.1
5+
# R and Python installation prefixes; both are exported.
export R=/home/wozniak/Public/sfw/theta/R-3.4.0/lib64/R
6+
export PY=/home/wozniak/Public/sfw/theta/Python-2.7.12
7+
# Prepend the Python and R lib/ directories to the existing LD_LIBRARY_PATH.
export LD_LIBRARY_PATH=$PY/lib:$R/lib:$LD_LIBRARY_PATH
8+
# Shared Python directory, resolved relative to $EMEWS_PROJECT_ROOT.
COMMON_DIR=$EMEWS_PROJECT_ROOT/../common/python
9+
# Replaces any previous PYTHONPATH value; there is no `export` on this line.
PYTHONPATH=$EMEWS_PROJECT_ROOT/python:$BENCHMARK_DIR:$COMMON_DIR
10+
# Same directory as $PY above; there is no `export` on this line.
PYTHONHOME=/home/wozniak/Public/sfw/theta/Python-2.7.12
11+
12+
# STC installation prefix; $STC/bin is prepended to PATH below.
# The commented-out assignment is an alternative location.
# STC=/home/wozniak/Public/sfw/theta/swift-t-pyr/stc
13+
STC=/projects/Candle_ECP/swift/pyr/stc
14+
15+
# Put the STC and Tcl executables ahead of everything already on PATH.
export PATH=$STC/bin:$TCL/bin:$PATH

workflows/common/sh/utils.sh

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,13 @@ EOF
3535
fi
3636

3737
export EXPID=$1
38-
if [ $EXPID = "-a" ]; then
38+
if [ $EXPID = "-a" ]
39+
then
3940
export TURBINE_OUTPUT_ROOT=$EMEWS_PROJECT_ROOT/experiments
40-
export TURBINE_OUTPUT_FORMAT=X%Q
41-
EXPID=SWIFT
41+
# Produces "X" followed by a unique integer padded to 3 digits, e.g., X023
42+
export TURBINE_OUTPUT_FORMAT="X%Q"
43+
EXPID="AUTO"
44+
shift
4245
else
4346
export TURBINE_OUTPUT=$EMEWS_PROJECT_ROOT/experiments/$EXPID
4447
check_directory_exists
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
# Build settings: defines the R and Tcl locations and exports the
# CPPFLAGS / CXXFLAGS / LDFLAGS assembled from them.
1+
module load gcc
2+
# Append the SWIG 3.0.2 bin/ directory to PATH.
export PATH=$PATH:/ccs/home/wozniak/Public/sfw/swig-3.0.2/bin
3+
4+
# TITAN BUILD SETTINGS
5+
6+
# R installation and the directories derived from it (headers, libraries,
# and the RInside and Rcpp packages under library/).
R_HOME=/sw/xk6/r/3.3.2/sles11.3_gnu4.9.3x/lib64/R
7+
R_INCLUDE=$R_HOME/include
8+
R_LIB=$R_HOME/lib
9+
R_INSIDE=$R_HOME/library/RInside
10+
RCPP=$R_HOME/library/Rcpp
11+
12+
#system-wide tcl
# NOTE(review): the path below is under a user home directory, not a system location; confirm the label above.
13+
TCL=/ccs/home/wozniak/Public/sfw/tcl-8.6.2
14+
TCL_INCLUDE=$TCL/include
15+
TCL_LIB=$TCL/lib
16+
# Library name passed to the linker as -l$TCL_LIBRARY.
TCL_LIBRARY=tcl8.6
17+
# Same directory as $TCL/bin, written out literally.
export PATH=$PATH:/ccs/home/wozniak/Public/sfw/tcl-8.6.2/bin
18+
19+
# Include paths for the Tcl, R, Rcpp and RInside headers.
CPPFLAGS=""
20+
CPPFLAGS+="-I$TCL_INCLUDE "
21+
CPPFLAGS+="-I$R_INCLUDE "
22+
CPPFLAGS+="-I$RCPP/include "
23+
CPPFLAGS+="-I$R_INSIDE/include "
24+
# CXXFLAGS is a copy of CPPFLAGS.
CXXFLAGS=$CPPFLAGS
25+
26+
# Link against RInside, R, Rblas and Tcl, and pass an -rpath entry to the
# linker for each of the Tcl, R and RInside library directories.
LDFLAGS=""
27+
LDFLAGS+="-L$R_INSIDE/lib -lRInside "
28+
LDFLAGS+="-L$R_LIB -lR -lRblas "
29+
LDFLAGS+="-L$TCL_LIB -l$TCL_LIBRARY "
30+
LDFLAGS+="-Wl,-rpath -Wl,$TCL_LIB "
31+
LDFLAGS+="-Wl,-rpath -Wl,$R_LIB "
32+
LDFLAGS+="-Wl,-rpath -Wl,$R_INSIDE/lib"
33+
34+
export CPPFLAGS CXXFLAGS LDFLAGS

workflows/nt3_mlrMBO/python/nt3_tc1_runner.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,12 @@ def run(hyper_parameter_map):
6161
framework = sys.argv[4]
6262
exp_id = sys.argv[5]
6363
run_id = sys.argv[6]
64+
benchmark_timeout = int(sys.argv[7])
6465
hyper_parameter_map = runner_utils.init(param_string, instance_directory, framework, 'save')
6566
hyper_parameter_map['model_name'] = model_name
6667
hyper_parameter_map['experiment_id'] = exp_id
6768
hyper_parameter_map['run_id'] = run_id
69+
hyper_parameter_map['timeout'] = benchmark_timeout
6870
# clear sys.argv so that argparse doesn't object
6971
sys.argv = ['nt3_tc1_runner']
7072
result = run(hyper_parameter_map)

workflows/nt3_mlrMBO/scripts/run_model.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ set -eu
1313

1414
# !!! IF YOU CHANGE THE NUMBER OF ARGUMENTS PASSED TO THIS SCRIPT, YOU MUST
1515
# CHANGE THE TIMEOUT_ARG_INDEX !!!
16-
TIMEOUT_ARG_INDEX=8
16+
TIMEOUT_ARG_INDEX=9
1717
TIMEOUT=""
1818
if [[ $# == $TIMEOUT_ARG_INDEX ]]
1919
then
@@ -42,12 +42,13 @@ model_name=$4
4242
framework=$5
4343
exp_id=$6
4444
run_id=$7
45+
benchmark_timeout=$8
4546

4647
BENCHMARK_DIR=$emews_root/../../../Benchmarks/common:$emews_root/../../../Benchmarks/Pilot1/NT3:$emews_root/../../../Benchmarks/Pilot1/TC1
4748
COMMON_DIR=$emews_root/../common/python
4849
export PYTHONPATH="$PYTHONPATH:$BENCHMARK_DIR:$COMMON_DIR"
4950

50-
arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework" "$exp_id" "$run_id")
51+
arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework" "$exp_id" "$run_id" "$benchmark_timeout")
5152
MODEL_CMD="python ${arg_array[@]}"
5253
# Turn bash error checking off. This is
5354
# required to properly handle the model execution return value

workflows/nt3_mlrMBO/scripts/theta_run_model.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ set -eu
1313

1414
# !!! IF YOU CHANGE THE NUMBER OF ARGUMENTS PASSED TO THIS SCRIPT, YOU MUST
1515
# CHANGE THE TIMEOUT_ARG_INDEX !!!
16-
TIMEOUT_ARG_INDEX=8
16+
TIMEOUT_ARG_INDEX=9
1717
TIMEOUT=""
1818
if [[ $# == $TIMEOUT_ARG_INDEX ]]
1919
then
@@ -41,6 +41,7 @@ model_name=$4
4141
framework=$5
4242
exp_id=$6
4343
run_id=$7
44+
benchmark_timeout=$8
4445

4546
# Theta / Tensorflow env vars
4647
export KMP_BLOCKTIME=30
@@ -60,7 +61,7 @@ PYTHONPATH+="$BENCHMARK_DIR:$COMMON_DIR:"
6061
PYTHONPATH+="$PYTHONHOME/lib/python2.7/site-packages"
6162
export PYTHONPATH
6263

63-
arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework" "$exp_id" "$run_id")
64+
arg_array=("$emews_root/python/nt3_tc1_runner.py" "$parameter_string" "$instance_directory" "$model_name" "$framework" "$exp_id" "$run_id" "$benchmark_timeout")
6465
MODEL_CMD="python ${arg_array[@]}"
6566

6667
# Turn bash error checking off. This is

workflows/nt3_mlrMBO/swift/ai_workflow.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,15 @@ export PPN=${PPN:-1}
2626
export QUEUE=${QUEUE:-debug}
2727
export WALLTIME=${WALLTIME:-00:30:00}
2828

29+
# Benchmark run timeout: a benchmark run will time out
30+
# after the specified number of seconds. -1 is no timeout.
31+
BENCHMARK_TIMEOUT=${BENCHMARK_TIMEOUT:-3600}
32+
2933
# set machine to your scheduler type (e.g. pbs, slurm, cobalt etc.),
3034
# or empty for an immediate non-queued unscheduled run
3135
MACHINE=""
3236

3337
# mlrMBO settings
34-
# How many to runs evaluate per iteration
3538
MAX_BUDGET=${MAX_BUDGET:-110}
3639
# Total iterations
3740
MAX_ITERATIONS=${MAX_ITERATIONS:-4}
@@ -83,6 +86,7 @@ MODEL_NAME="nt3"
8386
CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
8487
CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -script_file=$SCRIPT_FILE -model_name=$MODEL_NAME "
8588
CMD_LINE_ARGS+="-exp_id=$EXPID -log_script=$LOG_SCRIPT_FILE "
89+
CMD_LINE_ARGS+="-benchmark_timeout=$BENCHMARK_TIMEOUT"
8690

8791
if [ -n "$MACHINE" ]; then
8892
MACHINE="-m $MACHINE"

workflows/nt3_mlrMBO/swift/ai_workflow3.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ string model_name = argv("model_name");
2121
file model_script = input(argv("script_file"));
2222
file log_script = input(argv("log_script"));
2323
string exp_id = argv("exp_id");
24+
int benchmark_timeout = toint(argv("benchmark_timeout", "-1"));
2425

2526
string FRAMEWORK = "keras";
2627

@@ -31,7 +32,7 @@ max.budget = %d, max.iterations = %d, design.size=%d, propose.points=%d, param.s
3132

3233
app (file out, file err) run_model (file shfile, string params_string, string instance, string run_id)
3334
{
34-
"bash" shfile params_string emews_root instance model_name FRAMEWORK exp_id run_id @stdout=out @stderr=err;
35+
"bash" shfile params_string emews_root instance model_name FRAMEWORK exp_id run_id benchmark_timeout @stdout=out @stderr=err;
3536
}
3637

3738

workflows/nt3_mlrMBO/swift/cori_workflow3.sh

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
#! /usr/bin/env bash
22
set -eu
33

4-
# CORI WORKFLOW
5-
# Main entry point for P1B3 mlrMBO workflow
6-
74
# Autodetect this workflow directory
85
export EMEWS_PROJECT_ROOT=$( cd $( dirname $0 )/.. ; /bin/pwd )
96

@@ -28,10 +25,11 @@ export PPN=${PPN:-1}
2825
export QUEUE=${QUEUE:-regular}
2926
export WALLTIME=${WALLTIME:-01:00:00}
3027

31-
# mlrMBO settings
32-
# How many to runs evaluate per iteration
33-
28+
# Benchmark run timeout: a benchmark run will time out
29+
# after the specified number of seconds. -1 is no timeout.
30+
BENCHMARK_TIMEOUT=${BENCHMARK_TIMEOUT:-3600}
3431

32+
# mlrMBO settings
3533
MAX_BUDGET=${MAX_BUDGET:-1000}
3634
# Total iterations
3735
MAX_ITERATIONS=${MAX_ITERATIONS:-4}
@@ -88,6 +86,7 @@ MODEL_NAME="nt3"
8886
CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
8987
CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -model_name=$MODEL_NAME "
9088
CMD_LINE_ARGS+="-exp_id=$EXPID "
89+
CMD_LINE_ARGS+="-benchmark_timeout=$BENCHMARK_TIMEOUT"
9190

9291
# set machine to your scheduler type (e.g. pbs, slurm, cobalt etc.),
9392
# or empty for an immediate non-queued unscheduled run
@@ -113,4 +112,3 @@ WORKFLOW_SWIFT=workflow3.swift
113112
swift-t -n $PROCS $MACHINE -p -I $EQR -r $EQR \
114113
-e LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$R_LIB:$GCC_LIB \
115114
$EMEWS_PROJECT_ROOT/swift/$WORKFLOW_SWIFT $CMD_LINE_ARGS
116-

0 commit comments

Comments
 (0)