Skip to content

Commit bd6bb65

Browse files
committed
Merge branch 'master' of github.com:ECP-CANDLE/Supervisor
2 parents c2dc5e7 + 3a904af commit bd6bb65

File tree

7 files changed

+58
-28
lines changed

7 files changed

+58
-28
lines changed

workflows/nt3_mlrMBO/swift/cori_settings.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,7 @@ module swap PrgEnv-intel PrgEnv-gnu
44
# if PrgEnv-intel is not loaded then PrgEnv-gnu won't load via the swap
55
# so we load gcc explicitly
66
module load gcc
7+
module load intel-tensorflow
8+
79

810
export PATH=/global/homes/w/wozniak/Public/sfw/compute/swift-t-r/stc/bin:$PATH

workflows/p2b1_mlrMBO/scripts/run_model.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ framework=$4
4242
exp_id=$5
4343
run_id=$6
4444

45-
export KERAS_BACKEND=theano
45+
#export KERAS_BACKEND=theano
4646
BENCHMARK_DIR=$emews_root/../../../Benchmarks/common:$emews_root/../../../Benchmarks/Pilot2/P2B1
4747
COMMON_DIR=$emews_root/../common/python
4848
export PYTHONPATH="$BENCHMARK_DIR:$COMMON_DIR"

workflows/p2b1_mlrMBO/scripts/theta_run_model.sh

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,16 @@ framework=$4
4040
exp_id=$5
4141
run_id=$6
4242

43-
export KERAS_BACKEND=theano
44-
# Theta / Tensorflow env vars
45-
#export KMP_BLOCKTIME=30
46-
#export KMP_SETTINGS=1
47-
#export KMP_AFFINITY=granularity=fine,verbose,compact,1,0
48-
#export OMP_NUM_THREADS=144
49-
43+
# export KERAS_BACKEND=theano
5044
#export THEANO_FLAGS="config.base_compiledir=$instance_directory"
51-
export THEANO_FLAGS="base_compiledir=$instance_directory"
45+
#export THEANO_FLAGS="base_compiledir=$instance_directory"
5246

5347

48+
# Theta / Tensorflow env vars
49+
export KMP_BLOCKTIME=30
50+
export KMP_SETTINGS=1
51+
export KMP_AFFINITY=granularity=fine,verbose,compact,1,0
52+
export OMP_NUM_THREADS=144
5453

5554
export PYTHONHOME="/lus/theta-fs0/projects/Candle_ECP/ncollier/py2_tf_gcc6.3_eigen3_native"
5655
PYTHON="$PYTHONHOME/bin/python"
@@ -73,7 +72,7 @@ MODEL_CMD="python ${arg_array[@]}"
7372
set +e
7473
echo $MODEL_CMD
7574
$TIMEOUT_CMD python "${arg_array[@]}"
76-
sleep 60
75+
7776

7877
RES=$?
7978
if [ "$RES" -ne 0 ]; then

workflows/p2b1_mlrMBO/swift/cori_workflow3.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,7 @@ CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE "
8888
CMD_LINE_ARGS+="-exp_id=$EXPID "
8989

9090
# P2B1 requires Theano -- it does not work with TensorFlow
91-
KERAS_BACKEND=theano
92-
91+
# KERAS_BACKEND=theano
9392

9493
# set machine to your scheduler type (e.g. pbs, slurm, cobalt etc.),
9594
# or empty for an immediate non-queued unscheduled run

workflows/p2b1_mlrMBO/swift/workflow.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ export EMEWS_PROJECT_ROOT=$( cd $( dirname $0 )/.. ; /bin/pwd )
1414
# See README.md for more information
1515

1616
# The directory in the Benchmarks repo containing P2B1
17-
BENCHMARK_DIR=$EMEWS_PROJECT_ROOT/../../../Benchmarks/Pilot2/P2B1
17+
BENCHMARK_DIR="$EMEWS_PROJECT_ROOT/../../../Benchmarks/common:"
18+
BENCHMARK_DIR+="$EMEWS_PROJECT_ROOT/../../../Benchmarks/Pilot2/P2B1"
1819

1920
# The number of MPI processes
2021
# Note that 2 processes are reserved for Swift/EMEWS
@@ -78,7 +79,7 @@ export RESIDENT_WORK_RANKS=$(( PROCS - 2 ))
7879
# EQ/R location
7980
EQR=$EMEWS_PROJECT_ROOT/ext/EQ-R
8081

81-
export KERAS_BACKEND=theano
82+
#export KERAS_BACKEND=theano
8283

8384
CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
8485
CMD_LINE_ARGS+="-exp_id=$EXPID -param_set_file=$PARAM_SET_FILE "

workflows/p2b1_mlrMBO/swift/workflow3.swift

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,16 @@ string code_template =
2323
"""
2424
import os
2525
outdir = '%s'
26-
os.environ['THEANO_FLAGS']="base_compiledir={}".format(outdir)
2726
2827
import p2b1_runner
2928
import json
30-
import theano
3129
3230
if not os.path.exists(outdir):
3331
os.makedirs(outdir)
3432
3533
hyper_parameter_map = json.loads('%s')
3634
hyper_parameter_map['framework'] = 'keras'
37-
hyper_parameter_map['save'] = '{}/output'.format(outdir)
35+
hyper_parameter_map['save_path'] = '{}/output'.format(outdir)
3836
hyper_parameter_map['instance_directory'] = outdir
3937
hyper_parameter_map['experiment_id'] = '%s'
4038
hyper_parameter_map['run_id'] = '%s'
@@ -132,9 +130,16 @@ max.budget = %d, max.iterations = %d, design.size=%d, propose.points=%d, param.s
132130

133131
(void o) log_start(string algorithm) {
134132
string ps = join(file_lines(input(param_set)), " ");
135-
string sys_env = join(file_lines(input("%s/turbine.log" % turbine_output)), ", ");
136-
string code = code_log_start % (propose_points, max_iterations, ps, algorithm, exp_id, sys_env);
137-
python_persist(code);
133+
string t_log = "%s/turbine.log" % turbine_output;
134+
if (file_exists(t_log)) {
135+
string sys_env = join(file_lines(input(t_log)), ", ");
136+
string code = code_log_start % (propose_points, max_iterations, ps, algorithm, exp_id, sys_env);
137+
python_persist(code);
138+
} else {
139+
string code = code_log_start % (propose_points, max_iterations, ps, algorithm, exp_id, "");
140+
python_persist(code);
141+
}
142+
138143
o = propagate();
139144
}
140145

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,37 @@
11
# see https://cran.r-project.org/web/packages/ParamHelpers/ParamHelpers.pdf (e.g. makeNumericParam)
22
# the parameter names should match names of the arguments expected by the benchmark
33

4+
45
param.set <- makeParamSet(
5-
makeDiscreteParam("batch_size", values = c(16, 32, 64, 128, 256, 512)),
6-
makeIntegerParam("epochs", lower = 5, upper = 500),
7-
makeDiscreteParam("activation", values = c("softmax", "elu", "softplus", "softsign", "relu", "tanh", "sigmoid", "hard_sigmoid", "linear")),
8-
makeDiscreteParam("optimizer", values = c("adam", "sgd", "rmsprop", "adagrad", "adadelta")),
9-
makeNumericParam("dropout", lower = 0, upper = 0.9),
10-
makeNumericParam("learning_rate", lower = 0.00001, upper = 0.1)
11-
## DEBUG PARAMETERS: DON'T USE THESE IN PRODUCTION RUN
12-
## makeDiscreteParam("conv", values = c("32 20 16 32 10 1"))
6+
makeNumericParam("learning_rate", lower= 0.00001, upper= 0.1 ),
7+
makeNumericParam("dropout", lower= 0, upper= 0.9 ),
8+
makeDiscreteParam("activation",
9+
values= c( "softmax","elu","softplus","softsign",
10+
"relu", "tanh","sigmoid","hard_sigmoid",
11+
"linear") ),
12+
makeDiscreteParam("optimizer",
13+
values = c("adam", "sgd", "rmsprop","adagrad",
14+
"adadelta")),
15+
makeDiscreteParam("shared_nnet_spec",
16+
values= c( "400", "500", "600", "700"
17+
#"800", "900", "1000", "1100", "1200",
18+
#"400,400", "500,500", "600,600", "700,700",
19+
#"800,800", "900,900", "1000,1000", "1100,1100",
20+
#"1200,1200"
21+
) ),
22+
makeDiscreteParam("ind_nnet_spec",
23+
values= c( "400:400:400", "600:600:600"
24+
#"800:800:800", "1000:1000:1000",
25+
#"1200:1200:1200",
26+
#"400,400:400,400:400,400", "600,600:600,600:600,600",
27+
#"800,800:800,800:800,800", "1000,1000:1000,1000:1000,1000",
28+
#"1200,1200:1200,1200:1200,1200",
29+
#"800,400:800,400:800,400",
30+
#"1200,400:1200,400:1200,400",
31+
#"1200,800,400:1200,800,400:1200,800,400"
32+
)),
33+
makeDiscreteParam("batch_size", values = c(16,32,64,128,256)),
34+
makeIntegerParam("epochs", lower = 5, upper = 50)
1335
)
36+
37+

0 commit comments

Comments
 (0)