Skip to content

Commit f0ba1de

Browse files
committed
2 parents e160a84 + b18bd15 commit f0ba1de

File tree

10 files changed

+155
-66
lines changed

10 files changed

+155
-66
lines changed

workflows/common/sh/langs-theta.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
2+
# LANGS Theta
3+
# Language settings for Theta (Swift, Python, R, Tcl, etc.)
4+
5+
TCL=/home/wozniak/Public/sfw/theta/tcl-8.6.1
6+
export R=/home/wozniak/Public/sfw/theta/R-3.4.0/lib64/R
7+
export PY=/home/wozniak/Public/sfw/theta/Python-2.7.12
8+
export LD_LIBRARY_PATH=$PY/lib:$R/lib:$LD_LIBRARY_PATH
9+
COMMON_DIR=$EMEWS_PROJECT_ROOT/../common/python
10+
PYTHONPATH=$EMEWS_PROJECT_ROOT/python:$BENCHMARK_DIR:$COMMON_DIR
11+
PYTHONHOME=/home/wozniak/Public/sfw/theta/Python-2.7.12
12+
13+
# STC=/home/wozniak/Public/sfw/theta/swift-t-pyr/stc
14+
STC=/projects/Candle_ECP/swift/pyr/stc
15+
16+
export PATH=$STC/bin:$TCL/bin:$PATH

workflows/common/sh/utils.sh

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,13 @@ EOF
3535
fi
3636

3737
export EXPID=$1
38-
if [ $EXPID = "-a" ]; then
38+
if [ $EXPID = "-a" ]
39+
then
3940
export TURBINE_OUTPUT_ROOT=$EMEWS_PROJECT_ROOT/experiments
40-
export TURBINE_OUTPUT_FORMAT=X%Q
41-
EXPID=SWIFT
41+
# Creates an X followed by a unique integer padded to 3 digits, e.g., X023
42+
export TURBINE_OUTPUT_FORMAT="X%Q"
43+
EXPID="AUTO"
44+
shift
4245
else
4346
export TURBINE_OUTPUT=$EMEWS_PROJECT_ROOT/experiments/$EXPID
4447
check_directory_exists
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
module load gcc
2+
export PATH=$PATH:/ccs/home/wozniak/Public/sfw/swig-3.0.2/bin
3+
4+
# TITAN BUILD SETTINGS
5+
6+
R_HOME=/sw/xk6/r/3.3.2/sles11.3_gnu4.9.3x/lib64/R
7+
R_INCLUDE=$R_HOME/include
8+
R_LIB=$R_HOME/lib
9+
R_INSIDE=$R_HOME/library/RInside
10+
RCPP=$R_HOME/library/Rcpp
11+
12+
#system-wide tcl
13+
TCL=/ccs/home/wozniak/Public/sfw/tcl-8.6.2
14+
TCL_INCLUDE=$TCL/include
15+
TCL_LIB=$TCL/lib
16+
TCL_LIBRARY=tcl8.6
17+
export PATH=$PATH:/ccs/home/wozniak/Public/sfw/tcl-8.6.2/bin
18+
19+
CPPFLAGS=""
20+
CPPFLAGS+="-I$TCL_INCLUDE "
21+
CPPFLAGS+="-I$R_INCLUDE "
22+
CPPFLAGS+="-I$RCPP/include "
23+
CPPFLAGS+="-I$R_INSIDE/include "
24+
CXXFLAGS=$CPPFLAGS
25+
26+
LDFLAGS=""
27+
LDFLAGS+="-L$R_INSIDE/lib -lRInside "
28+
LDFLAGS+="-L$R_LIB -lR -lRblas "
29+
LDFLAGS+="-L$TCL_LIB -l$TCL_LIBRARY "
30+
LDFLAGS+="-Wl,-rpath -Wl,$TCL_LIB "
31+
LDFLAGS+="-Wl,-rpath -Wl,$R_LIB "
32+
LDFLAGS+="-Wl,-rpath -Wl,$R_INSIDE/lib"
33+
34+
export CPPFLAGS CXXFLAGS LDFLAGS

workflows/p3b1_mlrMBO/R/mlrMBO3.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ parallelMap2 <- function(fun, ...,
2525
else{
2626
dots <- list(...)
2727
string_params <- elements_of_lists_to_json(dots[[1L]])
28-
print(paste0("parallelMap2 called with list_param: ",string_params))
28+
# print(paste0("parallelMap2 called with list_param: ",string_params))
29+
# print(paste0("parallelMap2 called with list of length: ", ???)
2930
OUT_put(string_params)
3031
string_results = IN_get()
3132

workflows/p3b1_mlrMBO/etc/emews_utils.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
log_script() {
2+
# usage: Provide shell script for inclusion in log
23
SCRIPT_NAME=$(basename $0)
34
mkdir -p $TURBINE_OUTPUT
4-
LOG_NAME="${TURBINE_OUTPUT}/${SCRIPT_NAME}.log"
5+
LOG_NAME="${TURBINE_OUTPUT}/emews.log"
56
echo "### VARIABLES ###" > $LOG_NAME
67
set +u
78
VARS=( "EMEWS_PROJECT_ROOT" "EXPID" "TURBINE_OUTPUT" \
@@ -24,7 +25,7 @@ log_script() {
2425

2526
echo "" >> $LOG_NAME
2627
echo "## SCRIPT ###" >> $LOG_NAME
27-
cat $EMEWS_PROJECT_ROOT/swift/$SCRIPT_NAME >> $LOG_NAME
28+
cat $1 >> $LOG_NAME
2829
}
2930

3031
check_directory_exists() {

workflows/p3b1_mlrMBO/python/p3b1_runner.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,39 @@
88
import os
99
import p3b1
1010
import runner_utils
11+
import socket
12+
13+
node_pid = "%s,%i" % (socket.gethostname(), os.getpid())
14+
print("node,pid: " + node_pid)
15+
16+
logger = None
17+
18+
def get_logger():
19+
""" Set up logging """
20+
global logger
21+
if logger is not None:
22+
return logger
23+
import logging, sys
24+
logger = logging.getLogger(__name__)
25+
logger.setLevel(logging.DEBUG)
26+
h = logging.StreamHandler(stream=sys.stdout)
27+
fmtr = logging.Formatter('%(asctime)s %(name)s %(levelname)-9s %(message)s',
28+
datefmt='%Y/%m/%d %H:%M:%S')
29+
h.setFormatter(fmtr)
30+
logger.addHandler(h)
31+
return logger
1132

1233
def run(hyper_parameter_map):
34+
35+
logger = get_logger()
1336
framework = hyper_parameter_map['framework']
37+
logger.debug("IMPORT START")
1438
if framework == 'keras':
1539
import p3b1_baseline_keras2
1640
pkg = p3b1_baseline_keras2
1741
else:
1842
raise ValueError("Unsupported framework: {}".format(framework))
43+
logger.debug("IMPORT STOP")
1944

2045
# params is a Python dictionary
2146
params = pkg.initialize_parameters()
@@ -26,8 +51,12 @@ def run(hyper_parameter_map):
2651
# raise Exception("Parameter '{}' not found in set of valid arguments".format(k))
2752
params[k] = v
2853

54+
logger.debug("WRITE_PARAMS START")
2955
runner_utils.write_params(params, hyper_parameter_map)
56+
logger.debug("WRITE_PARAMS STOP")
57+
logger.debug("DO_N_FOLD START")
3058
avg_loss = pkg.do_n_fold(params)
59+
logger.debug("DO_N_FOLD STOP")
3160

3261
if framework == 'keras':
3362
# works around this error:
@@ -41,16 +70,24 @@ def run(hyper_parameter_map):
4170
return avg_loss
4271

4372
if __name__ == '__main__':
73+
logger = get_logger()
74+
logger.debug("RUN START")
75+
4476
param_string = sys.argv[1]
4577
instance_directory = sys.argv[2]
4678
framework = sys.argv[3]
4779
exp_id = sys.argv[4]
4880
run_id = sys.argv[5]
81+
logger.debug("RUN INIT START")
4982
hyper_parameter_map = runner_utils.init(param_string, instance_directory,
5083
framework, 'save_path')
84+
logger.debug("RUN INIT STOP")
5185
hyper_parameter_map['experiment_id'] = exp_id
5286
hyper_parameter_map['run_id'] = run_id
5387
# clear sys.argv so that argparse doesn't object
5488
sys.argv = ['p3b1_runner']
5589
result = run(hyper_parameter_map)
90+
logger.debug("WRITE OUTPUT START")
5691
runner_utils.write_output(result, instance_directory)
92+
logger.debug("WRITE OUTPUT STOP")
93+
logger.debug("RUN STOP")

workflows/p3b1_mlrMBO/scripts/theta_run_model.sh

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,29 @@ export PYTHONPATH
6161
arg_array=("$emews_root/python/p3b1_runner.py" "$parameter_string" "$instance_directory" "$framework" "$exp_id" "$run_id")
6262
MODEL_CMD="python ${arg_array[@]}"
6363

64+
msg()
65+
{
66+
echo "theta_run_model.sh: $*"
67+
}
68+
6469
# Turn bash error checking off. This is
6570
# required to properly handle the model execution return value
6671
# and the optional timeout.
6772
set +e
68-
echo $MODEL_CMD
69-
$TIMEOUT_CMD python "${arg_array[@]}"
7073

74+
# Format and report model parameters
75+
# as represented on Python command line:
76+
msg MODEL_CMD: $MODEL_CMD | \
77+
tr -d "{}\"" | \
78+
tr "," " " | \
79+
fmt -t -w 1
80+
echo
81+
$TIMEOUT_CMD $MODEL_CMD
7182
RES=$?
7283
if [ "$RES" -ne 0 ]; then
73-
if [ "$RES" == 124 ]; then
74-
echo "---> Timeout error in $MODEL_CMD"
84+
if [ "$RES" == 124 ]; then
85+
msg "---> Timeout error in MODEL_CMD"
7586
else
76-
echo "---> Error in $MODEL_CMD"
87+
msg "---> Error in MODEL_CMD"
7788
fi
7889
fi

workflows/p3b1_mlrMBO/swift/ai_workflow3.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ file model_script = input(argv("script_file"));
2121
file log_script = input(argv("log_script"));
2222
string exp_id = argv("exp_id");
2323

24+
printf("TURBINE_OUTPUT: %s", turbine_output);
25+
2426
string FRAMEWORK = "keras";
2527

2628

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
2+
set -eu
3+
4+
source $EMEWS_PROJECT_ROOT/etc/emews_utils.sh
5+
6+
log_script $EMEWS_PROJECT_ROOT/swift/theta_workflow.sh
7+
Lines changed: 32 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#! /usr/bin/env bash
22
set -eu
33

4-
# CORI WORKFLOW
4+
# THETA WORKFLOW
55
# Main entry point for P3B1 mlrMBO workflow
66

77
# Autodetect this workflow directory
@@ -19,17 +19,15 @@ BENCHMARK_DIR=$EMEWS_PROJECT_ROOT/../../../Benchmarks/Pilot3/P3B1
1919
export PROCS=${PROCS:-10}
2020

2121
# MPI processes per node
22-
# Cori has 32 cores per node, 128GB per node
22+
# (Theta has 64 cores per node, 192GB per node)
2323
export PPN=${PPN:-1}
2424

25-
2625
export QUEUE=${QUEUE:-default}
2726
export WALLTIME=${WALLTIME:-02:00:00}
2827

2928
# mlrMBO settings
3029
# How many to runs evaluate per iteration
3130

32-
3331
MAX_BUDGET=${MAX_BUDGET:-110}
3432
# Total iterations
3533
MAX_ITERATIONS=${MAX_ITERATIONS:-4}
@@ -42,83 +40,62 @@ PARAM_SET_FILE=${PARAM_SET_FILE:-$EMEWS_PROJECT_ROOT/data/parameter_set3.R}
4240

4341
# USER SETTINGS END
4442

45-
4643
# Source some utility functions used by EMEWS in this script
4744
source "${EMEWS_PROJECT_ROOT}/etc/emews_utils.sh"
4845

49-
if [ "$#" -ne 1 ]; then
50-
script_name=$(basename $0)
51-
echo "Usage: ${script_name} EXPERIMENT_ID (e.g. ${script_name} experiment_1)"
52-
exit 1
53-
fi
46+
WORKFLOWS_ROOT=$( cd $EMEWS_PROJECT_ROOT ; cd .. ; /bin/pwd )
47+
source $WORKFLOWS_ROOT/common/sh/langs-theta.sh
48+
source $WORKFLOWS_ROOT/common/sh/utils.sh
5449

5550
# uncomment to turn on swift/t logging. Can also set TURBINE_LOG,
5651
# TURBINE_DEBUG, and ADLB_DEBUG to 0 to turn off logging
57-
#export TURBINE_LOG=1 TURBINE_DEBUG=1 ADLB_DEBUG=1
52+
# export TURBINE_LOG=1 # TURBINE_DEBUG=1 ADLB_DEBUG=1
5853

59-
export EXPID=$1
60-
export TURBINE_OUTPUT_ROOT=${TURBINE_OUTPUT_ROOT:-$EMEWS_PROJECT_ROOT/experiments}
61-
export TURBINE_OUTPUT=$TURBINE_OUTPUT_ROOT/$EXPID
62-
check_directory_exists
54+
get_expid $*
6355

6456
export TURBINE_JOBNAME="${EXPID}_job"
6557

66-
# if R cannot be found, then these will need to be
67-
# uncommented and set correctly.
68-
# export R_HOME=/path/to/R
69-
# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$R_HOME/lib
70-
# export PYTHONHOME=
71-
72-
73-
74-
TCL=/home/wozniak/Public/sfw/theta/tcl-8.6.1
75-
export R=/home/wozniak/Public/sfw/theta/R-3.4.0/lib64/R
76-
export PY=/home/wozniak/Public/sfw/theta/Python-2.7.12
77-
export LD_LIBRARY_PATH=$PY/lib:$R/lib:$LD_LIBRARY_PATH
78-
COMMON_DIR=$EMEWS_PROJECT_ROOT/../common/python
79-
PYTHONPATH=$EMEWS_PROJECT_ROOT/python:$BENCHMARK_DIR:$COMMON_DIR
80-
PYTHONHOME=/home/wozniak/Public/sfw/theta/Python-2.7.12
81-
82-
export PATH=/home/wozniak/Public/sfw/theta/swift-t-pyr/stc/bin:$TCL/bin:$PATH
83-
#$PYTHONHOME/bin:$TCL/bin:$PATH
84-
8558
# Resident task workers and ranks
8659
export TURBINE_RESIDENT_WORK_WORKERS=1
8760
export RESIDENT_WORK_RANKS=$(( PROCS - 2 ))
8861

8962
# EQ/R location
9063
EQR=$EMEWS_PROJECT_ROOT/ext/EQ-R
9164

92-
CMD_LINE_ARGS="$* -pp=$PROPOSE_POINTS -mi=$MAX_ITERATIONS -mb=$MAX_BUDGET -ds=$DESIGN_SIZE "
93-
CMD_LINE_ARGS+="-param_set_file=$PARAM_SET_FILE -script_file=$EMEWS_PROJECT_ROOT/scripts/theta_run_model.sh "
94-
CMD_LINE_ARGS+="-exp_id=$EXPID -log_script=$EMEWS_PROJECT_ROOT/../common/sh/theta_run_logger.sh"
65+
SCRIPT_FILE=$EMEWS_PROJECT_ROOT/scripts/theta_run_model.sh
66+
LOG_SCRIPT=$EMEWS_PROJECT_ROOT/../common/sh/theta_run_logger.sh
9567

96-
TURBINE_DIR=/home/wozniak/Public/sfw/theta/swift-t-pyr/turbine/lib
68+
CMD_LINE_ARGS=( $*
69+
-exp_id=$EXPID
70+
-pp=$PROPOSE_POINTS
71+
-mi=$MAX_ITERATIONS
72+
-mb=$MAX_BUDGET
73+
-ds=$DESIGN_SIZE
74+
-param_set_file=$PARAM_SET_FILE
75+
-script_file=$SCRIPT_FILE
76+
-log_script=$LOG_SCRIPT
77+
)
9778

98-
# set machine to your scheduler type (e.g. pbs, slurm, cobalt etc.),
99-
# or empty for an immediate non-queued unscheduled run
100-
MACHINE="theta"
101-
102-
if [ -n "$MACHINE" ]; then
103-
MACHINE="-m $MACHINE"
104-
fi
79+
TURBINE_DIR=/home/wozniak/Public/sfw/theta/swift-t-pyr/turbine/lib
10580

10681
# Add any script variables that you want to log as
10782
# part of the experiment meta data to the USER_VARS array,
10883
# for example, USER_VARS=("VAR_1" "VAR_2")
10984
USER_VARS=($CMD_LINE_ARGS)
11085
# log variables and script to TURBINE_OUTPUT directory
111-
log_script
86+
# log_script
11287

11388
# trace every command that follows (set -x prints each one to standard error)
11489
set -x
115-
WORKFLOW_SWIFT=ai_workflow3.swift
116-
swift-t -n $PROCS $MACHINE -p -I $EQR -r $EQR -r $TURBINE_DIR \
90+
WORKFLOW_SWIFT=$EMEWS_PROJECT_ROOT/swift/ai_workflow3.swift
91+
swift-t -m theta \
92+
-n $PROCS \
93+
-p -I $EQR -r $EQR -r $TURBINE_DIR \
94+
-t i:$EMEWS_PROJECT_ROOT/swift/init-theta.sh \
11795
-e LD_LIBRARY_PATH=$LD_LIBRARY_PATH \
118-
-e TURBINE_RESIDENT_WORK_WORKERS=$TURBINE_RESIDENT_WORK_WORKERS \
119-
-e RESIDENT_WORK_RANKS=$RESIDENT_WORK_RANKS \
120-
-e EMEWS_PROJECT_ROOT=$EMEWS_PROJECT_ROOT \
121-
-e PYTHONPATH=$PYTHONPATH \
122-
-e PYTHONHOME=$PYTHONHOME \
123-
-e TURBINE_OUTPUT=$TURBINE_OUTPUT \
124-
$EMEWS_PROJECT_ROOT/swift/$WORKFLOW_SWIFT $CMD_LINE_ARGS
96+
-e TURBINE_RESIDENT_WORK_WORKERS \
97+
-e RESIDENT_WORK_RANKS \
98+
-e EMEWS_PROJECT_ROOT \
99+
-e PYTHONPATH=$PYTHONPATH \
100+
-e PYTHONHOME=$PYTHONHOME \
101+
$WORKFLOW_SWIFT ${CMD_LINE_ARGS[@]}

0 commit comments

Comments
 (0)