Skip to content

Commit 6d522d7

Browse files
author
acadev
authored
Added titan_workflow.sh file
TITAN specific workflow changes for running.
1 parent f38119b commit 6d522d7

File tree

1 file changed

+128
-0
lines changed

1 file changed

+128
-0
lines changed
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
#! /usr/bin/env bash
set -eu

# TITAN WORKFLOW
# Main entry point for the mlrMBO workflow on Titan
# (BENCHMARK_DIR in the user settings points at the benchmark sources).

# Autodetect this workflow directory: the parent of the directory that holds
# this script, as an absolute path.
#  - "$0" is quoted so a script path containing spaces still works.
#  - "&&" (instead of ";") makes a failed cd fail the substitution rather
#    than silently reporting the caller's current directory.
#  - The assignment is separate from "export" so that, under set -e, a
#    failure of the command substitution aborts the script instead of being
#    masked by export's own exit status.
EMEWS_PROJECT_ROOT=$( cd "$( dirname "$0" )/.." && /bin/pwd )
export EMEWS_PROJECT_ROOT
# USER SETTINGS START

# See README.md for more information

# Benchmark sources, located relative to the workflow root
BENCHMARK_DIR="${EMEWS_PROJECT_ROOT}/../../../Benchmarks/Pilot3/P3B1"

# Scheduler request, exported for the launcher.
#   PROCS    - the number of MPI processes. Note that 2 processes are
#              reserved for Swift/EMEWS, so e.g. PROCS=4 would give you
#              2 workers, i.e., 2 concurrent Keras runs.
#   PPN      - MPI processes per node.
#              NOTE(review): the note carried over from the Cori workflow
#              reads "Cori has 32 cores per node, 128GB per node"; confirm
#              the right figure for this machine.
#   QUEUE    - queue name
#   WALLTIME - wall-clock limit
PROCS=258
PPN=1
QUEUE=batch
WALLTIME=02:00:00
export PROCS PPN QUEUE WALLTIME

# mlrMBO settings.
# Each value keeps whatever the caller already set in the environment and
# falls back to the default only when the variable is unset or empty.
# (Original note on this group: how many runs to evaluate per iteration.)
: "${MAX_BUDGET:=1200}"
# Total iterations
: "${MAX_ITERATIONS:=3}"
: "${DESIGN_SIZE:=300}"
: "${PROPOSE_POINTS:=300}"
: "${PARAM_SET_FILE:=${EMEWS_PROJECT_ROOT}/data/parameter_set3.R}"

# pbalabra:
# PARAM_SET_FILE="$EMEWS_PROJECT_ROOT/data/parameter_set1.R"

# USER SETTINGS END
# Source some utility functions used by EMEWS in this script
# (presumably where check_directory_exists and log_script, called further
# down, are defined -- confirm against etc/emews_utils.sh).
source "${EMEWS_PROJECT_ROOT}/etc/emews_utils.sh"

# Exactly one argument is required: the experiment id.
if [ "$#" -ne 1 ]; then
  # "$0" is quoted so a script path containing spaces is handled correctly.
  script_name=$(basename -- "$0")
  # A usage error is a diagnostic, so it is written to stderr.
  echo "Usage: ${script_name} EXPERIMENT_ID (e.g. ${script_name} experiment_1)" >&2
  exit 1
fi
# Swift/T logging switches: 1 turns logging on; set TURBINE_LOG,
# TURBINE_DEBUG, and ADLB_DEBUG to 0 to turn logging off.
TURBINE_LOG=1
TURBINE_DEBUG=1
ADLB_DEBUG=1
export TURBINE_LOG TURBINE_DEBUG ADLB_DEBUG

# The experiment id is the script's single argument; the run's output
# directory is derived from it.
EXPID=$1
TURBINE_OUTPUT="${EMEWS_PROJECT_ROOT}/experiments/${EXPID}"
export EXPID TURBINE_OUTPUT
check_directory_exists

TURBINE_JOBNAME="${EXPID}_job"
export TURBINE_JOBNAME
# if R cannot be found, then these will need to be
# uncommented and set correctly.
# export R_HOME=/path/to/R
# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$R_HOME/lib
# export PYTHONHOME=

# Site installation prefixes for Tcl, R and the Python deep-learning stack
TCL=/sw/xk6/tcl_tk/8.5.8/sles11.1_gnu4.5.3
export R=/sw/xk6/r/3.3.2/sles11.3_gnu4.9.3x/lib64/R
export PY=/sw/xk6/deeplearning/1.0/sles11.3_gnu4.9.3

# Prepend the Python and R library directories. The ":+" expansion appends
# the inherited value only when LD_LIBRARY_PATH is set and non-empty, which
#  - keeps the script from aborting under "set -u" when it is unset, and
#  - avoids a trailing ":" (an empty search-path entry).
export LD_LIBRARY_PATH="$PY/lib:$R/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

COMMON_DIR=$EMEWS_PROJECT_ROOT/../common/python
PYTHONPATH=$EMEWS_PROJECT_ROOT/python:$BENCHMARK_DIR:$COMMON_DIR
# Same prefix as PY; reuse it so the two cannot drift apart
PYTHONHOME=$PY

# Put the swift-t/stc and Tcl executables ahead of the inherited PATH
export PATH=/lustre/atlas2/csc249/proj-shared/sfw/swift-t/stc/bin/:$TCL/bin:$PATH
#$PYTHONHOME/bin:$TCL/bin:$PATH
# Resident task workers and ranks
TURBINE_RESIDENT_WORK_WORKERS=1
RESIDENT_WORK_RANKS=$(( PROCS - 2 ))
export TURBINE_RESIDENT_WORK_WORKERS RESIDENT_WORK_RANKS

# EQ/R location
EQR="${EMEWS_PROJECT_ROOT}/ext/EQ-R"

# Arguments handed to the Swift workflow: the script's own arguments first,
# then one "-name=value" option per setting, separated by single spaces.
CMD_LINE_ARGS="$*"
CMD_LINE_ARGS+=" -pp=${PROPOSE_POINTS}"
CMD_LINE_ARGS+=" -mi=${MAX_ITERATIONS}"
CMD_LINE_ARGS+=" -mb=${MAX_BUDGET}"
CMD_LINE_ARGS+=" -ds=${DESIGN_SIZE}"
CMD_LINE_ARGS+=" -param_set_file=${PARAM_SET_FILE}"
CMD_LINE_ARGS+=" -script_file=${EMEWS_PROJECT_ROOT}/scripts/titan_run_model.sh"
CMD_LINE_ARGS+=" -exp_id=${EXPID}"
CMD_LINE_ARGS+=" -log_script=${EMEWS_PROJECT_ROOT}/../common/sh/titan_run_logger.sh"
# set machine to your scheduler type (e.g. pbs, slurm, cobalt etc.),
# or empty for an immediate non-queued unscheduled run
MACHINE="cray"

# A non-empty scheduler type becomes the option string "-m <type>";
# an empty one stays empty.
MACHINE="${MACHINE:+-m ${MACHINE}}"

# Add any script variables that you want to log as
# part of the experiment meta data to the USER_VARS array,
# for example, USER_VARS=("VAR_1" "VAR_2")
# Here the array is filled with the whitespace-separated words of
# CMD_LINE_ARGS (the expansion is deliberately left unquoted).
USER_VARS=( $CMD_LINE_ARGS )
# log variables and script to the TURBINE_OUTPUT directory
log_script
# Library search path passed to the launched job with "-e" below. This plain
# assignment replaces whatever value LD_LIBRARY_PATH held up to this point.
LD_LIBRARY_PATH=/sw/xk6/deeplearning/1.0/sles11.3_gnu4.9.3/lib:/sw/xk6/deeplearning/1.0/sles11.3_gnu4.9.3/cuda/lib64:/opt/gcc/4.9.3/snos/lib64:/sw/xk6/r/3.3.2/sles11.3_gnu4.9.3x/lib64/R/lib
SWIFT=/lustre/atlas2/csc249/proj-shared/sfw/swift-t/stc/bin/swift-t
export PROJECT=CSC249ADOA01
export TITAN=true

# Trace every command from here on (bash writes the trace to stderr)
set -x
WORKFLOW_SWIFT=ai_workflow3.swift

# Launch the workflow.
#  - Every continuation backslash is preceded by a space so adjacent tokens
#    can never fuse (previously "-n $PROCS\" ran straight into the next "-e").
#  - Expansions are quoted so values containing spaces stay single arguments.
#  - $CMD_LINE_ARGS is intentionally left unquoted: it is a space-separated
#    option string that must be split into individual arguments.
# NOTE(review): the scheduler type is hard-coded as "-m cray" here; the
# MACHINE variable prepared earlier in the script is not consulted.
"$SWIFT" -m cray -n "$PROCS" \
  -e LD_LIBRARY_PATH="$LD_LIBRARY_PATH" \
  -p -I "$EQR" -r "$EQR" \
  -e TURBINE_RESIDENT_WORK_WORKERS="$TURBINE_RESIDENT_WORK_WORKERS" \
  -e RESIDENT_WORK_RANKS="$RESIDENT_WORK_RANKS" \
  -e EMEWS_PROJECT_ROOT="$EMEWS_PROJECT_ROOT" \
  -e PYTHONPATH="$PYTHONPATH" \
  -e PYTHONHOME="$PYTHONHOME" \
  -e TURBINE_LOG="$TURBINE_LOG" \
  -e TURBINE_DEBUG="$TURBINE_DEBUG" \
  -e ADLB_DEBUG="$ADLB_DEBUG" \
  -e TURBINE_OUTPUT="$TURBINE_OUTPUT" \
  "$EMEWS_PROJECT_ROOT/swift/$WORKFLOW_SWIFT" $CMD_LINE_ARGS

0 commit comments

Comments
 (0)