ECP-CANDLE
diff --git a/‎docs/home.adoc‎
Lines changed: 16 additions & 0 deletions b/‎docs/home.adoc‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎docs/home.html‎
Lines changed: 17 additions & 1 deletion b/‎docs/home.html‎
Lines changed: 17 additions & 1 deletion
diff --git a/‎workflows/common/python/runner_utils.py‎
Lines changed: 9 additions & 3 deletions b/‎workflows/common/python/runner_utils.py‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎workflows/nt3_mlrMBO/README.md‎
Lines changed: 15 additions & 5 deletions b/‎workflows/nt3_mlrMBO/README.md‎
Lines changed: 15 additions & 5 deletions
diff --git a/‎workflows/nt3_mlrMBO/python/log_runner.py‎
Lines changed: 27 additions & 0 deletions b/‎workflows/nt3_mlrMBO/python/log_runner.py‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎workflows/nt3_mlrMBO/python/nt3_tc1_runner.py‎
Lines changed: 6 additions & 3 deletions b/‎workflows/nt3_mlrMBO/python/nt3_tc1_runner.py‎
Lines changed: 6 additions & 3 deletions
diff --git a/‎workflows/nt3_mlrMBO/python/test/run_test_app_invoke.sh‎
Lines changed: 5 additions & 2 deletions b/‎workflows/nt3_mlrMBO/python/test/run_test_app_invoke.sh‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎workflows/nt3_mlrMBO/scripts/run_logger.sh‎
Lines changed: 17 additions & 0 deletions b/‎workflows/nt3_mlrMBO/scripts/run_logger.sh‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎workflows/nt3_mlrMBO/scripts/run_model.sh‎
Lines changed: 9 additions & 5 deletions b/‎workflows/nt3_mlrMBO/scripts/run_model.sh‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎workflows/nt3_mlrMBO/scripts/theta_run_logger.sh‎
Lines changed: 24 additions & 0 deletions b/‎workflows/nt3_mlrMBO/scripts/theta_run_logger.sh‎
Lines changed: 24 additions & 0 deletions
@@ -43,6 +43,22 @@ Other Theta ESP notes are here: https://collab.cels.anl.gov/display/ESP
 This uses the system-installed Python with ML libs at: +
 +/usr/common/software/python/2.7-anaconda/envs/deeplearning+
 
+[[titan]]
+* https://www.olcf.ornl.gov/titan[Titan]
++
+This is a CANDLE-only installation.  It uses the OLCF-provided Python +deeplearning+ module (Python 3.6 plus TensorFlow, Theano, and Keras) and R 3.3.2.
++
+Add to +PATH+: +/lustre/atlas2/csc249/proj-shared/sfw/swift-t/stc/bin+
++
+Run with:
++
+----
+$ export TITAN=true
+$ export PROJECT=... QUEUE=...
+$ export LD_LIBRARY_PATH=/sw/xk6/deeplearning/1.0/sles11.3_gnu4.9.3/lib:/sw/xk6/deeplearning/1.0/sles11.3_gnu4.9.3/cuda/lib64:/opt/gcc/4.9.3/snos/lib64:/sw/xk6/r/3.3.2/sles11.3_gnu4.9.3x/lib64/R/lib
+$ swift-t -m cray -e LD_LIBRARY_PATH=$LD_LIBRARY_PATH workflow.swift
+----
+
 * http://swift-lang.github.io/swift-t/sites.html#cooley_candle[Cooley]
 +
 This uses the system-installed Python with ML libs at: +
 
@@ -803,6 +803,21 @@ <h2 id="_swift_installations">Swift installations</h2>
 </li>
 <li>
 <p>
+<a href="https://www.olcf.ornl.gov/titan">Titan</a>
+</p>
+<div class="paragraph" id="titan"><p>This is a CANDLE-only installation.  It uses the OLCF-provided Python <code>deeplearning</code> module (Python 3.6 plus TensorFlow, Theano, and Keras) and R 3.3.2.</p></div>
+<div class="paragraph"><p>Add to <code>PATH</code>: <code>/lustre/atlas2/csc249/proj-shared/sfw/swift-t/stc/bin</code></p></div>
+<div class="paragraph"><p>Run with:</p></div>
+<div class="listingblock">
+<div class="content">
+<pre><code>$ export TITAN=true
+$ export PROJECT=... QUEUE=...
+$ export LD_LIBRARY_PATH=/sw/xk6/deeplearning/1.0/sles11.3_gnu4.9.3/lib:/sw/xk6/deeplearning/1.0/sles11.3_gnu4.9.3/cuda/lib64:/opt/gcc/4.9.3/snos/lib64:/sw/xk6/r/3.3.2/sles11.3_gnu4.9.3x/lib64/R/lib
+$ swift-t -m cray -e LD_LIBRARY_PATH=$LD_LIBRARY_PATH workflow.swift</code></pre>
+</div></div>
+</li>
+<li>
+<p>
 <a href="http://swift-lang.github.io/swift-t/sites.html#cooley_candle">Cooley</a>
 </p>
 <div class="paragraph"><p>This uses the system-installed Python with ML libs at:<br />
@@ -834,7 +849,8 @@ <h2 id="_swift_installations">Swift installations</h2>
 <div id="footnotes"><hr /></div>
 <div id="footer">
 <div id="footer-text">
-Last updated 2017-06-07 11:35:47 CDT
+Last updated
+ 2017-06-21 13:21:24 CDT
 </div>
 </div>
 </body>
 
@@ -1,15 +1,21 @@
 import numpy as np
 import json, os
 
+# Python 3 has no builtin named `basestring`; when the lookup raises
+# NameError, bind the name to `str` so it is defined on either version.
+try:
+  basestring
+except NameError:
+  basestring = str
+
 DATA_TYPES = {type(np.float16): 'f16', type(np.float32): 'f32', type(np.float64): 'f64'}
 
 def write_output(result, instance_directory):
     with open('{}/result.txt'.format(instance_directory), 'w') as f_out:
         f_out.write("{}\n".format(result))
 
-def init(param_file, instance_directory, framework, out_dir_key):
-    with open(param_file) as f_in:
-        hyper_parameter_map = json.load(f_in)
+def init(param_string, instance_directory, framework, out_dir_key):
+    #with open(param_file) as f_in:
+    #    hyper_parameter_map = json.load(f_in)
+    hyper_parameter_map = json.loads(param_string.strip())
 
     if not os.path.exists(instance_directory):
         os.makedirs(instance_directory)
 
@@ -2,7 +2,7 @@
 
 The NT3 mlrMBO workflow evaluates the NT3 benchmark
 using hyperparameters provided by a mlrMBO instance. mlrMBO
-minimizes the TODO. Swift is used to scalably distribute
+minimizes the validation loss. Swift is used to scalably distribute
 work across the system, and EMEWS is used to:
 
 1. Pass the hyperparameters to evaluate from the running mlrMBO algorithm to
@@ -95,7 +95,11 @@ nt3_mlrMBO/
  * `swift/workflow.sh` - generic launch script to set the appropriate enviroment variables etc. and then launch the swift workflow script
  * `swift/cori_workflow3.sh` - launch script customized for the Cori supercomputer
  * `swift/cori_settings.sh` - settings for running on the Cori supercomputer
- * `swift/ai_workflow3.swift` - app invocation ("ai") version (see below) of the swift workflow
+ * `swift/ai_workflow.sh` - launch script for running the app invocation ("ai") workflow (see below).
+ * `swift/ai_workflow3.swift` - app invocation version (see below) of the swift workflow
+ * `swift/theta_workflow.sh` - launch script for running on Theta. This uses the app invocation workflow.
+ * `scripts/theta_run_model.sh` - Theta-specific bash script used to launch nt3_runner.py
+ * `scripts/run_model.sh` - generic bash script used to launch nt3_tc1_runner.py
 
 ## Running the Workflow ##
 
@@ -104,7 +108,9 @@ There are two different versions of the workflow.
 1. The first runs the benchmark code directly from within swift using swift's
 python integration.
 2. The second, the _ai_-version, runs the benchmark code by invoking the python interpreter using
-a bash script which is in turn invoked using a swift app function.
+a bash script which is in turn invoked using a swift app function.  The bash scripts
+`scripts/theta_run_model.sh` and `scripts/run_model.sh` are examples of such a
+bash script.
 
 The latter of these is necessary on machines like Theta where it is not possible
 to compile swift with an appropriate python.
@@ -139,8 +145,12 @@ the workflow is run, by defining which swift is actually run.
    * Set to `$EMEWS_PROJECT_ROOT\swift\workflow3.swift` to run the benchmarks via swift's integrated python.
    * Set to `$EMEWS_PROJECT_ROOT\swift\ai_workflow3.swift` to run the benchmarks via a swift
    app function.
-* `SCRIPT_FILE` - the bash script used to run benchmark when the benchmark is
-run via a swift app function.
+
+ If you need to run the _ai_-version of the workflow, there is an additional shell
+variable to set:
+
+* `SCRIPT_FILE` - the path to the bash script that is used to launch the python
+   benchmark runner code (e.g. `scripts/run_model.sh`).
 
 If running on an HPC machine, set `PROCS`, `PPN`, `QUEUE`, `WALLTIME` and `MACHINE`
 as appropriate.
 
@@ -0,0 +1,27 @@
+import sys
+import exp_logger
+
+def log_start():
+    """Record the start of an experiment via ``exp_logger.start``.
+
+    The values are read from ``sys.argv[2]`` through ``sys.argv[7]``. The
+    ``params`` entry and the system-environment string are each wrapped in
+    literal triple double-quotes before being passed on.
+    """
+    triple_quoted = "\"\"\"{}\"\"\""
+    start_info = {
+        'pp': sys.argv[2],
+        'iterations': sys.argv[3],
+        'params': triple_quoted.format(sys.argv[4]),
+        'algorithm': sys.argv[5],
+        'experiment_id': sys.argv[6],
+    }
+    environment = triple_quoted.format(sys.argv[7])
+
+    exp_logger.start(start_info, environment)
+
+def log_end():
+    """Record the end of an experiment via ``exp_logger.end``.
+
+    The experiment id is taken from ``sys.argv[2]``.
+    """
+    exp_logger.end(sys.argv[2])
+
+def main():
+    """Dispatch on the sub-command given in ``sys.argv[1]``.
+
+    ``'start'`` logs the start of an experiment; any other value logs its
+    end. The full argument vector is printed before dispatching.
+    """
+    print(sys.argv)
+    handler = log_start if sys.argv[1] == 'start' else log_end
+    handler()
+
+if __name__ == '__main__':
+    main()
@@ -55,13 +55,16 @@ def run(hyper_parameter_map):
     return val_loss[-1]
 
 if __name__ == '__main__':
-    param_file = sys.argv[1]
+    param_string = sys.argv[1]
     instance_directory = sys.argv[2]
     model_name = sys.argv[3]
     framework = sys.argv[4]
-    hyper_parameter_map = runner_utils.init(param_file, instance_directory,
-                                            framework, 'save')
+    exp_id = sys.argv[5]
+    run_id = sys.argv[6]
+    hyper_parameter_map = runner_utils.init(param_string, instance_directory, framework, 'save')
     hyper_parameter_map['model_name'] = model_name
+    hyper_parameter_map['experiment_id'] = exp_id
+    hyper_parameter_map['run_id'] = run_id
     # clear sys.argv so that argparse doesn't object
     sys.argv = ['nt3_tc1_runner']
     result = run(hyper_parameter_map)
 
@@ -2,7 +2,10 @@
 
 NT3_DIR=../../../../../Benchmarks/Pilot1/NT3
 TC1_DIR=../../../../../Benchmarks/Pilot1/TC1
+COMMON_DIR="../../../common/python"
 
-export PYTHONPATH="$PWD/..:$NT3_DIR:$TC1_DIR"
+PARAM_STRING="$(<./params.json)"
 
-python ../nt3_tc1_runner.py ./params.json ./ nt3
+export PYTHONPATH="$PWD/..:$NT3_DIR:$TC1_DIR:$COMMON_DIR"
+
+python ../nt3_tc1_runner.py "$PARAM_STRING" ./ nt3 keras foo bar
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Logs the start or end of an experiment by running python/log_runner.py.
+#
+# Usage:
+#   run_logger.sh start EMEWS_PROJECT_ROOT propose_points max_iterations ps algorithm exp_id sys_env
+#   run_logger.sh CMD   EMEWS_PROJECT_ROOT exp_id      (any CMD other than "start")
+set -eu
+
+CMD=$1
+EMEWS_PROJECT_ROOT=$2
+
+COMMON_DIR=$EMEWS_PROJECT_ROOT/../../../Benchmarks/common
+export PYTHONPATH="$COMMON_DIR"
+
+# The arguments are collected in an array so that each one reaches Python as
+# a single word, even if it contains whitespace.
+arg_array=("$EMEWS_PROJECT_ROOT/python/log_runner.py" "$CMD")
+
+# [[ ]] with a quoted $CMD keeps the comparison well-formed for any value.
+if [[ "$CMD" == "start" ]]; then
+  # "start" propose_points, max_iterations, ps, algorithm, exp_id, sys_env
+  arg_array+=("$3" "$4" "$5" "$6" "$7" "$8")
+else
+  # Every other command forwards only the third argument (the experiment id).
+  arg_array+=("$3")
+fi
+
+python "${arg_array[@]}"
@@ -13,7 +13,7 @@ set -eu
 
 # !!! IF YOU CHANGE THE NUMBER OF ARGUMENTS PASSED TO THIS SCRIPT, YOU MUST
 # CHANGE THE TIMEOUT_ARG_INDEX !!!
-TIMEOUT_ARG_INDEX=6
+TIMEOUT_ARG_INDEX=8
 TIMEOUT=""
 if [[ $# ==  $TIMEOUT_ARG_INDEX ]]
 then
@@ -27,7 +27,8 @@ fi
 
 # Set param_line from the first argument to this script
 # param_line is the string containing the model parameters for a run.
-param_file=$1
+parameter_string="$1"
+# Log the parameter string verbatim: quoting prevents word splitting and
+# pathname expansion, and printf is not confused by a leading '-'.
+printf '%s\n' "$parameter_string"
 
 # Set emews_root to the root directory of the project (i.e. the directory
 # that contains the scripts, swift, etc. directories and files)
@@ -40,18 +41,21 @@ cd $instance_directory
 
 model_name=$4
 framework=$5
+exp_id=$6
+run_id=$7
 
-BENCHMARK_DIR=$emews_root/../../../Benchmarks/Pilot1/NT3:$emews_root/../../../Benchmarks/Pilot1/TC1
+BENCHMARK_DIR=$emews_root/../../../Benchmarks/common:$emews_root/../../../Benchmarks/Pilot1/NT3:$emews_root/../../../Benchmarks/Pilot1/TC1
 COMMON_DIR=$emews_root/../common/python
 export PYTHONPATH="$PYTHONPATH:$BENCHMARK_DIR:$COMMON_DIR"
-MODEL_CMD="python $emews_root/python/nt3_tc1_runner.py $param_file $instance_directory $model_name $framework"
 
+# Arguments for the model runner, kept as an array so that each value is
+# passed to python as exactly one word.
+arg_array=(
+  "$emews_root/python/nt3_tc1_runner.py"
+  "$parameter_string"
+  "$instance_directory"
+  "$model_name"
+  "$framework"
+  "$exp_id"
+  "$run_id"
+)
+# Human-readable form of the command, used only for logging; [*] joins the
+# array elements into the single string being assigned.
+MODEL_CMD="python ${arg_array[*]}"
 # Turn bash error checking off. This is
 # required to properly handle the model execution return value
 # the optional timeout.
 set +e
 echo $MODEL_CMD
-$TIMEOUT_CMD $MODEL_CMD
+$TIMEOUT_CMD python "${arg_array[@]}"
 # $? is the exit status of the most recently executed command (i.e the
 # line above)
 RES=$?
 
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+# Theta variant of the experiment logger launcher: configures the Anaconda
+# environment under PYTHONHOME and then runs python/log_runner.py.
+#
+# Usage:
+#   theta_run_logger.sh start EMEWS_PROJECT_ROOT propose_points max_iterations ps algorithm exp_id sys_env
+#   theta_run_logger.sh CMD   EMEWS_PROJECT_ROOT exp_id      (any CMD other than "start")
+set -eu
+
+CMD=$1
+EMEWS_PROJECT_ROOT=$2
+
+export PYTHONHOME="/home/brettin/anaconda2/envs/vrane"
+PYTHON="$PYTHONHOME/bin/python"
+export LD_LIBRARY_PATH="$PYTHONHOME/lib"
+export PATH="$PYTHONHOME/bin:$PATH"
+
+# Resolved from EMEWS_PROJECT_ROOT, the only project-root variable this
+# script defines (an undefined name here would abort under `set -u`).
+COMMON="$EMEWS_PROJECT_ROOT/../../../Benchmarks/common"
+PYTHONPATH="$PYTHONHOME/lib/python2.7:$COMMON"
+# The leading ':' keeps site-packages a separate entry from $COMMON.
+PYTHONPATH+=":$PYTHONHOME/lib/python2.7/site-packages"
+export PYTHONPATH
+
+# The arguments are collected in an array so that each one reaches Python as
+# a single word, even if it contains whitespace.
+arg_array=("$EMEWS_PROJECT_ROOT/python/log_runner.py" "$CMD")
+
+# [[ ]] with a quoted $CMD keeps the comparison well-formed for any value.
+if [[ "$CMD" == "start" ]]; then
+  # "start" propose_points, max_iterations, ps, algorithm, exp_id, sys_env
+  arg_array+=("$3" "$4" "$5" "$6" "$7" "$8")
+else
+  # Every other command forwards only the third argument (the experiment id).
+  arg_array+=("$3")
+fi
+
+# Run the interpreter from the environment configured above.
+"$PYTHON" "${arg_array[@]}"