Skip to content

Commit 322cac3

Browse files
authored
Merge pull request #16 from ECP-CANDLE/rajeeja/random_sweeps_p1b1
o Adding p1b1_random works with Benchmark(frameworks branch)
2 parents d2ae8f1 + 647a296 commit 322cac3

14 files changed

+492
-0
lines changed

workflows/p1b1_random/README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Simple parameter sweep with Swift -> parameters randomly chosen between specified bounds.
2+
The main program (random-sweep.swift) calls a few app functions as follows:
3+
- determineParameters.{sh,py}: Read data/ **settings.json** for sweep parameters, and return as a string for use by Swift program
4+
- evaluateOne.{sh,py}: Runs a single experiment. (Calls p1b1_runner).
5+
- computeStats.{sh,py}: Ingests data from all of the experiments and computes simple stats.
6+
7+
Usage: ./run experiment_1
8+
9+
Notes:
10+
- **settings.json**:
11+
A. parameters (benchmark parameters)
12+
=====================================
13+
1: epochs
14+
2. batch_size
15+
3. N1
16+
4. NE
17+
18+
B. samples (specifies the number of random samples to prepare)
19+
===============================================================
20+
1. num
21+
22+
For adding new parameters:
23+
1. Add to the json file the desired parameters
24+
2. Read params in determineParameters.py: def loadSettings(settingsFilename):
25+
3. Modify the evaluateOne.py file (set to run on keras framework now)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"parameters":
3+
{
4+
"epochs": [4, 8],
5+
"batch_size": [30, 40],
6+
"N1": [1000, 2000],
7+
"NE": [400, 600]
8+
},
9+
"samples":
10+
{
11+
"num": [4]
12+
}
13+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import sys
2+
from collections import defaultdict
3+
import json, os
4+
5+
def extractVals(A):
    """Parse a whitespace-separated "name value name value ..." string.

    A: string of alternating names and numeric values, e.g. "r1 0.5 r2 0.7".
    Returns a dict mapping each name to its value as a float.
    A trailing unpaired token is ignored (zip stops at the shorter slice).
    Fix: the original used defaultdict(dict), whose default (an empty dict)
    is the wrong type for the float values stored here; a plain dict built
    with a comprehension is both correct and idiomatic.
    """
    tokens = A.split()
    # Even-indexed tokens are names, odd-indexed tokens are their values.
    return {name: float(val) for name, val in zip(tokens[0::2], tokens[1::2])}
11+
12+
def computeStats(swiftArrayAsString):
    """Parse the name/value string and print count, min, max and mean."""
    parsed = extractVals(swiftArrayAsString)
    vals = [parsed[key] for key in parsed]
    print('%d values, with min=%f, max=%f, avg=%f\n'
          % (len(vals), min(vals), max(vals), sum(vals) / float(len(vals))))
18+
19+
20+
# Main program: read the collected-results file named on the command line
# and print simple summary statistics.

if (len(sys.argv) < 2):
    print('requires arg=dataFilename')
    sys.exit(1)

dataFilename = sys.argv[1]

try:
    with open(dataFilename, 'r') as the_file:
        data = the_file.read()
except IOError as e:
    print("Could not open: %s" % dataFilename)
    print("PWD is: '%s'" % os.getcwd())
    # Bug fix: the original fell through to computeStats(data) with 'data'
    # unbound, raising a NameError; exit explicitly on a read failure.
    sys.exit(1)

computeStats(data)
34+
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import sys, json, os
2+
from random import randint
3+
4+
# ===== Definitions =========================================================
5+
6+
def loadSettings(settingsFilename):
    """Load sweep settings from a JSON file.

    Returns (epochs, batch_size, N1, NE, samples), each being the list of
    bounds (or sample count) stored under the corresponding key.
    Exits the process when the file cannot be read or a key is missing.
    """
    print("Reading settings: %s" % settingsFilename)
    try:
        with open(settingsFilename) as fp:
            settings = json.load(fp)
    except IOError as e:
        print("Could not open: %s" % settingsFilename)
        print("PWD is: '%s'" % os.getcwd())
        sys.exit(1)

    try:
        parameters = settings['parameters']
        epochs = parameters["epochs"]
        batch_size = parameters["batch_size"]
        N1 = parameters["N1"]
        NE = parameters["NE"]
    except KeyError as e:
        print("Settings file (%s) does not contain key: %s" % (settingsFilename, str(e)))
        sys.exit(1)

    try:
        samples = settings['samples']["num"]
    except KeyError as e:
        print("Settings file (%s) does not contain key: %s" % (settingsFilename, str(e)))
        sys.exit(1)

    return (epochs, batch_size, N1, NE, samples)
30+
31+
# ===== Main program ========================================================
# Draw random samples within the configured bounds and write them to the
# params file as colon-separated "epochs,batch_size,N1,NE" records.

if (len(sys.argv) < 3):
    print('requires arg1=settingsFilename and arg2=paramsFilename')
    sys.exit(1)

settingsFilename = sys.argv[1]
paramsFilename = sys.argv[2]

print (settingsFilename)
print (paramsFilename)

epochs, batch_size, N1, NE, samples = loadSettings(settingsFilename)

# One record per sample; each field is drawn uniformly from its [lo, hi] bounds.
records = []
for _ in range(samples[0]):
    draw = [randint(epochs[0], epochs[1]),
            randint(batch_size[0], batch_size[1]),
            randint(N1[0], N1[1]),
            randint(NE[0], NE[1])]
    records.append(','.join(str(v) for v in draw))
result = ":".join(records)

with open(paramsFilename, 'w') as the_file:
    the_file.write(result)
57+
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import sys
2+
import p1b1_runner
3+
import json
4+
5+
6+
# Evaluate one hyperparameter draw: arg1 is "epochs,batch_size,N1,NE",
# arg2 is the file that receives the resulting validation loss.

if (len(sys.argv) < 3):
    print('requires arg1=param and arg2=filename')
    sys.exit(1)

parameterString = sys.argv[1]
filename = sys.argv[2]

print ("filename is " + filename)

# The parameter string is a comma-separated list of four integers.
integs = list(map(int, parameterString.split(',')))
print (integs)

hyper_parameter_map = {
    'epochs': integs[0],
    'framework': 'keras',
    'batch_size': integs[1],
    'dense': [integs[2], integs[3]],
    'save': './output',
}

val_loss = p1b1_runner.run(hyper_parameter_map)
print (val_loss)

# works around this error:
# https://github.com/tensorflow/tensorflow/issues/3388
from keras import backend as K
K.clear_session()

# writing the val loss to the output file
with open(filename, 'w') as the_file:
    the_file.write(repr(val_loss))
36+
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# tensorflow.__init__ calls _os.path.basename(_sys.argv[0]),
# so we need to create a synthetic argv before tensorflow is imported
# (presumably for embedded interpreters such as Swift/T's in-process
# Python, which may not set sys.argv — TODO confirm).
import sys
if not hasattr(sys, 'argv'):
    sys.argv = ['p1b1']
6+
7+
import json
8+
import os
9+
import p1b1
10+
import numpy as np
11+
12+
DATA_TYPES = {type(np.float16): 'f16', type(np.float32): 'f32', type(np.float64): 'f64'}
13+
14+
def write_params(params, hyper_parameter_map):
15+
parent_dir = hyper_parameter_map['instance_directory'] if 'instance_directory' in hyper_parameter_map else '.'
16+
f = "{}/parameters_p1b1.txt".format(parent_dir)
17+
with open(f, "w") as f_out:
18+
f_out.write("[parameters]\n")
19+
for k,v in params.items():
20+
if type(v) in DATA_TYPES:
21+
v = DATA_TYPES[type(v)]
22+
if isinstance(v, basestring):
23+
v = "'{}'".format(v)
24+
f_out.write("{}={}\n".format(k, v))
25+
26+
def is_numeric(val):
    """Return True when `val` can be parsed as a float, else False."""
    try:
        float(val)
    except ValueError:
        return False
    return True

def format_params(hyper_parameter_map):
    """Convert space-separated numeric string values into numeric lists.

    Mutates hyper_parameter_map in place: a value like "100 200" becomes
    [100, 200] and "0.1 0.2" becomes [0.1, 0.2]; all other values are
    left untouched.
    """
    for key, value in hyper_parameter_map.items():
        parts = str(value).split(" ")
        if len(parts) <= 1 or not is_numeric(parts[0]):
            continue
        # A '.' in the first token means the whole list is float-valued.
        caster = float if "." in parts[0] else int
        hyper_parameter_map[key] = [caster(p) for p in parts]
42+
43+
44+
def run(hyper_parameter_map):
    """Run one P1B1 training with the given hyperparameters.

    Selects the backend from hyper_parameter_map['framework'] ('keras',
    'mxnet' or 'neon'), overlays the map onto the benchmark's default
    parameters, runs training, and returns the final validation loss.

    Raises ValueError for an unknown framework.
    """
    framework = hyper_parameter_map['framework']
    # Bug fix: the original compared strings with 'is', which tests object
    # identity and only works by the accident of CPython literal interning;
    # '==' is the correct comparison (applies to all four checks below).
    if framework == 'keras':
        import p1b1_baseline_keras2
        pkg = p1b1_baseline_keras2
    elif framework == 'mxnet':
        import p1b1_baseline_mxnet
        pkg = p1b1_baseline_mxnet
    elif framework == 'neon':
        import p1b1_baseline_neon
        pkg = p1b1_baseline_neon
    else:
        raise ValueError("Invalid framework: {}".format(framework))

    # params is a python dictionary of the benchmark's defaults.
    # NOTE(review): the sentinel argv presumably makes argparse inside
    # initialize_parameters() fail loudly if it is consulted -- confirm.
    sys.argv = ['fail here', '--epochs', '54321']
    params = pkg.initialize_parameters()
    format_params(hyper_parameter_map)

    # Overlay the caller-supplied hyperparameters on the defaults.
    for k, v in hyper_parameter_map.items():
        params[k] = v

    print(params)
    write_params(params, hyper_parameter_map)
    history = pkg.run(params)

    if framework == 'keras':
        # works around this error:
        # https://github.com/tensorflow/tensorflow/issues/3388
        try:
            from keras import backend as K
            K.clear_session()
        except AttributeError:  # theano does not have this function
            pass

    # use the last validation_loss as the value to minimize
    val_loss = history.history['val_loss']
    return val_loss[-1]
84+
85+
def write_output(result, instance_directory):
    """Write `result` as a single line to <instance_directory>/result.txt."""
    out_path = '{}/result.txt'.format(instance_directory)
    with open(out_path, 'w') as f_out:
        f_out.write("{}\n".format(result))
88+
89+
def init(param_file, instance_directory):
    """Load the JSON hyperparameter file and add run-level bookkeeping.

    Forces the keras framework, points 'save' at an output directory
    under the instance directory, and records the instance directory.
    """
    with open(param_file) as f_in:
        hyper_parameter_map = json.load(f_in)

    hyper_parameter_map.update({
        'framework': 'keras',
        'save': '{}/output'.format(instance_directory),
        'instance_directory': instance_directory,
    })
    return hyper_parameter_map
98+
99+
if __name__ == '__main__':
    # Bug fix: the original printed str(argv), but bare 'argv' is undefined
    # at module scope -- it must be sys.argv.
    print('p1b1_runner main ' + str(sys.argv))
    param_file = sys.argv[1]
    instance_directory = sys.argv[2]
    hyper_parameter_map = init(param_file, instance_directory)
    # clear sys.argv so that argparse doesn't object
    sys.argv = ['p1b1_runner']
    result = run(hyper_parameter_map)
    write_output(result, instance_directory)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#! /usr/bin/env bash
2+
3+
P1B1_DIR=../../../../Benchmarks/Pilot1/P1B1
4+
export PYTHONPATH="$PWD/..:$P1B1_DIR"
5+
echo $PYTHONPATH
6+
7+
python test_p1b1.py
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import p1b1_runner
2+
3+
def main():
    """Run a single short P1B1 training and print its validation loss."""
    hyper_parameter_map = {
        'epochs': 1,
        'batch_size': 40,
        'dense': [1900, 500],
        'framework': 'keras',
        'save': './p1bl1_output',
    }

    validation_loss = p1b1_runner.run(hyper_parameter_map)
    print("Validation Loss: ", validation_loss)

if __name__ == '__main__':
    main()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/bin/bash
2+
python $APP_HOME/../python/computeStats.py $1
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#!/bin/bash
#
# Usage: ./run EXPERIMENT_ID
#
# Launches the random-sweep Swift/T workflow for one experiment.

if [ "$#" -ne 1 ]; then
  script_name=$(basename $0)
  echo "Usage: ${script_name} EXPERIMENT_ID (e.g. ${script_name} experiment_1)"
  exit 1
fi

#### set this variable to your P1B1 benchmark directory (frameworks branch)
P1B1_DIR=../../../../Benchmarks/Pilot1/P1B1
###

THIS=$( cd $( dirname $0 ); /bin/pwd )
export APP_HOME=$THIS

PROJECT_ROOT=$APP_HOME/..

export EXPID=$1
export TURBINE_OUTPUT=$APP_HOME/../experiments/$EXPID

# TODO edit QUEUE, WALLTIME, PPN, AND TURBINE_JOBNAME
# as required. Note that QUEUE, WALLTIME, PPN, AND TURBINE_JOBNAME will
# be ignored if MACHINE flag (see below) is not set
export QUEUE=default
export WALLTIME=00:45:00
export PPN=2
export TURBINE_JOBNAME="${EXPID}_job"
# export PROJECT=UrbanExP

# PYTHONPATH
PYTHON_ROOT=/soft/analytics/conda/env/Candle_ML

PATH=$PYTHON_ROOT/bin:$PATH

which python

PP=
PP+=$PYTHON_ROOT/lib/python2.7/site-packages:
PP+=$PYTHON_ROOT/lib/python2.7:
PP+=$P1B1_DIR:
PP+=$PROJECT_ROOT/python

# PYTHONHOME
PH=/soft/analytics/conda/env/Candle_ML

export MODE=cluster
### set the desired number of processors
PROCS=2
###

# Bug fix: PROCS must be assigned before ENVS, because $(( PROCS - 2 )) is
# evaluated when ENVS is assigned; previously PROCS was still unset at this
# point, so RESIDENT_WORK_RANKS silently came out as -2.
ENVS="-e PYTHONHOME=$PH -e PYTHONPATH=$PP -e TURBINE_RESIDENT_WORK_WORKERS=1 -e RESIDENT_WORK_RANKS=$(( PROCS - 2 )) -e PROJECT_ROOT=$PROJECT_ROOT -e APP_HOME=$APP_HOME -e TURBINE_OUTPUT=$TURBINE_OUTPUT"

# set machine to your schedule type (e.g. pbs, slurm, cobalt etc.),
# or empty for an immediate non-queued unscheduled run
# MACHINE="cobalt"

if [ -n "$MACHINE" ]; then
  MACHINE="-m $MACHINE"
fi

# remove -l option for removing printing processors ranks
# settings.json file has all the parameter combinations to be tested
set -x
export TURBINE_LOG=1
echo swift-t -l -n $PROCS $MACHINE -p $ENVS $APP_HOME/random-sweep.swift $* --settings=$PWD/../data/settings.json
swift-t -l -n $PROCS $MACHINE -p $ENVS $APP_HOME/random-sweep.swift $* --settings=$PWD/../data/settings.json
79+

0 commit comments

Comments
 (0)