Skip to content

Commit 9e07404

Browse files
committed
Add runner changes to the grid workflow
1 parent 1e2c83e commit 9e07404

File tree

4 files changed

+27
-72
lines changed

4 files changed

+27
-72
lines changed
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
{
22
"parameters":
33
{
4-
"epochs": [1],
5-
"batch_size": [30],
6-
"N1": [1000],
7-
"NE": [400]
4+
"epochs": [2, 4, 8 ],
5+
"batch_size": [20, 40],
6+
"N1": [1000, 2000],
7+
"NE": [500]
88
}
99
}
Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import sys
22
import p1b1_runner
33
import json, os
4-
4+
import socket
55

66
if (len(sys.argv) < 3):
77
print('requires arg1=param and arg2=filename')
@@ -12,7 +12,7 @@
1212

1313
# print (parameterString)
1414
print ("filename is " + filename)
15-
15+
print (socket.gethostname())
1616

1717
integs = [int(x) for x in parameterString.split(',')]
1818
print (integs)
@@ -21,16 +21,25 @@
2121
hyper_parameter_map['framework'] = 'keras'
2222
hyper_parameter_map['batch_size'] = integs[1]
2323
hyper_parameter_map['dense'] = [integs[2], integs[3]]
24-
hyper_parameter_map['save'] = os.environ['TURBINE_OUTPUT']+ "/"+'./output'+parameterString
25-
24+
hyper_parameter_map['run_id'] = parameterString
25+
# hyper_parameter_map['instance_directory'] = os.environ['TURBINE_OUTPUT']
26+
hyper_parameter_map['save'] = os.environ['TURBINE_OUTPUT']+ "/output-"+os.environ['PMI_RANK']
27+
sys.argv = ['p1b1_runner']
2628
val_loss = p1b1_runner.run(hyper_parameter_map)
2729
print (val_loss)
30+
31+
sfn = os.environ['TURBINE_OUTPUT']+ "/output-"+os.environ['PMI_RANK'] + "/procname-" + parameterString
32+
with open(sfn, 'w') as sfile:
33+
sfile.write(socket.getfqdn())
34+
proc_id = "-"+ str(os.getpid())
35+
sfile.write(proc_id)
36+
2837
# works around this error:
2938
# https://github.com/tensorflow/tensorflow/issues/3388
3039
from keras import backend as K
3140
K.clear_session()
3241

33-
# writing the val loss to the output file
42+
# writing the val loss to the output file (result-*)
3443
with open(filename, 'w') as the_file:
3544
the_file.write(repr(val_loss))
3645

workflows/p1b1_grid/python/p1b1_runner.py

Lines changed: 2 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -7,39 +7,7 @@
77
import json
88
import os
99
import p1b1
10-
import numpy as np
11-
12-
DATA_TYPES = {type(np.float16): 'f16', type(np.float32): 'f32', type(np.float64): 'f64'}
13-
14-
def write_params(params, hyper_parameter_map):
15-
parent_dir = hyper_parameter_map['instance_directory'] if 'instance_directory' in hyper_parameter_map else '.'
16-
f = "{}/parameters_p1b1.txt".format(parent_dir)
17-
with open(f, "w") as f_out:
18-
f_out.write("[parameters]\n")
19-
for k,v in params.items():
20-
if type(v) in DATA_TYPES:
21-
v = DATA_TYPES[type(v)]
22-
if isinstance(v, basestring):
23-
v = "'{}'".format(v)
24-
f_out.write("{}={}\n".format(k, v))
25-
26-
def is_numeric(val):
27-
try:
28-
float(val)
29-
return True
30-
except ValueError:
31-
return False
32-
33-
def format_params(hyper_parameter_map):
34-
for k,v in hyper_parameter_map.items():
35-
vals = str(v).split(" ")
36-
if len(vals) > 1 and is_numeric(vals[0]):
37-
# assume this should be a list
38-
if "." in vals[0]:
39-
hyper_parameter_map[k] = [float(x) for x in vals]
40-
else:
41-
hyper_parameter_map[k] = [int(x) for x in vals]
42-
10+
import runner_utils
4311

4412
def run(hyper_parameter_map):
4513
framework = hyper_parameter_map['framework']
@@ -56,17 +24,15 @@ def run(hyper_parameter_map):
5624
raise ValueError("Invalid framework: {}".format(framework))
5725

5826
# params is a Python dictionary
59-
sys.argv = ['fail here', '--epochs', '54321']
6027
params = pkg.initialize_parameters()
61-
format_params(hyper_parameter_map)
28+
runner_utils.format_params(hyper_parameter_map)
6229

6330
for k,v in hyper_parameter_map.items():
6431
#if not k in params:
6532
# raise Exception("Parameter '{}' not found in set of valid arguments".format(k))
6633
params[k] = v
6734

6835
print(params)
69-
write_params(params, hyper_parameter_map)
7036
history = pkg.run(params)
7137

7238
if framework is 'keras':
@@ -81,27 +47,3 @@ def run(hyper_parameter_map):
8147
# use the last validation_loss as the value to minimize
8248
val_loss = history.history['val_loss']
8349
return val_loss[-1]
84-
85-
def write_output(result, instance_directory):
86-
with open('{}/result.txt'.format(instance_directory), 'w') as f_out:
87-
f_out.write("{}\n".format(result))
88-
89-
def init(param_file, instance_directory):
90-
with open(param_file) as f_in:
91-
hyper_parameter_map = json.load(f_in)
92-
93-
hyper_parameter_map['framework'] = 'keras'
94-
hyper_parameter_map['save'] = '{}/output'.format(instance_directory)
95-
hyper_parameter_map['instance_directory'] = instance_directory
96-
97-
return hyper_parameter_map
98-
99-
if __name__ == '__main__':
100-
print('p1b1_runner main ' + str(argv))
101-
param_file = sys.argv[1]
102-
instance_directory = sys.argv[2]
103-
hyper_parameter_map = init(param_file, instance_directory)
104-
# clear sys.argv so that argparse doesn't object
105-
sys.argv = ['p1b1_runner']
106-
result = run(hyper_parameter_map)
107-
write_output(result, instance_directory)

workflows/p1b1_grid/swift/run

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ export APP_HOME=$THIS
1818

1919
PROJECT_ROOT=$APP_HOME/..
2020

21-
export PYTHONPATH=$PYTHONPATH:$PROJECT_ROOT/python:$P1B1_DIR:$PYTHONPATH
21+
export PYTHONPATH=$PYTHONPATH:$PROJECT_ROOT/python:$P1B1_DIR:$PROJECT_ROOT/../common/python:$PYTHONPATH
2222

2323
export EXPID=$1
2424
export TURBINE_OUTPUT=$APP_HOME/../experiments/$EXPID
@@ -29,13 +29,17 @@ export TURBINE_OUTPUT=$APP_HOME/../experiments/$EXPID
2929
# be ignored if MACHINE flag (see below) is not set
3030
export QUEUE=batch
3131
export WALLTIME=00:10:00
32-
export PPN=16
32+
export PPN=1
3333
export TURBINE_JOBNAME="${EXPID}_job"
3434

35+
# Resident task workers and ranks
36+
export TURBINE_RESIDENT_WORK_WORKERS=1
37+
export RESIDENT_WORK_RANKS=$(( PROCS - 2 ))
38+
3539
echo $PYTHONPATH
3640

3741
### set the desired number of processors
38-
PROCS=2
42+
PROCS=3
3943
###
4044

4145
# remove the -l option to disable printing of processor ranks

0 commit comments

Comments
 (0)