Skip to content

Commit 1e2c83e

Browse files
committed
Update runner and fix output logging
1 parent b6d09da commit 1e2c83e

File tree

4 files changed

+25
-70
lines changed

4 files changed

+25
-70
lines changed
Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import sys
22
import p1b1_runner
33
import json, os
4-
4+
import socket
55

66
if (len(sys.argv) < 3):
77
print('requires arg1=param and arg2=filename')
@@ -12,7 +12,7 @@
1212

1313
# print (parameterString)
1414
print ("filename is " + filename)
15-
15+
print (socket.gethostname())
1616

1717
integs = [int(x) for x in parameterString.split(',')]
1818
print (integs)
@@ -21,16 +21,25 @@
2121
hyper_parameter_map['framework'] = 'keras'
2222
hyper_parameter_map['batch_size'] = integs[1]
2323
hyper_parameter_map['dense'] = [integs[2], integs[3]]
24-
hyper_parameter_map['save'] = os.environ['TURBINE_OUTPUT']+ "/"+'./output'+parameterString
25-
24+
hyper_parameter_map['run_id'] = parameterString
25+
# hyper_parameter_map['instance_directory'] = os.environ['TURBINE_OUTPUT']
26+
hyper_parameter_map['save'] = os.environ['TURBINE_OUTPUT']+ "/output-"+os.environ['PMI_RANK']
27+
sys.argv = ['p1b1_runner']
2628
val_loss = p1b1_runner.run(hyper_parameter_map)
2729
print (val_loss)
30+
31+
sfn = os.environ['TURBINE_OUTPUT']+ "/output-"+os.environ['PMI_RANK'] + "/procname-" + parameterString
32+
with open(sfn, 'w') as sfile:
33+
sfile.write(socket.getfqdn())
34+
proc_id = "-"+ str(os.getpid())
35+
sfile.write(proc_id)
36+
2837
# works around this error:
2938
# https://github.com/tensorflow/tensorflow/issues/3388
3039
from keras import backend as K
3140
K.clear_session()
3241

33-
# writing the val loss to the output file
42+
# writing the val loss to the output file (result-*)
3443
with open(filename, 'w') as the_file:
3544
the_file.write(repr(val_loss))
3645

workflows/p1b1_random/python/p1b1_runner.py

Lines changed: 2 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -7,39 +7,7 @@
77
import json
88
import os
99
import p1b1
10-
import numpy as np
11-
12-
DATA_TYPES = {type(np.float16): 'f16', type(np.float32): 'f32', type(np.float64): 'f64'}
13-
14-
def write_params(params, hyper_parameter_map):
15-
parent_dir = hyper_parameter_map['instance_directory'] if 'instance_directory' in hyper_parameter_map else '.'
16-
f = "{}/parameters_p1b1.txt".format(parent_dir)
17-
with open(f, "w") as f_out:
18-
f_out.write("[parameters]\n")
19-
for k,v in params.items():
20-
if type(v) in DATA_TYPES:
21-
v = DATA_TYPES[type(v)]
22-
if isinstance(v, basestring):
23-
v = "'{}'".format(v)
24-
f_out.write("{}={}\n".format(k, v))
25-
26-
def is_numeric(val):
27-
try:
28-
float(val)
29-
return True
30-
except ValueError:
31-
return False
32-
33-
def format_params(hyper_parameter_map):
34-
for k,v in hyper_parameter_map.items():
35-
vals = str(v).split(" ")
36-
if len(vals) > 1 and is_numeric(vals[0]):
37-
# assume this should be a list
38-
if "." in vals[0]:
39-
hyper_parameter_map[k] = [float(x) for x in vals]
40-
else:
41-
hyper_parameter_map[k] = [int(x) for x in vals]
42-
10+
import runner_utils
4311

4412
def run(hyper_parameter_map):
4513
framework = hyper_parameter_map['framework']
@@ -56,17 +24,15 @@ def run(hyper_parameter_map):
5624
raise ValueError("Invalid framework: {}".format(framework))
5725

5826
# params is a Python dictionary
59-
sys.argv = ['fail here', '--epochs', '54321']
6027
params = pkg.initialize_parameters()
61-
format_params(hyper_parameter_map)
28+
runner_utils.format_params(hyper_parameter_map)
6229

6330
for k,v in hyper_parameter_map.items():
6431
#if not k in params:
6532
# raise Exception("Parameter '{}' not found in set of valid arguments".format(k))
6633
params[k] = v
6734

6835
print(params)
69-
write_params(params, hyper_parameter_map)
7036
history = pkg.run(params)
7137

7238
if framework is 'keras':
@@ -81,27 +47,3 @@ def run(hyper_parameter_map):
8147
# use the last validation_loss as the value to minimize
8248
val_loss = history.history['val_loss']
8349
return val_loss[-1]
84-
85-
def write_output(result, instance_directory):
86-
with open('{}/result.txt'.format(instance_directory), 'w') as f_out:
87-
f_out.write("{}\n".format(result))
88-
89-
def init(param_file, instance_directory):
90-
with open(param_file) as f_in:
91-
hyper_parameter_map = json.load(f_in)
92-
93-
hyper_parameter_map['framework'] = 'keras'
94-
hyper_parameter_map['save'] = '{}/output'.format(instance_directory)
95-
hyper_parameter_map['instance_directory'] = instance_directory
96-
97-
return hyper_parameter_map
98-
99-
if __name__ == '__main__':
100-
print('p1b1_runner main ' + str(argv))
101-
param_file = sys.argv[1]
102-
instance_directory = sys.argv[2]
103-
hyper_parameter_map = init(param_file, instance_directory)
104-
# clear sys.argv so that argparse doesn't object
105-
sys.argv = ['p1b1_runner']
106-
result = run(hyper_parameter_map)
107-
write_output(result, instance_directory)
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#! /usr/bin/env bash
22

3-
P1B1_DIR=../../../../Benchmarks/Pilot1/P1B1
4-
export PYTHONPATH="$PWD/..:$P1B1_DIR"
3+
P1B1_DIR=../../../../../Benchmarks/Pilot1/P1B1
4+
export PYTHONPATH="$PWD/..:$P1B1_DIR:../../../common/python"
55
echo $PYTHONPATH
66

77
python test_p1b1.py

workflows/p1b1_random/swift/run

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,16 @@ fi
1313
P1B1_DIR=../../../../Benchmarks/Pilot1/P1B1
1414
###
1515

16+
# Resident task workers and ranks
17+
export TURBINE_RESIDENT_WORK_WORKERS=1
18+
export RESIDENT_WORK_RANKS=$(( PROCS - 2 ))
19+
1620
THIS=$( cd $( dirname $0 ); /bin/pwd )
1721
export APP_HOME=$THIS
1822

1923
PROJECT_ROOT=$APP_HOME/..
2024

21-
export PYTHONPATH=$PYTHONPATH:$PROJECT_ROOT/python:$P1B1_DIR:$PYTHONPATH
25+
export PYTHONPATH=$PYTHONPATH:$PROJECT_ROOT/python:$P1B1_DIR:$PROJECT_ROOT/../common/python:$PYTHONPATH
2226

2327
export EXPID=$1
2428
export TURBINE_OUTPUT=$APP_HOME/../experiments/$EXPID
@@ -35,10 +39,10 @@ export TURBINE_JOBNAME="${EXPID}_job"
3539
echo $PYTHONPATH
3640

3741
### set the desired number of processors
38-
PROCS=3
42+
PROCS=8
3943
###
4044

4145
# remove the -l option to stop printing processor ranks
4246
# settings.json file has all the parameter combinations to be tested
4347
echo swift-t -n $PROCS $APP_HOME/random-sweep.swift $*
44-
swift-t -l -n $PROCS $APP_HOME/random-sweep.swift $* --settings=$PWD/../data/settings.json
48+
swift-t -l -n $PROCS $APP_HOME/random-sweep.swift $* --settings=$PWD/../data/settings.json

0 commit comments

Comments
 (0)