Skip to content

Commit 6743c3f

Browse files
committed
o Add a general workflow that is capable of running any benchmark and perform a basic grid or random search.
1 parent 7393eb9 commit 6743c3f

21 files changed

+840
-0
lines changed

workflows/rnd_or_grid/README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Simple grid or random parameter sweep with Swift for all the benchmarks, driven from the command line. The workflow runs a parameter sweep by calling command-line programs as follows:
2+
- determineParameters.{sh,py}: Reads data/**settings.json** for sweep parameters, and returns them as a string for use by the Swift program (sweep-parameters.txt)
3+
- evaluateOne.{sh,py}: Runs a single experiment. (Calls the specified benchmark).
4+
- computeStats.{sh,py}: Ingests data from all of the experiments and computes simple stats.
5+
6+
Usage: ./run <run directory> <benchmark name> <search type>, e.g. ./run ex3_p1b1_grid p1b1 grid
7+
8+
Notes:
9+
**settings.json**: sweep parameters variation
10+
1. The json file must be present in the data folder and named <benchmark name>_settings.json; sample files are provided and should be modified as needed.
11+
2. Run directory will be created in the experiments folder
12+
3. New variables can be introduced in the determineParameters.py and evaluateOne.py.
13+
4. Variations of parameters must be specified in data/*.json files
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"parameters":
3+
{
4+
"epochs": [1, 2 ],
5+
"batch_size": [10, 20],
6+
"classes": [2, 3]
7+
},
8+
"samples":
9+
{
10+
"num": [2]
11+
}
12+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"parameters":
3+
{
4+
"epochs": [1, 2, 8],
5+
"batch_size": [20, 40],
6+
"N1": [1000, 2000],
7+
"NE": [500, 600]
8+
},
9+
"samples":
10+
{
11+
"num": [2]
12+
}
13+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"parameters":
3+
{
4+
"epochs": [1, 2],
5+
"batch_size": [50, 60],
6+
"test_cell_split": [0.15, 0.25],
7+
"drop": [0.1, 0.15]
8+
},
9+
"samples":
10+
{
11+
"num": [2]
12+
}
13+
14+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"parameters":
3+
{
4+
"epochs": [1, 2],
5+
"batch_size": [32, 40],
6+
"molecular_epochs": [1, 3],
7+
"weight_decay": [0.0005, 0.0006]
8+
},
9+
"samples":
10+
{
11+
"num": [2]
12+
}
13+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"parameters":
3+
{
4+
"epochs": [1 , 2 ],
5+
"batch_size": [20, 40],
6+
"shared_nnet_spec": [1200, 1400],
7+
"n_fold": [1, 2]
8+
},
9+
"samples":
10+
{
11+
"num": [2]
12+
}
13+
}
14+
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import sys
2+
from collections import defaultdict
3+
import json, os
4+
5+
def extractVals(A):
    """Parse a whitespace-separated "name value name value ..." string.

    Returns a dict mapping each name to its value as a float.  A trailing
    name with no value is silently ignored (zip stops at the shorter slice,
    matching the original behavior).

    Note: the original returned a defaultdict(dict) but only ever stored
    floats in it, so a plain dict is equivalent for all callers.
    """
    tokens = A.split()
    return {name: float(val) for name, val in zip(tokens[0::2], tokens[1::2])}
11+
12+
def computeStats(swiftArrayAsString):
    """Compute simple stats (count/min/max/avg) over the experiment results.

    swiftArrayAsString is the "name value ..." string produced by the Swift
    workflow; the values are summarized, printed, and written to
    $TURBINE_OUTPUT/final_stats.txt.
    """
    A = extractVals(swiftArrayAsString)
    vals = list(A.values())

    # Guard against an empty result set: min()/max() and the average would
    # otherwise raise on zero values.
    if not vals:
        print('No values found in input; nothing to summarize')
        return

    # Build the summary once so stdout and the stats file stay in sync.
    summary = '%d values, with min=%f, max=%f, avg=%f\n' % (
        len(vals), min(vals), max(vals), sum(vals) / float(len(vals)))
    print(summary)

    # NOTE(review): assumes TURBINE_OUTPUT is set by the Swift/Turbine
    # launcher — the whole workflow depends on it.
    filename = os.environ['TURBINE_OUTPUT'] + "/final_stats.txt"
    # writing the val loss to the output file
    with open(filename, 'w') as the_file:
        the_file.write(summary)
23+
24+
25+
26+
# ----- Main program: read the aggregated results file and summarize it -----
if (len(sys.argv) < 2):
    print('requires arg=dataFilename')
    sys.exit(1)

dataFilename = sys.argv[1]

try:
    with open(dataFilename, 'r') as the_file:
        data = the_file.read()
except IOError as e:
    print("Could not open: %s" % dataFilename)
    print("PWD is: '%s'" % os.getcwd())
    # Must bail out here: `data` was never assigned, so falling through
    # would raise NameError on the next line.
    sys.exit(1)

computeStats(data)
40+
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
import sys, json, os
2+
import random
3+
4+
# ===== Definitions =========================================================
5+
def expand(Vs, fr, to, soFar):
    """Recursively build the cross-product of the value lists Vs[fr..to].

    Vs maps 1-based positions to lists of parameter values; soFar holds the
    partial comma-separated combinations built so far (seed with ['']).
    Returns a list of strings, one per full combination.
    """
    # A key registered for this benchmark is missing from the json file.
    # (Checked once up front; the original re-checked it every iteration
    # and used a Python-2-only `print` statement for debugging.)
    if Vs[fr] is None:
        print("ERROR: The order of json inputs and values must be preserved")
        sys.exit(1)
    soFarNew = []
    for s in soFar:
        for v in Vs[fr]:
            if s == '':
                soFarNew += [str(v)]
            else:
                soFarNew += [s + ',' + str(v)]
    if fr == to:
        return soFarNew
    return expand(Vs, fr + 1, to, soFarNew)
21+
22+
def generate_random(values, n_samples, benchmarkName):
    """Draw random parameter samples for the given benchmark.

    values maps 1-based positions to [lo, hi] ranges; n_samples is the
    single-element list from settings.json ("samples"/"num"); returns the
    samples as comma-separated strings joined by ':'.

    Bug fix: the original ignored its n_samples parameter and read the
    module-level `samples` global instead.
    """
    # Sampling plan per benchmark: one entry per positional parameter,
    # 'int' -> random.randint, 'float' -> random.uniform.
    plans = {
        "p1b1": ("int", "int", "int", "int"),      # epochs, batch_size, N1, NE
        "p1b3": ("int", "int", "float", "float"),  # epochs, batch_size, test_cell_split, drop
        "nt3": ("int", "int", "int"),              # epochs, batch_size, classes
        "p2b1": ("int", "int", "int", "float"),    # epochs, batch_size, molecular_epochs, weight_decay
        "p3b1": ("int", "int", "int", "int"),      # epochs, batch_size, shared_nnet_spec, n_fold
    }
    if benchmarkName not in plans:
        print('ERROR: Tried all possible benchmarks, Invalid benchmark name or json file')
        sys.exit(1)
    plan = plans[benchmarkName]

    # Populate the result string for writing the sweep-parameters file.
    lines = []
    for _ in range(n_samples[0]):
        sample = []
        for pos, kind in enumerate(plan, start=1):
            lo, hi = values[pos][0], values[pos][1]
            if kind == "int":
                sample.append(str(random.randint(lo, hi)))
            else:
                sample.append(str(random.uniform(lo, hi)))
        lines.append(','.join(sample))
    return ':'.join(lines)
67+
68+
# ===== Main program ========================================================
69+
# ===== Main program ========================================================
# CLI: arg1=settings json, arg2=output file for the sweep string,
#      arg3=benchmark name, arg4=search type (grid|random).
# Bug fix: the original checked `< 3` but unconditionally read argv[3] and
# argv[4], crashing with IndexError instead of a usage message.
if (len(sys.argv) < 5):
    print('requires arg1=settingsFilename, arg2=paramsFilename, '
          'arg3=benchmarkName and arg4=searchType')
    sys.exit(1)

settingsFilename = sys.argv[1]
paramsFilename = sys.argv[2]
benchmarkName = sys.argv[3]
searchType = sys.argv[4]

# Trying to open the settings file
print("Reading settings: %s" % settingsFilename)
try:
    with open(settingsFilename) as fp:
        settings = json.load(fp)
except IOError as e:
    print("Could not open: %s" % settingsFilename)
    print("PWD is: '%s'" % os.getcwd())
    sys.exit(1)

# Read in the variables from json file.
# Register new variables for any benchmark here.
parameters = settings.get('parameters', {})
# Common variables
epochs = parameters.get('epochs')
batch_size = parameters.get('batch_size')
# P1B1
N1 = parameters.get('N1')
NE = parameters.get('NE')
# NT3
classes = parameters.get('classes')
# P2B1
molecular_epochs = parameters.get('molecular_epochs')
weight_decay = parameters.get('weight_decay')
# P3B1
shared_nnet_spec = parameters.get('shared_nnet_spec')
n_fold = parameters.get('n_fold')
# P1B3
test_cell_split = parameters.get('test_cell_split')
drop = parameters.get('drop')

# For random scheme determine number of samples
samples = settings.get('samples', {}).get('num', None)

# Positional (1-based) value lists per benchmark; the order must match the
# sampling/expansion order expected downstream.
benchmark_values = {
    "p1b1": {1: epochs, 2: batch_size, 3: N1, 4: NE},
    "p1b3": {1: epochs, 2: batch_size, 3: test_cell_split, 4: drop},
    "nt3": {1: epochs, 2: batch_size, 3: classes},
    "p2b1": {1: epochs, 2: batch_size, 3: molecular_epochs, 4: weight_decay},
    "p3b1": {1: epochs, 2: batch_size, 3: shared_nnet_spec, 4: n_fold},
}
if benchmarkName not in benchmark_values:
    print('ERROR: Tried all possible benchmarks, Invalid benchmark name or json file')
    sys.exit(1)
values = benchmark_values[benchmarkName]
print(values)

if searchType == "grid":
    result = ':'.join(expand(values, 1, len(values), ['']))
elif searchType == "random":
    if samples is None:
        print("ERROR: Provide number of samples in json file")
        sys.exit(1)
    result = generate_random(values, samples, benchmarkName)
else:
    print("ERROR: Invalid search type, specify either - grid or random")
    sys.exit(1)

with open(paramsFilename, 'w') as the_file:
    the_file.write(result)
149+
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import sys
2+
import json, os
3+
import socket
4+
5+
6+
# Runs a single experiment: parses one comma-separated parameter sample,
# dispatches to the requested benchmark runner, and writes the resulting
# validation loss to the given output file.
#
# CLI: arg1=parameter string, arg2=result filename, arg3=benchmark name.
# Bug fix: the original checked `< 3` but unconditionally read argv[3].
if (len(sys.argv) < 4):
    print('requires arg1=param, arg2=filename and arg3=benchmarkName')
    sys.exit(1)

parameterString = sys.argv[1]
filename = sys.argv[2]
benchmarkName = sys.argv[3]

# All parameters arrive as one comma-separated string; parse as floats and
# cast to int below where the benchmark expects integers.
integs = [float(x) for x in parameterString.split(',')]
print (integs)

# Every run saves its model output under TURBINE_OUTPUT/output-<pid>.
# NOTE(review): assumes TURBINE_OUTPUT is set by the Turbine launcher.
save_dir = os.environ['TURBINE_OUTPUT'] + "/output-" + str(os.getpid())

# Keys common to every benchmark.
hyper_parameter_map = {'epochs': int(integs[0])}
hyper_parameter_map['framework'] = 'keras'
hyper_parameter_map['batch_size'] = int(integs[1])
hyper_parameter_map['save'] = save_dir

if (benchmarkName == "p1b1"):
    import p1b1_runner
    hyper_parameter_map['dense'] = [int(integs[2]), int(integs[3])]
    hyper_parameter_map['run_id'] = parameterString
    sys.argv = ['p1b1_runner']
    val_loss = p1b1_runner.run(hyper_parameter_map)
elif (benchmarkName == "p1b3"):
    import p1b3_runner
    # test_cell_split and drop are fractions in (0, 1) — see the settings
    # files (e.g. 0.15, 0.25).  Bug fix: the original cast them with int(),
    # which truncated every value to 0.
    hyper_parameter_map['test_cell_split'] = integs[2]
    hyper_parameter_map['drop'] = integs[3]
    hyper_parameter_map['run_id'] = parameterString
    sys.argv = ['p1b3_runner']
    val_loss = p1b3_runner.run(hyper_parameter_map)
elif (benchmarkName == "p2b1"):
    import p2b1_runner
    hyper_parameter_map['molecular_epochs'] = int(integs[2])
    hyper_parameter_map['weight_decay'] = integs[3]
    hyper_parameter_map['run_id'] = parameterString
    sys.argv = ['p2b1_runner']
    val_loss = p2b1_runner.run(hyper_parameter_map)
elif (benchmarkName == "nt3"):
    import nt3_tc1_runner
    hyper_parameter_map['classes'] = int(integs[2])
    hyper_parameter_map['model_name'] = 'nt3'
    sys.argv = ['nt3_runner']
    val_loss = nt3_tc1_runner.run(hyper_parameter_map)
elif (benchmarkName == "p3b1"):
    import p3b1_runner
    hyper_parameter_map['shared_nnet_spec'] = int(integs[2])
    hyper_parameter_map['n_fold'] = int(integs[3])
    hyper_parameter_map['run_id'] = parameterString
    sys.argv = ['p3b1_runner']
    val_loss = p3b1_runner.run(hyper_parameter_map)
else:
    # Bug fix: without this branch an unknown benchmark fell through to a
    # NameError on val_loss below.
    print('ERROR: Invalid benchmark name: %s' % benchmarkName)
    sys.exit(1)

print (val_loss)

# works around this error:
# https://github.com/tensorflow/tensorflow/issues/3388
from keras import backend as K
K.clear_session()

# writing the val loss to the output file (result-*)
with open(filename, 'w') as the_file:
    the_file.write(repr(val_loss))
93+

0 commit comments

Comments
 (0)