from __future__ import print_function

import logging
import os
import sys

import numpy as np
import pandas as pd
file_path = os.path.dirname(os.path.realpath(__file__))

import candle  # assumes the CANDLE common package (Benchmarks/common) is on sys.path

candle.set_parallelism_threads()

additional_definitions = [
    {"name": "latent_dim", "action": "store", "type": int, "help": "latent dimensions"},
    {
        "name": "residual",
        "type": candle.str2bool,
        "default": False,
        "help": "add skip connections to the layers",
    },
    {
        "name": "reduce_lr",
        "type": candle.str2bool,
        "default": False,
        "help": "reduce learning rate on plateau",
    },
    {
        "name": "warmup_lr",
        "type": candle.str2bool,
        "default": False,
        "help": "gradually increase learning rate on start",
    },
    {"name": "base_lr", "type": float, "help": "base learning rate"},
    {
        "name": "epsilon_std",
        "type": float,
        "help": "epsilon std for sampling latent noise",
    },
    {
        "name": "use_cp",
        "type": candle.str2bool,
        "default": False,
        "help": "checkpoint models with best val_loss",
    },
    {
        "name": "use_tb",
        "type": candle.str2bool,
        "default": False,
        "help": "use tensorboard",
    },
    {
        "name": "tsne",
        "type": candle.str2bool,
        "default": False,
        "help": "generate tsne plot of the latent representation",
    },
]
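
# Illustrative note (not in the original file): candle-based benchmarks merge
# these definitions into an argparse parser, so each entry above surfaces as a
# command-line flag; candle.str2bool lets boolean flags be passed as strings.
# A hypothetical invocation (script name assumed):
#
#   python attn_baseline_keras2.py --latent_dim 2 --residual True --use_cp True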

required = [
    "activation",
    "batch_size",
    "dense",
    "dropout",
    "epochs",
    "initialization",
    "learning_rate",
    "loss",
    "optimizer",
    "rng_seed",
    "scaling",
    "val_split",
    "latent_dim",
    "batch_normalization",
    "epsilon_std",
    "timeout",
]


class BenchmarkAttn(candle.Benchmark):
    def set_locals(self):
        """Functionality to set variables specific for the benchmark
        - required: set of required parameters for the benchmark.
        - additional_definitions: list of dictionaries describing the additional parameters for the benchmark.
        """

        if required is not None:
            self.required = set(required)
        if additional_definitions is not None:
            self.additional_definitions = additional_definitions

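
# Illustrative sketch (names assumed, not from this file): a baseline runner
# typically instantiates this benchmark and lets candle build the merged
# parameter dictionary from the default model file and the command line:
#
#   attn_bmk = BenchmarkAttn(file_path, "attn_default_model.txt", "keras",
#                            prog="attn_baseline", desc="Attention benchmark")
#   params = candle.finalize_parameters(attn_bmk)
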
def extension_from_parameters(params, framework=""):
    """Construct string for saving model with annotation of parameters"""
    ext = framework
    for i, n in enumerate(params["dense"]):
        if n:
            ext += ".D{}={}".format(i + 1, n)
    ext += ".A={}".format(params["activation"][0])
    ext += ".B={}".format(params["batch_size"])
    ext += ".E={}".format(params["epochs"])
    ext += ".L={}".format(params["latent_dim"])
    ext += ".LR={}".format(params["learning_rate"])
    ext += ".S={}".format(params["scaling"])

    if params["epsilon_std"] != 1.0:
        ext += ".EPS={}".format(params["epsilon_std"])
    if params["dropout"]:
        ext += ".DR={}".format(params["dropout"])
    if params["batch_normalization"]:
        ext += ".BN"
    if params["warmup_lr"]:
        ext += ".WU_LR"
    if params["reduce_lr"]:
        ext += ".Re_LR"
    if params["residual"]:
        ext += ".Res"

    return ext

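
# A minimal worked example (parameter values assumed for illustration, not
# taken from this file): with
#
#   params = {"dense": [1000, 500], "activation": ["relu"], "batch_size": 32,
#             "epochs": 2, "latent_dim": 2, "learning_rate": 0.001,
#             "scaling": "minmax", "epsilon_std": 1.0, "dropout": 0.2,
#             "batch_normalization": False, "warmup_lr": False,
#             "reduce_lr": False, "residual": False}
#
# extension_from_parameters(params, ".keras") returns
# '.keras.D1=1000.D2=500.A=relu.B=32.E=2.L=2.LR=0.001.S=minmax.DR=0.2'
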
def load_data(params, seed):

    # start change #
    if params["train_data"].endswith("h5") or params["train_data"].endswith("hdf5"):
        print("processing h5 in file {}".format(params["train_data"]))

        url = params["data_url"]
        file_train = params["train_data"]
        train_file = candle.get_file(
            file_train, url + file_train, cache_subdir="Pilot1"
        )

        # each feature matrix is stored under two HDF5 keys (x_*_0 and x_*_1),
        # presumably split for size; concatenate them column-wise
        df_x_train_0 = pd.read_hdf(train_file, "x_train_0").astype(np.float32)
        df_x_train_1 = pd.read_hdf(train_file, "x_train_1").astype(np.float32)
        X_train = pd.concat([df_x_train_0, df_x_train_1], axis=1, sort=False)
        del df_x_train_0, df_x_train_1

        df_x_test_0 = pd.read_hdf(train_file, "x_test_0").astype(np.float32)
        df_x_test_1 = pd.read_hdf(train_file, "x_test_1").astype(np.float32)
        X_test = pd.concat([df_x_test_0, df_x_test_1], axis=1, sort=False)
        del df_x_test_0, df_x_test_1

        df_x_val_0 = pd.read_hdf(train_file, "x_val_0").astype(np.float32)
        df_x_val_1 = pd.read_hdf(train_file, "x_val_1").astype(np.float32)
        X_val = pd.concat([df_x_val_0, df_x_val_1], axis=1, sort=False)
        del df_x_val_0, df_x_val_1

        Y_train = pd.read_hdf(train_file, "y_train")
        Y_test = pd.read_hdf(train_file, "y_test")
        Y_val = pd.read_hdf(train_file, "y_val")

        # assumes AUC is in the third column at index 2
        # df_y = df['AUC'].astype('int')
        # scaler = StandardScaler()
        # df_x = scaler.fit_transform(df_x)
    else:
        print("expecting input file with suffix h5 or hdf5")
        sys.exit()

    print("x_train shape:", X_train.shape)
    print("x_test shape:", X_test.shape)

    return X_train, Y_train, X_val, Y_val, X_test, Y_test
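

# Usage sketch (hypothetical values; the URL and file name are illustrative,
# not taken from this file):
#
#   params = {"data_url": "https://example.org/Pilot1/", "train_data": "top_21.h5"}
#   X_train, Y_train, X_val, Y_val, X_test, Y_test = load_data(params, seed=2017)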