13
13
_utils .suppress_tensorflow_warnings ()
14
14
import tensorflow .compat .v1 as _tf
15
15
16
+ # This toolkit is compatible with TensorFlow V2 behavior.
17
+ # However, until all toolkits are compatible, we must call `disable_v2_behavior()`.
16
18
_tf .disable_v2_behavior ()
17
19
18
20
19
21
class SoundClassifierTensorFlowModel (TensorFlowModel ):
20
22
def __init__(self, num_inputs, num_classes, custom_layer_sizes):
    """
    Record the model dimensions and open a TensorFlow graph and session.

    No network is built here: `is_initialized` is left False until the
    graph has been populated by a later call.

    Parameters
    ----------
    num_inputs
        Stored as `self.num_inputs`.
    num_classes
        Stored as `self.num_classes`.
    custom_layer_sizes
        Stored as `self.custom_layer_sizes`.
    """
    (
        self.num_inputs,
        self.num_classes,
        self.custom_layer_sizes,
    ) = (num_inputs, num_classes, custom_layer_sizes)

    # The GPU policy is started before the graph and session are created,
    # in the same order as before.
    policy = _utils.TensorFlowGPUPolicy()
    self.gpu_policy = policy
    policy.start()

    graph = _tf.Graph()
    self.sc_graph = graph
    self.sess = _tf.Session(graph=graph)

    self.is_initialized = False
33
37
34
38
def __del__ (self ):
35
39
self .sess .close ()
36
40
self .gpu_policy .stop ()
37
41
38
- def init_sound_classifier_graph (self , num_inputs , custom_layer_sizes ):
42
+ @staticmethod
43
+ def _build_network (x , weights , biases ):
44
+ # Add customized layers
45
+ for i in range (len (weights .keys ())):
46
+ weight_name = "sound_dense{}_weight" .format (i )
47
+ bias_name = "sound_dense{}_bias" .format (i )
48
+ if i == 0 :
49
+ curr_dense = _tf .nn .xw_plus_b (
50
+ x , weights = weights [weight_name ], biases = biases [bias_name ]
51
+ )
52
+ else :
53
+ curr_dense = _tf .nn .xw_plus_b (
54
+ curr_dense , weights = weights [weight_name ], biases = biases [bias_name ]
55
+ )
56
+ if i == (len (weights .keys ()) - 1 ):
57
+ out = _tf .nn .softmax (curr_dense )
58
+ else :
59
+ curr_dense = _tf .nn .relu (curr_dense )
60
+
61
+ return out , curr_dense
39
62
40
- self .x = _tf .placeholder ("float" , [None , 12288 ])
63
+ def init (self ):
64
+ assert not self .is_initialized
65
+ with self .sc_graph .as_default ():
66
+ self .init_sound_classifier_graph ()
67
+ self .is_initialized = True
68
+
69
+ def init_sound_classifier_graph (self ):
70
+ self .x = _tf .placeholder ("float" , [None , self .num_inputs ])
41
71
self .y = _tf .placeholder ("float" , [None , self .num_classes ])
42
72
43
73
# Xavier initialization
@@ -48,13 +78,13 @@ def init_sound_classifier_graph(self, num_inputs, custom_layer_sizes):
48
78
self .names_of_layers = []
49
79
50
80
# Create variables for customized layers
51
- for i , cur_layer_size in enumerate (custom_layer_sizes ):
81
+ for i , cur_layer_size in enumerate (self . custom_layer_sizes ):
52
82
weight_name = "sound_dense{}_weight" .format (i )
53
83
bias_name = "sound_dense{}_bias" .format (i )
54
84
self .names_of_layers .append ("dense{}" .format (i ))
55
85
out_units = cur_layer_size
56
86
if i == 0 :
57
- in_units = num_inputs
87
+ in_units = self . num_inputs
58
88
weights [weight_name ] = _tf .Variable (
59
89
initializer ([in_units , out_units ]), name = weight_name
60
90
)
@@ -72,24 +102,7 @@ def init_sound_classifier_graph(self, num_inputs, custom_layer_sizes):
72
102
initializer ([self .num_classes ]), name = bias_name
73
103
)
74
104
75
- # Add customized layers
76
- for i in range (len (weights .keys ())):
77
- weight_name = "sound_dense{}_weight" .format (i )
78
- bias_name = "sound_dense{}_bias" .format (i )
79
- if i == 0 :
80
- curr_dense = _tf .nn .xw_plus_b (
81
- self .x , weights = weights [weight_name ], biases = biases [bias_name ]
82
- )
83
- else :
84
- curr_dense = _tf .nn .xw_plus_b (
85
- curr_dense , weights = weights [weight_name ], biases = biases [bias_name ]
86
- )
87
- if i == (len (weights .keys ()) - 1 ):
88
- out = _tf .nn .softmax (curr_dense )
89
- else :
90
- curr_dense = _tf .nn .relu (curr_dense )
91
-
92
- self .predictions = out
105
+ self .predictions , curr_dense = SoundClassifierTensorFlowModel ._build_network (self .x , weights , biases )
93
106
94
107
# Loss
95
108
self .cost = _tf .reduce_mean (
@@ -113,6 +126,8 @@ def init_sound_classifier_graph(self, num_inputs, custom_layer_sizes):
113
126
self .sess .run (_tf .global_variables_initializer ())
114
127
115
128
def train (self , data , label ):
129
+ assert self .is_initialized
130
+
116
131
data_shape = data .shape [0 ]
117
132
_ , final_train_loss , final_train_accuracy = self .sess .run (
118
133
[self .optimizer , self .cost , self .accuracy ],
@@ -127,6 +142,8 @@ def train(self, data, label):
127
142
return result
128
143
129
144
def evaluate (self , data , label ):
145
+ assert self .is_initialized
146
+
130
147
data_shape = data .shape [0 ]
131
148
pred_probs , final_accuracy = self .sess .run (
132
149
[self .predictions , self .accuracy ],
@@ -141,6 +158,8 @@ def evaluate(self, data, label):
141
158
return result
142
159
143
160
def predict (self , data ):
161
+ assert self .is_initialized
162
+
144
163
data_shape = data .shape [0 ]
145
164
pred_probs = self .sess .run (
146
165
self .predictions , feed_dict = {self .x : data .reshape ((data_shape , 12288 ))}
@@ -161,6 +180,7 @@ def export_weights(self):
161
180
`numpy.ndarray` converted to the CoreML format and the
162
181
respective activation applied to the layer.
163
182
"""
183
+ assert self .is_initialized
164
184
165
185
with self .sc_graph .as_default ():
166
186
layer_names = _tf .trainable_variables ()
@@ -195,6 +215,7 @@ def get_weights(self):
195
215
shapes are transposed.
196
216
197
217
"""
218
+ assert self .is_initialized
198
219
199
220
with self .sc_graph .as_default ():
200
221
layer_names = _tf .trainable_variables ()
@@ -219,24 +240,18 @@ def load_weights(self, net_params):
219
240
need to be transposed to match TF format.
220
241
221
242
"""
222
- layers = net_params ["data" ].keys ()
243
+ with self .sc_graph .as_default ():
244
+ weights , biases = {}, {}
245
+ for cur_name , cur_layer in net_params ["data" ].items ():
246
+ if "bias" in cur_name :
247
+ biases [cur_name ] = _tf .Variable (cur_layer .astype ('float32' ), name = cur_name )
248
+ else :
249
+ assert "weight" in cur_name
250
+ weights [cur_name ] = _tf .Variable (cur_layer .transpose (1 , 0 ).astype ('float32' ), name = cur_name )
223
251
224
- for layer_name in layers :
225
- new_layer_name = layer_name .replace ("custom" , "sound" )
226
- if "bias" in layer_name :
227
- self .sess .run (
228
- _tf .assign (
229
- self .sc_graph .get_tensor_by_name (new_layer_name + ":0" ),
230
- net_params ["data" ][layer_name ],
231
- )
232
- )
233
- else :
234
- curr_shape = [int (x ) for x in net_params ["shapes" ][layer_name ]]
235
- self .sess .run (
236
- _tf .assign (
237
- self .sc_graph .get_tensor_by_name (new_layer_name + ":0" ),
238
- net_params ["data" ][layer_name ]
239
- .reshape (curr_shape )
240
- .transpose (1 , 0 ),
241
- )
242
- )
252
+ self .x = _tf .placeholder ("float" , [None , self .num_inputs ])
253
+ self .predictions , _ = SoundClassifierTensorFlowModel ._build_network (self .x , weights , biases )
254
+
255
+ self .sess .run (_tf .global_variables_initializer ())
256
+
257
+ self .is_initialized = True
0 commit comments