Skip to content
This repository was archived by the owner on Dec 21, 2023. It is now read-only.

Commit 3fe09b5

Browse files
authored
Sound classifier works with TF V2 behavior enabled (#3021)
* Move the neural network build to a separate function * Separate object construction from object initialization * Make neural network work in V2 functionality by directly using saved weights when constructing graph * Fix error message * Remove unused class * Add comment about still needing to disable V2 behavior * Fix python 2.7 issue
1 parent 845fc13 commit 3fe09b5

File tree

3 files changed

+65
-71
lines changed

3 files changed

+65
-71
lines changed

src/python/turicreate/toolkits/sound_classifier/_audio_feature_extractor.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
# We need to disable this here to match behavior in the rest of TuriCreate
1919
from tensorflow.compat.v1 import disable_v2_behavior
2020

21+
# This toolkit is compatible with TensorFlow V2 behavior.
22+
# However, until all toolkits are compatible, we must call `disable_v2_behavior()`.
2123
disable_v2_behavior()
2224

2325

src/python/turicreate/toolkits/sound_classifier/_tf_sound_classifier.py

Lines changed: 61 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -13,31 +13,61 @@
1313
_utils.suppress_tensorflow_warnings()
1414
import tensorflow.compat.v1 as _tf
1515

16+
# This toolkit is compatible with TensorFlow V2 behavior.
17+
# However, until all toolkits are compatible, we must call `disable_v2_behavior()`.
1618
_tf.disable_v2_behavior()
1719

1820

1921
class SoundClassifierTensorFlowModel(TensorFlowModel):
2022
def __init__(self, num_inputs, num_classes, custom_layer_sizes):
2123
"""
2224
Defines the TensorFlow model, loss, optimisation and accuracy.
23-
2425
"""
26+
self.num_inputs = num_inputs
27+
self.num_classes = num_classes
28+
self.custom_layer_sizes = custom_layer_sizes
29+
2530
self.gpu_policy = _utils.TensorFlowGPUPolicy()
2631
self.gpu_policy.start()
2732

2833
self.sc_graph = _tf.Graph()
29-
self.num_classes = num_classes
3034
self.sess = _tf.Session(graph=self.sc_graph)
31-
with self.sc_graph.as_default():
32-
self.init_sound_classifier_graph(num_inputs, custom_layer_sizes)
35+
36+
self.is_initialized = False
3337

3438
def __del__(self):
3539
self.sess.close()
3640
self.gpu_policy.stop()
3741

38-
def init_sound_classifier_graph(self, num_inputs, custom_layer_sizes):
42+
@staticmethod
43+
def _build_network(x, weights, biases):
44+
# Add customized layers
45+
for i in range(len(weights.keys())):
46+
weight_name = "sound_dense{}_weight".format(i)
47+
bias_name = "sound_dense{}_bias".format(i)
48+
if i == 0:
49+
curr_dense = _tf.nn.xw_plus_b(
50+
x, weights=weights[weight_name], biases=biases[bias_name]
51+
)
52+
else:
53+
curr_dense = _tf.nn.xw_plus_b(
54+
curr_dense, weights=weights[weight_name], biases=biases[bias_name]
55+
)
56+
if i == (len(weights.keys()) - 1):
57+
out = _tf.nn.softmax(curr_dense)
58+
else:
59+
curr_dense = _tf.nn.relu(curr_dense)
60+
61+
return out, curr_dense
3962

40-
self.x = _tf.placeholder("float", [None, 12288])
63+
def init(self):
64+
assert not self.is_initialized
65+
with self.sc_graph.as_default():
66+
self.init_sound_classifier_graph()
67+
self.is_initialized = True
68+
69+
def init_sound_classifier_graph(self):
70+
self.x = _tf.placeholder("float", [None, self.num_inputs])
4171
self.y = _tf.placeholder("float", [None, self.num_classes])
4272

4373
# Xavier initialization
@@ -48,13 +78,13 @@ def init_sound_classifier_graph(self, num_inputs, custom_layer_sizes):
4878
self.names_of_layers = []
4979

5080
# Create variables for customized layers
51-
for i, cur_layer_size in enumerate(custom_layer_sizes):
81+
for i, cur_layer_size in enumerate(self.custom_layer_sizes):
5282
weight_name = "sound_dense{}_weight".format(i)
5383
bias_name = "sound_dense{}_bias".format(i)
5484
self.names_of_layers.append("dense{}".format(i))
5585
out_units = cur_layer_size
5686
if i == 0:
57-
in_units = num_inputs
87+
in_units = self.num_inputs
5888
weights[weight_name] = _tf.Variable(
5989
initializer([in_units, out_units]), name=weight_name
6090
)
@@ -72,24 +102,7 @@ def init_sound_classifier_graph(self, num_inputs, custom_layer_sizes):
72102
initializer([self.num_classes]), name=bias_name
73103
)
74104

75-
# Add customized layers
76-
for i in range(len(weights.keys())):
77-
weight_name = "sound_dense{}_weight".format(i)
78-
bias_name = "sound_dense{}_bias".format(i)
79-
if i == 0:
80-
curr_dense = _tf.nn.xw_plus_b(
81-
self.x, weights=weights[weight_name], biases=biases[bias_name]
82-
)
83-
else:
84-
curr_dense = _tf.nn.xw_plus_b(
85-
curr_dense, weights=weights[weight_name], biases=biases[bias_name]
86-
)
87-
if i == (len(weights.keys()) - 1):
88-
out = _tf.nn.softmax(curr_dense)
89-
else:
90-
curr_dense = _tf.nn.relu(curr_dense)
91-
92-
self.predictions = out
105+
self.predictions, curr_dense = SoundClassifierTensorFlowModel._build_network(self.x, weights, biases)
93106

94107
# Loss
95108
self.cost = _tf.reduce_mean(
@@ -113,6 +126,8 @@ def init_sound_classifier_graph(self, num_inputs, custom_layer_sizes):
113126
self.sess.run(_tf.global_variables_initializer())
114127

115128
def train(self, data, label):
129+
assert self.is_initialized
130+
116131
data_shape = data.shape[0]
117132
_, final_train_loss, final_train_accuracy = self.sess.run(
118133
[self.optimizer, self.cost, self.accuracy],
@@ -127,6 +142,8 @@ def train(self, data, label):
127142
return result
128143

129144
def evaluate(self, data, label):
145+
assert self.is_initialized
146+
130147
data_shape = data.shape[0]
131148
pred_probs, final_accuracy = self.sess.run(
132149
[self.predictions, self.accuracy],
@@ -141,6 +158,8 @@ def evaluate(self, data, label):
141158
return result
142159

143160
def predict(self, data):
161+
assert self.is_initialized
162+
144163
data_shape = data.shape[0]
145164
pred_probs = self.sess.run(
146165
self.predictions, feed_dict={self.x: data.reshape((data_shape, 12288))}
@@ -161,6 +180,7 @@ def export_weights(self):
161180
`numpy.ndarray` converted to the CoreML format and the
162181
respective activation applied to the layer.
163182
"""
183+
assert self.is_initialized
164184

165185
with self.sc_graph.as_default():
166186
layer_names = _tf.trainable_variables()
@@ -195,6 +215,7 @@ def get_weights(self):
195215
shapes are transposed.
196216
197217
"""
218+
assert self.is_initialized
198219

199220
with self.sc_graph.as_default():
200221
layer_names = _tf.trainable_variables()
@@ -219,24 +240,18 @@ def load_weights(self, net_params):
219240
need to be transposed to match TF format.
220241
221242
"""
222-
layers = net_params["data"].keys()
243+
with self.sc_graph.as_default():
244+
weights, biases = {}, {}
245+
for cur_name, cur_layer in net_params["data"].items():
246+
if "bias" in cur_name:
247+
biases[cur_name] = _tf.Variable(cur_layer.astype('float32'), name=cur_name)
248+
else:
249+
assert "weight" in cur_name
250+
weights[cur_name] = _tf.Variable(cur_layer.transpose(1, 0).astype('float32'), name=cur_name)
223251

224-
for layer_name in layers:
225-
new_layer_name = layer_name.replace("custom", "sound")
226-
if "bias" in layer_name:
227-
self.sess.run(
228-
_tf.assign(
229-
self.sc_graph.get_tensor_by_name(new_layer_name + ":0"),
230-
net_params["data"][layer_name],
231-
)
232-
)
233-
else:
234-
curr_shape = [int(x) for x in net_params["shapes"][layer_name]]
235-
self.sess.run(
236-
_tf.assign(
237-
self.sc_graph.get_tensor_by_name(new_layer_name + ":0"),
238-
net_params["data"][layer_name]
239-
.reshape(curr_shape)
240-
.transpose(1, 0),
241-
)
242-
)
252+
self.x = _tf.placeholder("float", [None, self.num_inputs])
253+
self.predictions, _ = SoundClassifierTensorFlowModel._build_network(self.x, weights, biases)
254+
255+
self.sess.run(_tf.global_variables_initializer())
256+
257+
self.is_initialized = True

src/python/turicreate/toolkits/sound_classifier/sound_classifier.py

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -54,30 +54,6 @@ def reset(self):
5454
raise NotImplementedError
5555

5656

57-
class _TFDataIterator(_DataIterator):
58-
def __init__(self, data, label=None, batch_size=1, shuffle=False):
59-
import tensorflow as tf
60-
61-
# Always pass a tuple, so that the impl's built-in iterator returns a
62-
# tuple.
63-
tensor_slices = (data, label) if label is not None else (data,)
64-
65-
self.impl = tf.data.Dataset.from_tensor_slices(tensor_slices)
66-
67-
# Apply options.
68-
self.impl = self.impl.batch(batch_size)
69-
if shuffle:
70-
self.impl = self.impl.shuffle(data.shape[0])
71-
72-
def __iter__(self):
73-
return self.impl.__iter__()
74-
75-
def reset(self):
76-
# Each call to __iter__ returns a fresh iterator object that will do one
77-
# pass through the data.
78-
pass
79-
80-
8157
class _NumPyDataIterator(_DataIterator):
8258
def __init__(self, data, label=None, batch_size=1, shuffle=False):
8359

@@ -459,6 +435,7 @@ def create(
459435
custom_NN = SoundClassifierTensorFlowModel(
460436
feature_extractor.output_length, num_labels, custom_layer_sizes
461437
)
438+
custom_NN.init()
462439

463440
if verbose:
464441
# Setup progress table
@@ -1065,7 +1042,7 @@ class as a vector. Label ordering is dictated by the ``classes``
10651042

10661043
if output_type not in ("probability", "probability_vector", "class"):
10671044
raise ValueError(
1068-
"'dataset' parameter must be either an SFrame, SArray or dictionary"
1045+
"'output_type' parameter must be either 'probability', 'probability_vector', 'class'."
10691046
)
10701047
if output_type == "probability" and self.num_classes != 2:
10711048
raise _ToolkitError(

0 commit comments

Comments
 (0)