Skip to content

Commit d028489

Browse files
committed
✍️ update contextnet and init notebook examples
1 parent 51d8c55 commit d028489

File tree

9 files changed

+26
-30
lines changed

9 files changed

+26
-30
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ TensorFlowASR implements some automatic speech recognition architectures such as
2121

2222
## What's New?
2323

24+
- (04/17/2021) Refactored the repository for the new version 1.x
2425
- (02/16/2021) Supported TPU training
2526
- (12/27/2020) Supported _naive_ token-level timestamps, see [demo](./examples/demonstration/conformer.py) with flag `--timestamp`
2627
- (12/17/2020) Supported ContextNet [http://arxiv.org/abs/2005.03191](http://arxiv.org/abs/2005.03191)

examples/contextnet/config.yml

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,8 @@ learning_config:
207207
num_masks: 1
208208
mask_factor: 27
209209
data_paths:
210-
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/train-clean-100/transcripts.tsv
211-
tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
210+
- /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/train-clean-100/transcripts.tsv
211+
tfrecords_dir: null
212212
shuffle: True
213213
cache: True
214214
buffer_size: 100
@@ -217,10 +217,8 @@ learning_config:
217217

218218
eval_dataset_config:
219219
use_tf: True
220-
data_paths:
221-
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-clean/transcripts.tsv
222-
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-other/transcripts.tsv
223-
tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
220+
data_paths: null
221+
tfrecords_dir: null
224222
shuffle: False
225223
cache: True
226224
buffer_size: 100
@@ -230,8 +228,8 @@ learning_config:
230228
test_dataset_config:
231229
use_tf: True
232230
data_paths:
233-
- /mnt/Miscellanea/Datasets/Speech/LibriSpeech/test-clean/transcripts.tsv
234-
tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
231+
- /mnt/e/Datasets/Speech/LibriSpeech/test-clean/transcripts.tsv
232+
tfrecords_dir: null
235233
shuffle: False
236234
cache: True
237235
buffer_size: 100
@@ -240,26 +238,21 @@ learning_config:
240238

241239
optimizer_config:
242240
warmup_steps: 40000
243-
beta1: 0.9
244-
beta2: 0.98
241+
beta_1: 0.9
242+
beta_2: 0.98
245243
epsilon: 1e-9
246244

247245
running_config:
248246
batch_size: 2
249-
accumulation_steps: 4
250247
num_epochs: 20
251-
outdir: /mnt/Miscellanea/Models/local/contextnet
252-
log_interval_steps: 300
253-
eval_interval_steps: 500
254-
save_interval_steps: 1000
255248
checkpoint:
256-
filepath: /mnt/Miscellanea/Models/local/contextnet/checkpoints/{epoch:02d}.h5
249+
filepath: /mnt/e/Models/local/contextnet/checkpoints/{epoch:02d}.h5
257250
save_best_only: True
258251
save_weights_only: False
259252
save_freq: epoch
260-
states_dir: /mnt/Miscellanea/Models/local/contextnet/states
253+
states_dir: /mnt/e/Models/local/contextnet/states
261254
tensorboard:
262-
log_dir: /mnt/Miscellanea/Models/local/contextnet/tensorboard
255+
log_dir: /mnt/e/Models/local/contextnet/tensorboard
263256
histogram_freq: 1
264257
write_graph: True
265258
write_images: True

notebooks/conformer.ipynb

Whitespace-only changes.

notebooks/contextnet.ipynb

Whitespace-only changes.

notebooks/deepspeech2.ipynb

Whitespace-only changes.

notebooks/jasper.ipynb

Whitespace-only changes.

tensorflow_asr/models/base_model.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,12 @@ def train_step(self, batch):
9595
y_pred = self(inputs, training=True)
9696
loss = self.loss(y_true, y_pred)
9797
if self.use_loss_scale:
98-
loss = self.optimizer.get_scaled_loss(loss)
99-
gradients = tape.gradient(loss, self.trainable_weights)
98+
scaled_loss = self.optimizer.get_scaled_loss(loss)
10099
if self.use_loss_scale:
100+
gradients = tape.gradient(scaled_loss, self.trainable_weights)
101101
gradients = self.optimizer.get_unscaled_gradients(gradients)
102+
else:
103+
gradients = tape.gradient(loss, self.trainable_weights)
102104
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
103105
self._metrics["loss"].update_state(loss)
104106
return {m.name: m.result() for m in self.metrics}
@@ -127,6 +129,8 @@ def predict_step(self, batch):
127129
beam_search_decoding = self.recognize_beam(inputs)
128130
return tf.stack([labels, greedy_decoding, beam_search_decoding], axis=-1)
129131

132+
# -------------------------------- INFERENCE FUNCTIONS -------------------------------------
133+
130134
def recognize(self, features, input_lengths, **kwargs):
131135
pass
132136

tensorflow_asr/models/ctc/ctc.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,6 @@ def __init__(self,
3838
self.decoder = decoder
3939
self.time_reduction_factor = 1
4040

41-
@property
42-
def metrics(self):
43-
return [self.loss_metric]
44-
4541
def _build(self, input_shape, batch_size=None):
4642
inputs = tf.keras.Input(input_shape, batch_size=batch_size, dtype=tf.float32)
4743
inputs_length = tf.keras.Input(shape=[], batch_size=batch_size, dtype=tf.int32)

tensorflow_asr/models/transducer/contextnet.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from ..encoders.contextnet import ContextNetEncoder, L2
1919
from .transducer import Transducer
20-
from ...utils import math_util
20+
from ...utils import math_util, data_util
2121

2222

2323
class ContextNet(Transducer):
@@ -80,11 +80,13 @@ def __init__(self,
8080
for block in self.encoder.blocks: self.time_reduction_factor *= block.time_reduction_factor
8181

8282
def call(self, inputs, training=False, **kwargs):
83-
features, input_length, prediction, prediction_length = inputs
84-
enc = self.encoder([features, input_length], training=training, **kwargs)
85-
pred = self.predict_net([prediction, prediction_length], training=training, **kwargs)
86-
outputs = self.joint_net([enc, pred], training=training, **kwargs)
87-
return outputs
83+
enc = self.encoder([inputs["inputs"], inputs["inputs_length"]], training=training, **kwargs)
84+
pred = self.predict_net([inputs["predictions"], inputs["predictions_length"]], training=training, **kwargs)
85+
logits = self.joint_net([enc, pred], training=training, **kwargs)
86+
return data_util.create_logits(
87+
logits=logits,
88+
logits_length=math_util.get_reduced_length(inputs["inputs_length"], self.time_reduction_factor)
89+
)
8890

8991
def encoder_inference(self, features: tf.Tensor, input_length: tf.Tensor):
9092
with tf.name_scope(f"{self.name}_encoder"):

0 commit comments

Comments
 (0)