Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/transformers/audio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ def window_function(
return padded_window


# TODO This method does not support batching yet as we are mainly focused on inference.
# Note: This method processes a single waveform. For batch processing, use spectrogram_batch().
def spectrogram(
waveform: np.ndarray,
window: np.ndarray,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,14 @@ class SwitchTransformersConfig(PreTrainedConfig):
Number of dense hidden layers in the Transformer encoder layer.
num_sparse_encoder_layers (`int`, *optional*, defaults to 3):
Number of sparse (MoE) hidden layers in the Transformer encoder.
Note: When set to 0 with `num_layers=1`, the current implementation may still create a sparse layer
due to the sparse step calculation. This edge case is not encountered in existing checkpoints.
num_decoder_layers (`int`, *optional*, defaults to 12):
Number of hidden layers in the Transformer decoder. Will use the same value as `num_layers` if not set.
num_sparse_decoder_layers (`int`, *optional*, defaults to 3):
Number of sparse (MoE) hidden layers in the Transformer decoder.
Note: When set to 0 with `num_decoder_layers=1`, the current implementation may still create a sparse
layer due to the sparse step calculation. This edge case is not encountered in existing checkpoints.
num_heads (`int`, *optional*, defaults to 12):
Number of attention heads for each attention layer in the Transformer encoder.
num_experts (`int`, *optional*, defaults to 8):
Expand Down Expand Up @@ -150,7 +154,7 @@ def __init__(
else:
self.encoder_sparse_step = self.num_layers # HACK: this will create 0 sparse layers

# This tells us, each how many encoder layer we'll have to set a sparse layer.
# This tells us after how many decoder layers a sparse (MoE) layer should be placed.
if self.num_sparse_decoder_layers > 0:
self.decoder_sparse_step = self.num_decoder_layers // self.num_sparse_decoder_layers
else:
Expand Down