@@ -302,6 +302,41 @@ def test_generate_chunked_raises_when_overlap_is_not_smaller_than_chunk(self):
302302 overlap_duration = 5.0 ,
303303 )
304304
def test_log_mel_spectrogram_shape_and_params(self):
    """Check log_mel_spectrogram's output layout and the preprocessing defaults.

    Feeds half a second of random noise through ``log_mel_spectrogram``
    using a NeMo-style ``PreprocessArgs`` configuration, then asserts:

    * the result is rank 3, laid out as ``[1, time_frames, n_mels]``;
    * every per-feature mean over the time axis has magnitude below 1.0
      (a deliberately loose bound on the "per_feature" normalization);
    * ``log_zero_guard_value`` defaults to ``2 ** -24`` when not supplied.
    """
    from mlx_audio.stt.models.parakeet.audio import (
        PreprocessArgs,
        log_mel_spectrogram,
    )

    # log_zero_guard_value is intentionally omitted so its default is used.
    preprocess_kwargs = {
        "sample_rate": 16000,
        "normalize": "per_feature",
        "window_size": 0.025,
        "window_stride": 0.01,
        "window": "hann",
        "features": 80,
        "n_fft": 512,
        "dither": 0.0,
    }
    args = PreprocessArgs(**preprocess_kwargs)

    # 0.5 s of noise at 16 kHz.
    duration_s = 0.5
    num_samples = int(16000 * duration_s)
    noise = mx.random.normal((num_samples,))
    mel = log_mel_spectrogram(noise, args)

    # Expected layout: [1, time_frames, n_mels].
    self.assertEqual(mel.ndim, 3)
    batch_size, num_frames, num_mels = mel.shape
    self.assertEqual(batch_size, 1)
    self.assertEqual(num_mels, 80)
    self.assertGreater(num_frames, 0)

    # Normalized output: each feature's mean over time stays near zero.
    feature_means = np.array(mx.mean(mel, axis=1))
    self.assertTrue(np.all(np.abs(feature_means) < 1.0))

    # The log guard value is configurable; confirm its default.
    expected_guard = 2 ** -24
    self.assertAlmostEqual(args.log_zero_guard_value, expected_guard, places=15)
339+
305340 @patch ("mlx.nn.Module.load_weights" )
306341 @patch ("mlx_audio.stt.models.parakeet.parakeet.hf_hub_download" )
307342 @patch ("mlx_audio.stt.models.parakeet.parakeet.json.load" )
0 commit comments