|
45 | 45 | ######################################################################
|
46 | 46 | # Preparation
|
47 | 47 | # -----------
|
48 |
| -# |
49 |
| -# .. note:: |
50 |
| -# |
51 |
| -# When running this tutorial in Google Colab, install the required packages |
52 |
| -# |
53 |
| -# .. code:: |
54 |
| -# |
55 |
| -# !pip install librosa |
56 |
| -# |
| 48 | + |
57 | 49 | from IPython.display import Audio
|
58 | 50 | from matplotlib.patches import Rectangle
|
59 | 51 | from torchaudio.utils import download_asset
|
@@ -284,31 +276,6 @@ def plot_fbank(fbank, title=None):
|
284 | 276 |
|
285 | 277 | plot_fbank(mel_filters, "Mel Filter Bank - torchaudio")
|
286 | 278 |
|
287 |
| -###################################################################### |
288 |
| -# Comparison against librosa |
289 |
| -# ~~~~~~~~~~~~~~~~~~~~~~~~~~ |
290 |
| -# |
291 |
| -# For reference, here is the equivalent way to get the mel filter bank |
292 |
| -# with ``librosa``. |
293 |
| -# |
294 |
| - |
295 |
| -mel_filters_librosa = librosa.filters.mel( |
296 |
| - sr=sample_rate, |
297 |
| - n_fft=n_fft, |
298 |
| - n_mels=n_mels, |
299 |
| - fmin=0.0, |
300 |
| - fmax=sample_rate / 2.0, |
301 |
| - norm="slaney", |
302 |
| - htk=True, |
303 |
| -).T |
304 |
| - |
305 |
| -###################################################################### |
306 |
| -# |
307 |
| - |
308 |
| -plot_fbank(mel_filters_librosa, "Mel Filter Bank - librosa") |
309 |
| - |
310 |
| -mse = torch.square(mel_filters - mel_filters_librosa).mean().item() |
311 |
| -print("Mean Square Difference: ", mse) |
312 | 279 |
|
313 | 280 | ######################################################################
|
314 | 281 | # MelSpectrogram
|
@@ -345,35 +312,6 @@ def plot_fbank(fbank, title=None):
|
345 | 312 |
|
346 | 313 | plot_spectrogram(melspec[0], title="MelSpectrogram - torchaudio", ylabel="mel freq")
|
347 | 314 |
|
348 |
| -###################################################################### |
349 |
| -# Comparison against librosa |
350 |
| -# ~~~~~~~~~~~~~~~~~~~~~~~~~~ |
351 |
| -# |
352 |
| -# For reference, here is the equivalent means of generating mel-scale |
353 |
| -# spectrograms with ``librosa``. |
354 |
| -# |
355 |
| - |
356 |
| -melspec_librosa = librosa.feature.melspectrogram( |
357 |
| - y=SPEECH_WAVEFORM.numpy()[0], |
358 |
| - sr=sample_rate, |
359 |
| - n_fft=n_fft, |
360 |
| - hop_length=hop_length, |
361 |
| - win_length=win_length, |
362 |
| - center=True, |
363 |
| - pad_mode="reflect", |
364 |
| - power=2.0, |
365 |
| - n_mels=n_mels, |
366 |
| - norm="slaney", |
367 |
| - htk=True, |
368 |
| -) |
369 |
| - |
370 |
| -###################################################################### |
371 |
| -# |
372 |
| - |
373 |
| -plot_spectrogram(melspec_librosa, title="MelSpectrogram - librosa", ylabel="mel freq") |
374 |
| - |
375 |
| -mse = torch.square(melspec - melspec_librosa).mean().item() |
376 |
| -print("Mean Square Difference: ", mse) |
377 | 315 |
|
378 | 316 | ######################################################################
|
379 | 317 | # MFCC
|
@@ -404,37 +342,6 @@ def plot_fbank(fbank, title=None):
|
404 | 342 |
|
405 | 343 | plot_spectrogram(mfcc[0], title="MFCC")
|
406 | 344 |
|
407 |
| -###################################################################### |
408 |
| -# Comparison against librosa |
409 |
| -# ~~~~~~~~~~~~~~~~~~~~~~~~~~ |
410 |
| -# |
411 |
| - |
412 |
| -melspec = librosa.feature.melspectrogram( |
413 |
| - y=SPEECH_WAVEFORM.numpy()[0], |
414 |
| - sr=sample_rate, |
415 |
| - n_fft=n_fft, |
416 |
| - win_length=win_length, |
417 |
| - hop_length=hop_length, |
418 |
| - n_mels=n_mels, |
419 |
| - htk=True, |
420 |
| - norm=None, |
421 |
| -) |
422 |
| - |
423 |
| -mfcc_librosa = librosa.feature.mfcc( |
424 |
| - S=librosa.core.spectrum.power_to_db(melspec), |
425 |
| - n_mfcc=n_mfcc, |
426 |
| - dct_type=2, |
427 |
| - norm="ortho", |
428 |
| -) |
429 |
| - |
430 |
| -###################################################################### |
431 |
| -# |
432 |
| - |
433 |
| -plot_spectrogram(mfcc_librosa, title="MFCC (librosa)") |
434 |
| - |
435 |
| -mse = torch.square(mfcc - mfcc_librosa).mean().item() |
436 |
| -print("Mean Square Difference: ", mse) |
437 |
| - |
438 | 345 | ######################################################################
|
439 | 346 | # LFCC
|
440 | 347 | # ----
|
|
0 commit comments