Remove dependencies for mvdr tutorial (#4003)

samanklesaria · web-flow · commit 6ad76d149892 · 2025-07-31T10:54:08.000-05:00
* Remove dependencies for mvdr tutorial

* Add back `evaluate` function to mvdr tutorial

---------

Co-authored-by: Sam Anklesaria <sanklesaria@openteams.com>
diff --git a/examples/tutorials/mvdr_tutorial.py b/examples/tutorials/mvdr_tutorial.py
@@ -38,7 +38,6 @@
 
 
 import matplotlib.pyplot as plt
-import mir_eval
 from IPython.display import Audio
 
 ######################################################################
@@ -48,21 +47,8 @@
 
 ######################################################################
 # 2.1. Import the packages
-# ~~~~~~~~~~~~~~~~~~~~~~~~
 #
-# First, we install and import the necessary packages.
-#
-# ``mir_eval``, ``pesq``, and ``pystoi`` packages are required for
-# evaluating the speech enhancement performance.
-#
-
-# When running this example in notebook, install the following packages.
-# !pip3 install mir_eval
-# !pip3 install pesq
-# !pip3 install pystoi
 
-from pesq import pesq
-from pystoi import stoi
 from torchaudio.utils import download_asset
 
 ######################################################################
@@ -142,8 +128,14 @@ def generate_mixture(waveform_clean, waveform_noise, target_snr):
     waveform_noise *= 10 ** (-(target_snr - current_snr) / 20)
     return waveform_clean + waveform_noise
 
-
+# If you have mir_eval installed, you can use it to evaluate the separation quality of the estimated sources.
+# You can also evaluate the intelligibility of the speech with the Short-Time Objective Intelligibility (STOI) metric
+# available in the `pystoi` package, or the Perceptual Evaluation of Speech Quality (PESQ) metric available in the `pesq` package.
 def evaluate(estimate, reference):
+    from pesq import pesq
+    from pystoi import stoi
+    import mir_eval
+
     si_snr_score = si_snr(estimate, reference)
     (
         sdr,
@@ -158,7 +150,6 @@ def evaluate(estimate, reference):
     print(f"PESQ score: {pesq_mix}")
     print(f"STOI score: {stoi_mix}")
 
-
 ######################################################################
 # 3. Generate Ideal Ratio Masks (IRMs)
 # ------------------------------------
@@ -211,18 +202,9 @@ def evaluate(estimate, reference):
 # 3.2.1. Visualize mixture speech
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
-# We evaluate the quality of the mixture speech or the enhanced speech
-# using the following three metrics:
-#
-# -  signal-to-distortion ratio (SDR)
-# -  scale-invariant signal-to-noise ratio (Si-SNR, or Si-SDR in some papers)
-# -  Perceptual Evaluation of Speech Quality (PESQ)
-#
-# We also evaluate the intelligibility of the speech with the Short-Time Objective Intelligibility
-# (STOI) metric.
+
 
 plot_spectrogram(stft_mix[0], "Spectrogram of Mixture Speech (dB)")
-evaluate(waveform_mix[0:1], waveform_clean[0:1])
 Audio(waveform_mix[0], rate=SAMPLE_RATE)
 
 
@@ -335,7 +317,6 @@ def get_irms(stft_clean, stft_noise):
 
 plot_spectrogram(stft_souden, "Enhanced Spectrogram by SoudenMVDR (dB)")
 waveform_souden = waveform_souden.reshape(1, -1)
-evaluate(waveform_souden, waveform_clean[0:1])
 Audio(waveform_souden, rate=SAMPLE_RATE)
 
 
@@ -393,7 +374,6 @@ def get_irms(stft_clean, stft_noise):
 
 plot_spectrogram(stft_rtf_evd, "Enhanced Spectrogram by RTFMVDR and F.rtf_evd (dB)")
 waveform_rtf_evd = waveform_rtf_evd.reshape(1, -1)
-evaluate(waveform_rtf_evd, waveform_clean[0:1])
 Audio(waveform_rtf_evd, rate=SAMPLE_RATE)
 
 
@@ -404,5 +384,4 @@ def get_irms(stft_clean, stft_noise):
 
 plot_spectrogram(stft_rtf_power, "Enhanced Spectrogram by RTFMVDR and F.rtf_power (dB)")
 waveform_rtf_power = waveform_rtf_power.reshape(1, -1)
-evaluate(waveform_rtf_power, waveform_clean[0:1])
 Audio(waveform_rtf_power, rate=SAMPLE_RATE)