73
73
#
74
74
# First import the modules and define the helper functions.
75
75
#
76
- # We will need torch, torchaudio to use Torchaudio-squim, Matplotlib to
77
- # plot data, pystoi, pesq for computing reference metrics .
76
+ # We will need torch and torchaudio to use Torchaudio-squim, and Matplotlib to
77
+ # plot data.
78
78
#
79
79
80
80
try :
81
- from pesq import pesq
82
- from pystoi import stoi
83
81
from torchaudio .pipelines import SQUIM_OBJECTIVE , SQUIM_SUBJECTIVE
84
82
except ImportError :
85
83
try :
92
90
of the notebook before running it:
93
91
!pip3 uninstall -y torch torchvision torchaudio
94
92
!pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
95
- !pip3 install pesq
96
- !pip3 install pystoi
97
93
"""
98
94
)
99
95
except Exception :
@@ -280,12 +276,17 @@ def plot(waveform, title, sample_rate=16000):
280
276
print (f"PESQ: { pesq_hyp [0 ]} " )
281
277
print (f"SI-SDR: { si_sdr_hyp [0 ]} \n " )
282
278
283
- pesq_ref = pesq (16000 , WAVEFORM_SPEECH [0 ].numpy (), WAVEFORM_DISTORTED [0 ].numpy (), mode = "wb" )
284
- stoi_ref = stoi (WAVEFORM_SPEECH [0 ].numpy (), WAVEFORM_DISTORTED [0 ].numpy (), 16000 , extended = False )
285
- si_sdr_ref = si_snr (WAVEFORM_DISTORTED [0 :1 ], WAVEFORM_SPEECH )
279
+ # To calculate the STOI and PESQ reference metrics,
280
+ # we would need to install the pystoi and pesq packages and execute the following:
281
+ # ```python
282
+ # pesq_ref = pesq(16000, WAVEFORM_SPEECH[0].numpy(), WAVEFORM_DISTORTED[0].numpy(), mode="wb")
283
+ # stoi_ref = stoi(WAVEFORM_SPEECH[0].numpy(), WAVEFORM_DISTORTED[0].numpy(), 16000, extended=False)
284
+ # ```
285
+ # These values are precomputed and hard-coded below.
286
286
print (f"Reference metrics for distorted speech at { snr_dbs [0 ]} dB are\n " )
287
- print (f"STOI: { stoi_ref } " )
288
- print (f"PESQ: { pesq_ref } " )
287
+ print (f"STOI: 0.9670831113894452" )
288
+ print (f"PESQ: 2.7961528301239014" )
289
+ si_sdr_ref = si_snr (WAVEFORM_DISTORTED [0 :1 ], WAVEFORM_SPEECH )
289
290
print (f"SI-SDR: { si_sdr_ref } " )
290
291
291
292
@@ -300,12 +301,11 @@ def plot(waveform, title, sample_rate=16000):
300
301
print (f"PESQ: { pesq_hyp [0 ]} " )
301
302
print (f"SI-SDR: { si_sdr_hyp [0 ]} \n " )
302
303
303
- pesq_ref = pesq (16000 , WAVEFORM_SPEECH [0 ].numpy (), WAVEFORM_DISTORTED [1 ].numpy (), mode = "wb" )
304
- stoi_ref = stoi (WAVEFORM_SPEECH [0 ].numpy (), WAVEFORM_DISTORTED [1 ].numpy (), 16000 , extended = False )
305
304
si_sdr_ref = si_snr (WAVEFORM_DISTORTED [1 :2 ], WAVEFORM_SPEECH )
305
+ # The STOI and PESQ reference metrics are precomputed and hard-coded below.
306
306
print (f"Reference metrics for distorted speech at { snr_dbs [1 ]} dB are\n " )
307
- print (f"STOI: { stoi_ref } " )
308
- print (f"PESQ: { pesq_ref } " )
307
+ print (f"STOI: 0.5743247866630554 " )
308
+ print (f"PESQ: 1.1112866401672363 " )
309
309
print (f"SI-SDR: { si_sdr_ref } " )
310
310
311
311
0 commit comments