Skip to content

Commit 6519052

Browse files
Remove librosa from resampling tutorial (#4000)
* Remove librosa from resampling tutorial
* Replace use of pandas in resampling tutorial
* Remove html table code for resample tutorial

---------

Co-authored-by: Sam Anklesaria <[email protected]>
1 parent 3187fcb commit 6519052

File tree

1 file changed

+25
-93
lines changed

1 file changed

+25
-93
lines changed

examples/tutorials/audio_resampling_tutorial.py

Lines changed: 25 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,10 @@
2626
import math
2727
import timeit
2828

29-
import librosa
3029
import matplotlib.colors as mcolors
3130
import matplotlib.pyplot as plt
32-
import pandas as pd
33-
import resampy
3431
from IPython.display import Audio
35-
36-
pd.set_option("display.max_rows", None)
37-
pd.set_option("display.max_columns", None)
32+
import numpy as np
3833

3934
DEFAULT_OFFSET = 201
4035

@@ -250,11 +245,11 @@ def plot_sweep(
250245

251246

252247
######################################################################
253-
# Comparison against librosa
248+
# Resampling support
254249
# --------------------------
255250
#
256251
# ``torchaudio``’s resample function can be used to produce results similar to
257-
# that of librosa (resampy)’s kaiser window resampling, with some noise
252+
# that of librosa's kaiser window resampling, with some noise
258253
#
259254

260255
sample_rate = 48000
@@ -275,20 +270,6 @@ def plot_sweep(
275270
)
276271
plot_sweep(resampled_waveform, resample_rate, title="Kaiser Window Best (torchaudio)")
277272

278-
######################################################################
279-
#
280-
281-
librosa_resampled_waveform = torch.from_numpy(
282-
librosa.resample(waveform.squeeze().numpy(), orig_sr=sample_rate, target_sr=resample_rate, res_type="kaiser_best")
283-
).unsqueeze(0)
284-
plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Best (librosa)")
285-
286-
######################################################################
287-
#
288-
289-
mse = torch.square(resampled_waveform - librosa_resampled_waveform).mean().item()
290-
print("torchaudio and librosa kaiser best MSE:", mse)
291-
292273
######################################################################
293274
# kaiser_fast
294275
# ~~~~~~~~~~~
@@ -304,35 +285,16 @@ def plot_sweep(
304285
)
305286
plot_sweep(resampled_waveform, resample_rate, title="Kaiser Window Fast (torchaudio)")
306287

307-
######################################################################
308-
#
309-
310-
librosa_resampled_waveform = torch.from_numpy(
311-
librosa.resample(waveform.squeeze().numpy(), orig_sr=sample_rate, target_sr=resample_rate, res_type="kaiser_fast")
312-
).unsqueeze(0)
313-
plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Fast (librosa)")
314-
315-
######################################################################
316-
#
317-
318-
mse = torch.square(resampled_waveform - librosa_resampled_waveform).mean().item()
319-
print("torchaudio and librosa kaiser fast MSE:", mse)
320-
321288
######################################################################
322289
# Performance Benchmarking
323290
# ------------------------
324291
#
325292
# Below are benchmarks for downsampling and upsampling waveforms between
326293
# two pairs of sampling rates. We demonstrate the performance implications
327294
# that the ``lowpass_filter_width``, window type, and sample rates can
328-
# have. Additionally, we provide a comparison against ``librosa``\ ’s
329-
# ``kaiser_best`` and ``kaiser_fast`` using their corresponding parameters
330-
# in ``torchaudio``.
331-
#
295+
# have.
332296

333297
print(f"torchaudio: {torchaudio.__version__}")
334-
print(f"librosa: {librosa.__version__}")
335-
print(f"resampy: {resampy.__version__}")
336298

337299
######################################################################
338300
#
@@ -413,37 +375,6 @@ def benchmark_resample_transforms(
413375
#
414376

415377

416-
def benchmark_resample_librosa(
417-
waveform,
418-
sample_rate,
419-
resample_rate,
420-
res_type=None,
421-
iters=5,
422-
):
423-
waveform_np = waveform.squeeze().numpy()
424-
return (
425-
timeit.timeit(
426-
stmt="""
427-
librosa.resample(
428-
waveform_np,
429-
orig_sr=sample_rate,
430-
target_sr=resample_rate,
431-
res_type=res_type,
432-
)
433-
""",
434-
setup="import librosa",
435-
number=iters,
436-
globals=locals(),
437-
)
438-
* 1000
439-
/ iters
440-
)
441-
442-
443-
######################################################################
444-
#
445-
446-
447378
def benchmark(sample_rate, resample_rate):
448379
times, rows = [], []
449380
waveform = get_sine_sweep(sample_rate).to(torch.float32)
@@ -453,13 +384,13 @@ def benchmark(sample_rate, resample_rate):
453384
# sinc 64 zero-crossings
454385
f_time = benchmark_resample_functional(*args, lowpass_filter_width=64)
455386
t_time = benchmark_resample_transforms(*args, lowpass_filter_width=64)
456-
times.append([None, f_time, t_time])
387+
times.append([f_time, t_time])
457388
rows.append("sinc (width 64)")
458389

459390
# sinc 16 zero-crossings
460391
f_time = benchmark_resample_functional(*args, lowpass_filter_width=16)
461392
t_time = benchmark_resample_transforms(*args, lowpass_filter_width=16)
462-
times.append([None, f_time, t_time])
393+
times.append([f_time, t_time])
463394
rows.append("sinc (width 16)")
464395

465396
# kaiser best
@@ -469,10 +400,9 @@ def benchmark(sample_rate, resample_rate):
469400
"resampling_method": "sinc_interp_kaiser",
470401
"beta": 14.769656459379492,
471402
}
472-
lib_time = benchmark_resample_librosa(*args, res_type="kaiser_best")
473403
f_time = benchmark_resample_functional(*args, **kwargs)
474404
t_time = benchmark_resample_transforms(*args, **kwargs)
475-
times.append([lib_time, f_time, t_time])
405+
times.append([f_time, t_time])
476406
rows.append("kaiser_best")
477407

478408
# kaiser fast
@@ -482,26 +412,28 @@ def benchmark(sample_rate, resample_rate):
482412
"resampling_method": "sinc_interp_kaiser",
483413
"beta": 8.555504641634386,
484414
}
485-
lib_time = benchmark_resample_librosa(*args, res_type="kaiser_fast")
486415
f_time = benchmark_resample_functional(*args, **kwargs)
487416
t_time = benchmark_resample_transforms(*args, **kwargs)
488-
times.append([lib_time, f_time, t_time])
417+
times.append([f_time, t_time])
489418
rows.append("kaiser_fast")
490-
491-
df = pd.DataFrame(times, columns=["librosa", "functional", "transforms"], index=rows)
492-
return df
419+
return (np.array(times), ["functional", "transforms"], rows)
493420

494421

495422
######################################################################
496423
#
497-
def plot(df):
498-
print(df.round(2))
499-
ax = df.plot(kind="bar")
424+
425+
def plot(data, cols, rows):
426+
fig, ax = plt.subplots()
427+
x_data = np.arange(len(rows))
428+
bar_width = 0.8 / len(cols)
429+
for (i, (c, d)) in enumerate(zip(cols, data.T)):
430+
x_pos = x_data + (i - len(cols)/2 + 0.5) * bar_width
431+
ax.bar(x_pos, d, bar_width, label=c)
432+
ax.legend()
433+
ax.set_xticks(x_data)
434+
ax.set_xticklabels(rows)
500435
plt.ylabel("Time Elapsed [ms]")
501-
plt.xticks(rotation=0, fontsize=10)
502-
for cont, col, color in zip(ax.containers, df.columns, mcolors.TABLEAU_COLORS):
503-
label = ["N/A" if v != v else str(v) for v in df[col].round(2)]
504-
ax.bar_label(cont, labels=label, color=color, fontweight="bold", fontsize="x-small")
436+
return ax
505437

506438

507439
######################################################################
@@ -510,31 +442,31 @@ def plot(df):
510442
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
511443

512444
df = benchmark(48_000, 44_100)
513-
plot(df)
445+
plot(*df)
514446

515447
######################################################################
516448
#
517449
# Downsample (16 -> 8 kHz)
518450
# ~~~~~~~~~~~~~~~~~~~~~~~~
519451

520452
df = benchmark(16_000, 8_000)
521-
plot(df)
453+
plot(*df)
522454

523455
######################################################################
524456
#
525457
# Upsample (44.1 -> 48 kHz)
526458
# ~~~~~~~~~~~~~~~~~~~~~~~~~
527459

528460
df = benchmark(44_100, 48_000)
529-
plot(df)
461+
plot(*df)
530462

531463
######################################################################
532464
#
533465
# Upsample (8 -> 16 kHz)
534466
# ~~~~~~~~~~~~~~~~~~~~~~
535467

536468
df = benchmark(8_000, 16_000)
537-
plot(df)
469+
plot(*df)
538470

539471
######################################################################
540472
#

0 commit comments

Comments
 (0)