26
26
import math
27
27
import timeit
28
28
29
- import librosa
30
29
import matplotlib .colors as mcolors
31
30
import matplotlib .pyplot as plt
32
- import pandas as pd
33
- import resampy
34
31
from IPython .display import Audio
35
-
36
- pd .set_option ("display.max_rows" , None )
37
- pd .set_option ("display.max_columns" , None )
32
+ import numpy as np
38
33
39
34
DEFAULT_OFFSET = 201
40
35
@@ -250,11 +245,11 @@ def plot_sweep(
250
245
251
246
252
247
######################################################################
253
- # Comparison against librosa
248
+ # Resampling support
254
249
# --------------------------
255
250
#
256
251
# ``torchaudio``’s resample function can be used to produce results similar to
257
- # that of librosa (resampy)’ s kaiser window resampling, with some noise
252
+ # that of librosa's kaiser window resampling, with some noise
258
253
#
259
254
260
255
sample_rate = 48000
@@ -275,20 +270,6 @@ def plot_sweep(
275
270
)
276
271
plot_sweep (resampled_waveform , resample_rate , title = "Kaiser Window Best (torchaudio)" )
277
272
278
- ######################################################################
279
- #
280
-
281
- librosa_resampled_waveform = torch .from_numpy (
282
- librosa .resample (waveform .squeeze ().numpy (), orig_sr = sample_rate , target_sr = resample_rate , res_type = "kaiser_best" )
283
- ).unsqueeze (0 )
284
- plot_sweep (librosa_resampled_waveform , resample_rate , title = "Kaiser Window Best (librosa)" )
285
-
286
- ######################################################################
287
- #
288
-
289
- mse = torch .square (resampled_waveform - librosa_resampled_waveform ).mean ().item ()
290
- print ("torchaudio and librosa kaiser best MSE:" , mse )
291
-
292
273
######################################################################
293
274
# kaiser_fast
294
275
# ~~~~~~~~~~~
@@ -304,35 +285,16 @@ def plot_sweep(
304
285
)
305
286
plot_sweep (resampled_waveform , resample_rate , title = "Kaiser Window Fast (torchaudio)" )
306
287
307
- ######################################################################
308
- #
309
-
310
- librosa_resampled_waveform = torch .from_numpy (
311
- librosa .resample (waveform .squeeze ().numpy (), orig_sr = sample_rate , target_sr = resample_rate , res_type = "kaiser_fast" )
312
- ).unsqueeze (0 )
313
- plot_sweep (librosa_resampled_waveform , resample_rate , title = "Kaiser Window Fast (librosa)" )
314
-
315
- ######################################################################
316
- #
317
-
318
- mse = torch .square (resampled_waveform - librosa_resampled_waveform ).mean ().item ()
319
- print ("torchaudio and librosa kaiser fast MSE:" , mse )
320
-
321
288
######################################################################
322
289
# Performance Benchmarking
323
290
# ------------------------
324
291
#
325
292
# Below are benchmarks for downsampling and upsampling waveforms between
326
293
# two pairs of sampling rates. We demonstrate the performance implications
327
294
# that the ``lowpass_filter_width``, window type, and sample rates can
328
- # have. Additionally, we provide a comparison against ``librosa``\ ’s
329
- # ``kaiser_best`` and ``kaiser_fast`` using their corresponding parameters
330
- # in ``torchaudio``.
331
- #
295
+ # have.
332
296
333
297
print (f"torchaudio: { torchaudio .__version__ } " )
334
- print (f"librosa: { librosa .__version__ } " )
335
- print (f"resampy: { resampy .__version__ } " )
336
298
337
299
######################################################################
338
300
#
@@ -413,37 +375,6 @@ def benchmark_resample_transforms(
413
375
#
414
376
415
377
416
- def benchmark_resample_librosa (
417
- waveform ,
418
- sample_rate ,
419
- resample_rate ,
420
- res_type = None ,
421
- iters = 5 ,
422
- ):
423
- waveform_np = waveform .squeeze ().numpy ()
424
- return (
425
- timeit .timeit (
426
- stmt = """
427
- librosa.resample(
428
- waveform_np,
429
- orig_sr=sample_rate,
430
- target_sr=resample_rate,
431
- res_type=res_type,
432
- )
433
- """ ,
434
- setup = "import librosa" ,
435
- number = iters ,
436
- globals = locals (),
437
- )
438
- * 1000
439
- / iters
440
- )
441
-
442
-
443
- ######################################################################
444
- #
445
-
446
-
447
378
def benchmark (sample_rate , resample_rate ):
448
379
times , rows = [], []
449
380
waveform = get_sine_sweep (sample_rate ).to (torch .float32 )
@@ -453,13 +384,13 @@ def benchmark(sample_rate, resample_rate):
453
384
# sinc 64 zero-crossings
454
385
f_time = benchmark_resample_functional (* args , lowpass_filter_width = 64 )
455
386
t_time = benchmark_resample_transforms (* args , lowpass_filter_width = 64 )
456
- times .append ([None , f_time , t_time ])
387
+ times .append ([f_time , t_time ])
457
388
rows .append ("sinc (width 64)" )
458
389
459
390
# sinc 16 zero-crossings
460
391
f_time = benchmark_resample_functional (* args , lowpass_filter_width = 16 )
461
392
t_time = benchmark_resample_transforms (* args , lowpass_filter_width = 16 )
462
- times .append ([None , f_time , t_time ])
393
+ times .append ([f_time , t_time ])
463
394
rows .append ("sinc (width 16)" )
464
395
465
396
# kaiser best
@@ -469,10 +400,9 @@ def benchmark(sample_rate, resample_rate):
469
400
"resampling_method" : "sinc_interp_kaiser" ,
470
401
"beta" : 14.769656459379492 ,
471
402
}
472
- lib_time = benchmark_resample_librosa (* args , res_type = "kaiser_best" )
473
403
f_time = benchmark_resample_functional (* args , ** kwargs )
474
404
t_time = benchmark_resample_transforms (* args , ** kwargs )
475
- times .append ([lib_time , f_time , t_time ])
405
+ times .append ([f_time , t_time ])
476
406
rows .append ("kaiser_best" )
477
407
478
408
# kaiser fast
@@ -482,26 +412,28 @@ def benchmark(sample_rate, resample_rate):
482
412
"resampling_method" : "sinc_interp_kaiser" ,
483
413
"beta" : 8.555504641634386 ,
484
414
}
485
- lib_time = benchmark_resample_librosa (* args , res_type = "kaiser_fast" )
486
415
f_time = benchmark_resample_functional (* args , ** kwargs )
487
416
t_time = benchmark_resample_transforms (* args , ** kwargs )
488
- times .append ([lib_time , f_time , t_time ])
417
+ times .append ([f_time , t_time ])
489
418
rows .append ("kaiser_fast" )
490
-
491
- df = pd .DataFrame (times , columns = ["librosa" , "functional" , "transforms" ], index = rows )
492
- return df
419
+ return (np .array (times ), ["functional" , "transforms" ], rows )
493
420
494
421
495
422
######################################################################
496
423
#
497
- def plot (df ):
498
- print (df .round (2 ))
499
- ax = df .plot (kind = "bar" )
424
+
425
+ def plot (data , cols , rows ):
426
+ fig , ax = plt .subplots ()
427
+ x_data = np .arange (len (rows ))
428
+ bar_width = 0.8 / len (cols )
429
+ for (i , (c , d )) in enumerate (zip (cols , data .T )):
430
+ x_pos = x_data + (i - len (cols )/ 2 + 0.5 ) * bar_width
431
+ ax .bar (x_pos , d , bar_width , label = c )
432
+ ax .legend ()
433
+ ax .set_xticks (x_data )
434
+ ax .set_xticklabels (rows )
500
435
plt .ylabel ("Time Elapsed [ms]" )
501
- plt .xticks (rotation = 0 , fontsize = 10 )
502
- for cont , col , color in zip (ax .containers , df .columns , mcolors .TABLEAU_COLORS ):
503
- label = ["N/A" if v != v else str (v ) for v in df [col ].round (2 )]
504
- ax .bar_label (cont , labels = label , color = color , fontweight = "bold" , fontsize = "x-small" )
436
+ return ax
505
437
506
438
507
439
######################################################################
@@ -510,31 +442,31 @@ def plot(df):
510
442
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
511
443
512
444
df = benchmark (48_000 , 44_100 )
513
- plot (df )
445
+ plot (* df )
514
446
515
447
######################################################################
516
448
#
517
449
# Downsample (16 -> 8 kHz)
518
450
# ~~~~~~~~~~~~~~~~~~~~~~~~
519
451
520
452
df = benchmark (16_000 , 8_000 )
521
- plot (df )
453
+ plot (* df )
522
454
523
455
######################################################################
524
456
#
525
457
# Upsample (44.1 -> 48 kHz)
526
458
# ~~~~~~~~~~~~~~~~~~~~~~~~~
527
459
528
460
df = benchmark (44_100 , 48_000 )
529
- plot (df )
461
+ plot (* df )
530
462
531
463
######################################################################
532
464
#
533
465
# Upsample (8 -> 16 kHz)
534
466
# ~~~~~~~~~~~~~~~~~~~~~~
535
467
536
468
df = benchmark (8_000 , 16_000 )
537
- plot (df )
469
+ plot (* df )
538
470
539
471
######################################################################
540
472
#
0 commit comments