Skip to content

Commit 5aeb925

Browse files
authored
Multimodal - audio tests (#25285)
Signed-off-by: Debolina Roy <[email protected]>
1 parent 04d3752 commit 5aeb925

File tree

1 file changed

+140
-0
lines changed

1 file changed

+140
-0
lines changed

tests/multimodal/test_audio.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
# test_audio.py
4+
import base64
5+
from pathlib import Path
6+
from unittest.mock import patch
7+
8+
import numpy as np
9+
import pytest
10+
11+
from vllm.multimodal.audio import (AudioMediaIO, AudioResampler,
12+
resample_audio_librosa,
13+
resample_audio_scipy)
14+
15+
16+
@pytest.fixture
def dummy_audio():
    """Provide a short five-sample float waveform for the resampling tests."""
    samples = [0.0, 0.1, 0.2, 0.3, 0.4]
    return np.array(samples, dtype=float)
19+
20+
21+
def test_resample_audio_librosa(dummy_audio):
    """resample_audio_librosa should pass its arguments to librosa.resample.

    ``librosa.resample`` is patched, so the test checks the call arguments
    and that the patched function's return value is handed back.
    """
    doubled = dummy_audio * 2
    with patch("vllm.multimodal.audio.librosa.resample",
               return_value=doubled) as mock_resample:
        out = resample_audio_librosa(dummy_audio,
                                     orig_sr=44100,
                                     target_sr=22050)

    # The mock keeps its call record after the patch is undone.
    mock_resample.assert_called_once_with(dummy_audio,
                                          orig_sr=44100,
                                          target_sr=22050)
    assert np.all(out == doubled)
31+
32+
33+
def test_resample_audio_scipy(dummy_audio):
    """Check resample_audio_scipy for halved, doubled and unchanged rates.

    The five-sample input is expected to give 3 samples when the rate is
    halved, 10 samples when it is doubled, and the same values when the
    source and target rates match.
    """
    halved = resample_audio_scipy(dummy_audio, orig_sr=4, target_sr=2)
    doubled = resample_audio_scipy(dummy_audio, orig_sr=2, target_sr=4)
    unchanged = resample_audio_scipy(dummy_audio, orig_sr=4, target_sr=4)

    assert len(halved) == 3
    assert len(doubled) == 10
    assert np.all(unchanged == dummy_audio)
41+
42+
43+
@pytest.mark.xfail(
    reason="resample_audio_scipy is buggy for non-integer ratios")
def test_resample_audio_scipy_non_integer_ratio(dummy_audio):
    """Resampling 5 -> 3 should scale the length by 3/5 with finite output.

    Marked as an expected failure; see the ``reason`` on the xfail marker.
    """
    orig_sr, target_sr = 5, 3
    out = resample_audio_scipy(dummy_audio,
                               orig_sr=orig_sr,
                               target_sr=target_sr)

    # Output length should follow the rate ratio, rounded to an integer.
    expected_len = int(round(len(dummy_audio) * target_sr / orig_sr))
    assert len(out) == expected_len

    assert isinstance(out, np.ndarray)
    assert np.isfinite(out).all()
53+
54+
55+
def test_audio_resampler_librosa_calls_resample(dummy_audio):
    """AudioResampler(method="librosa") should call resample_audio_librosa.

    The module-level helper is patched; the test checks that it receives
    the audio, the original rate and the resampler's target rate, and that
    its return value is passed back by ``resample``.
    """
    resampler = AudioResampler(target_sr=22050, method="librosa")
    with patch("vllm.multimodal.audio.resample_audio_librosa",
               return_value=dummy_audio) as mock_resample:
        result = resampler.resample(dummy_audio, orig_sr=44100)

    mock_resample.assert_called_once_with(dummy_audio,
                                          orig_sr=44100,
                                          target_sr=22050)
    assert np.all(result == dummy_audio)
65+
66+
67+
def test_audio_resampler_scipy_calls_resample(dummy_audio):
    """AudioResampler(method="scipy") should call resample_audio_scipy.

    The module-level helper is patched; the test checks that it receives
    the audio, the original rate and the resampler's target rate, and that
    its return value is passed back by ``resample``.
    """
    resampler = AudioResampler(target_sr=22050, method="scipy")
    with patch("vllm.multimodal.audio.resample_audio_scipy",
               return_value=dummy_audio) as mock_resample:
        result = resampler.resample(dummy_audio, orig_sr=44100)

    mock_resample.assert_called_once_with(dummy_audio,
                                          orig_sr=44100,
                                          target_sr=22050)
    assert np.all(result == dummy_audio)
76+
77+
78+
def test_audio_resampler_invalid_method(dummy_audio):
    """Resampling with an unrecognised method name should raise ValueError."""
    # Built outside the ``raises`` block so that only the ``resample`` call
    # is checked for the exception.
    bad_resampler = AudioResampler(target_sr=22050, method="invalid")

    with pytest.raises(ValueError):
        bad_resampler.resample(dummy_audio, orig_sr=44100)
82+
83+
84+
def test_audio_resampler_no_target_sr(dummy_audio):
    """Resampling without a configured target rate should raise RuntimeError."""
    # Built outside the ``raises`` block so that only the ``resample`` call
    # is checked for the exception.
    unconfigured = AudioResampler(target_sr=None)

    with pytest.raises(RuntimeError):
        unconfigured.resample(dummy_audio, orig_sr=44100)
88+
89+
90+
@pytest.fixture
def dummy_audio_bytes():
    """Provide an opaque byte string standing in for encoded audio data."""
    payload = b"FAKEAUDIOBYTES"
    return payload
93+
94+
95+
def test_audio_media_io_load_bytes(dummy_audio_bytes):
    """AudioMediaIO.load_bytes should load the data through librosa.load.

    ``librosa.load`` is patched to return a fixed ``(samples, rate)`` pair;
    the test checks it is called exactly once and that the pair comes back.
    """
    media_io = AudioMediaIO()
    fake_loaded = (np.array([0.1, 0.2]), 16000)
    with patch("vllm.multimodal.audio.librosa.load",
               return_value=fake_loaded) as mock_load:
        loaded = media_io.load_bytes(dummy_audio_bytes)

    mock_load.assert_called_once()
    assert isinstance(loaded[0], np.ndarray)
    assert loaded[1] == 16000
103+
104+
105+
def test_audio_media_io_load_base64(dummy_audio_bytes):
    """AudioMediaIO.load_base64 should decode the payload for load_bytes.

    ``load_bytes`` is patched on the class, so this test covers only the
    base64 handling: the decoded bytes passed on must equal the original
    payload, and the value returned by ``load_bytes`` must be passed back.
    """
    audio_io = AudioMediaIO()
    encoded = base64.b64encode(dummy_audio_bytes).decode("utf-8")
    with patch.object(AudioMediaIO, "load_bytes") as mock_load_bytes:
        mock_load_bytes.return_value = (np.array([0.1, 0.2]), 16000)
        out = audio_io.load_base64("audio/wav", encoded)

    # Assert on the argument, not just the call count: otherwise the test
    # would still pass if the data were forwarded without being decoded.
    # The mock replaces the class attribute and is not a descriptor, so
    # ``self`` is not part of the recorded call.
    mock_load_bytes.assert_called_once_with(dummy_audio_bytes)
    assert isinstance(out[0], np.ndarray)
    assert out[1] == 16000
114+
115+
116+
def test_audio_media_io_load_file():
    """AudioMediaIO.load_file should call librosa.load(path, sr=None).

    ``librosa.load`` is patched, so the path is never opened; the test
    checks the call arguments and that the patched return value comes back.
    """
    media_io = AudioMediaIO()
    wav_path = Path("/fake/path.wav")
    fake_loaded = (np.array([0.1, 0.2]), 16000)
    with patch("vllm.multimodal.audio.librosa.load",
               return_value=fake_loaded) as mock_load:
        loaded = media_io.load_file(wav_path)

    mock_load.assert_called_once_with(wav_path, sr=None)
    assert isinstance(loaded[0], np.ndarray)
    assert loaded[1] == 16000
125+
126+
127+
def test_audio_media_io_encode_base64(dummy_audio):
    """AudioMediaIO.encode_base64 should base64-encode what soundfile writes.

    ``soundfile.write`` is patched with a stand-in that writes a known byte
    string to the buffer it is given; decoding the result must give those
    bytes back, and the writer must have been called exactly once.
    """

    def _fake_write(buffer, *_args, **_kwargs):
        # Stand-in for soundfile.write: emit a fixed payload to the buffer.
        buffer.write(b"dummy_wav_data")

    media_io = AudioMediaIO()
    media = (dummy_audio, 16000)
    with patch("vllm.multimodal.audio.soundfile.write",
               side_effect=_fake_write) as mock_write:
        encoded = media_io.encode_base64(media)

    assert base64.b64decode(encoded) == b"dummy_wav_data"
    mock_write.assert_called_once()

0 commit comments

Comments
 (0)