Skip to content

Commit fc77045

Browse files
add text-to-speech tests (#33791)
* add tts tests * updates for aoai * update targeted api version
1 parent 09d6ce8 commit fc77045

File tree

3 files changed

+166
-13
lines changed

3 files changed

+166
-13
lines changed

sdk/openai/azure-openai/tests/conftest.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,23 +25,27 @@
2525
ENV_OPENAI_TEST_MODE = "OPENAI_TEST_MODE"
2626

2727
# for pytest.parametrize
28-
ALL = ["azure", "azuread", "openai"]
2928
AZURE = "azure"
3029
OPENAI = "openai"
3130
AZURE_AD = "azuread"
31+
ALL = [AZURE, AZURE_AD, OPENAI]
3232
WHISPER_AZURE = "whisper_azure"
3333
WHISPER_AZURE_AD = "whisper_azuread"
34-
WHISPER_ALL = ["whisper_azure", "whisper_azuread", "openai"]
34+
WHISPER_ALL = [WHISPER_AZURE, WHISPER_AZURE_AD, OPENAI]
35+
TTS_OPENAI = "tts_openai"
36+
TTS_AZURE = "tts_azure"
37+
TTS_AZURE_AD = "tts_azuread"
38+
TTS_ALL = [TTS_AZURE, TTS_AZURE_AD, TTS_OPENAI]
3539
DALLE_AZURE = "dalle_azure"
3640
DALLE_AZURE_AD = "dalle_azuread"
37-
DALLE_ALL = ["dalle_azure", "dalle_azuread", "openai"]
41+
DALLE_ALL = [DALLE_AZURE, DALLE_AZURE_AD, OPENAI]
3842
GPT_4_AZURE = "gpt_4_azure"
3943
GPT_4_AZURE_AD = "gpt_4_azuread"
4044
GPT_4_OPENAI = "gpt_4_openai"
41-
GPT_4_ALL = ["gpt_4_azure", "gpt_4_azuread", "gpt_4_openai"]
45+
GPT_4_ALL = [GPT_4_AZURE, GPT_4_AZURE_AD, GPT_4_OPENAI]
4246
ASST_AZURE = "asst_azure"
4347
ASST_AZUREAD = "asst_azuread"
44-
ASST_ALL = ["asst_azure", "asst_azuread", "gpt_4_openai"]
48+
ASST_ALL = [ASST_AZURE, ASST_AZUREAD, GPT_4_OPENAI]
4549

4650
# Environment variable keys
4751
ENV_AZURE_OPENAI_ENDPOINT = "AZ_OPENAI_ENDPOINT"
@@ -65,6 +69,7 @@
6569
ENV_AZURE_OPENAI_AUDIO_NAME = "whisper"
6670
ENV_AZURE_OPENAI_DALLE_NAME = "dall-e-3"
6771
ENV_AZURE_OPENAI_CHAT_COMPLETIONS_GPT4_NAME = "gpt-4-1106-preview"
72+
ENV_AZURE_OPENAI_TTS_NAME = "tts"
6873

6974
ENV_OPENAI_KEY = "OPENAI_KEY"
7075
ENV_OPENAI_COMPLETIONS_MODEL = "gpt-3.5-turbo-instruct"
@@ -73,6 +78,7 @@
7378
ENV_OPENAI_AUDIO_MODEL = "whisper-1"
7479
ENV_OPENAI_DALLE_MODEL = "dall-e-3"
7580
ENV_OPENAI_CHAT_COMPLETIONS_GPT4_MODEL = "gpt-4-1106-preview"
81+
ENV_OPENAI_TTS_MODEL = "tts-1"
7682

7783
# Fake values
7884
TEST_ENDPOINT = "https://test-resource.openai.azure.com/"
@@ -140,17 +146,17 @@ def client(api_type):
140146
azure_ad_token_provider=get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"),
141147
api_version=ENV_AZURE_OPENAI_API_VERSION,
142148
)
143-
elif api_type == "openai" or api_type == "gpt_4_openai":
149+
elif api_type in ["openai", "gpt_4_openai", "tts_openai"]:
144150
client = openai.OpenAI(
145151
api_key=os.getenv(ENV_OPENAI_KEY)
146152
)
147-
elif api_type == "whisper_azure":
153+
elif api_type in ["whisper_azure", "tts_azure"]:
148154
client = openai.AzureOpenAI(
149155
azure_endpoint=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_ENDPOINT),
150156
api_key=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_KEY),
151157
api_version=ENV_AZURE_OPENAI_API_VERSION,
152158
)
153-
elif api_type == "whisper_azuread":
159+
elif api_type in ["whisper_azuread", "tts_azuread"]:
154160
client = openai.AzureOpenAI(
155161
azure_endpoint=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_ENDPOINT),
156162
azure_ad_token_provider=get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"),
@@ -187,17 +193,17 @@ def client_async(api_type):
187193
azure_ad_token_provider=get_bearer_token_provider_async(AsyncDefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"),
188194
api_version=ENV_AZURE_OPENAI_API_VERSION,
189195
)
190-
elif api_type == "openai" or api_type == "gpt_4_openai":
196+
elif api_type in ["openai", "gpt_4_openai", "tts_openai"]:
191197
client = openai.AsyncOpenAI(
192198
api_key=os.getenv(ENV_OPENAI_KEY)
193199
)
194-
elif api_type == "whisper_azure":
200+
elif api_type in ["whisper_azure", "tts_azure"]:
195201
client = openai.AsyncAzureOpenAI(
196202
azure_endpoint=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_ENDPOINT),
197203
api_key=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_KEY),
198204
api_version=ENV_AZURE_OPENAI_API_VERSION,
199205
)
200-
elif api_type == "whisper_azuread":
206+
elif api_type in ["whisper_azuread", "tts_azuread"]:
201207
client = openai.AsyncAzureOpenAI(
202208
azure_endpoint=os.getenv(ENV_AZURE_OPENAI_NORTHCENTRALUS_ENDPOINT),
203209
azure_ad_token_provider=get_bearer_token_provider_async(AsyncDefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"),
@@ -225,6 +231,10 @@ def build_kwargs(args, api_type):
225231
return {"model": ENV_AZURE_OPENAI_AUDIO_NAME}
226232
elif api_type == "openai":
227233
return {"model": ENV_OPENAI_AUDIO_MODEL}
234+
elif api_type == "tts_openai":
235+
return {"model": ENV_OPENAI_TTS_MODEL}
236+
elif api_type in ["tts_azure", "tts_azuread"]:
237+
return {"model": ENV_AZURE_OPENAI_TTS_NAME}
228238
if test_feature.startswith("test_chat_completions") \
229239
or test_feature.startswith(("test_client", "test_models")):
230240
if api_type in ["azure", "azuread", "asst_azure"]:

sdk/openai/azure-openai/tests/v1_tests/test_audio.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55

66
import os
77
import pytest
8+
import pathlib
9+
import uuid
810
from devtools_testutils import AzureRecordedTestCase
9-
from conftest import WHISPER_AZURE, OPENAI, WHISPER_ALL, configure
11+
from conftest import WHISPER_AZURE, OPENAI, WHISPER_ALL, configure, TTS_OPENAI, TTS_AZURE, TTS_AZURE_AD
1012

1113
audio_test_file = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "..", "./assets/hello.m4a"))
1214
audio_long_test_file = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "..", "./assets/wikipediaOcelot.wav"))
@@ -211,3 +213,71 @@ def test_translate_options(self, client, azure_openai_creds, api_type, **kwargs)
211213
**kwargs,
212214
)
213215
assert result.text == "Hello"
216+
217+
@configure
@pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE, TTS_AZURE_AD])
def test_tts(self, client, azure_openai_creds, api_type, **kwargs):
    """Generate speech with the "alloy" voice and write the audio to disk.

    Runs once per TTS api_type (OpenAI, Azure key auth, Azure AD auth).
    ``kwargs`` is forwarded unchanged to ``audio.speech.create``; it is
    presumably where the test configuration supplies the model/deployment
    name -- confirm against conftest.
    """
    # Unique file name so parallel or repeated runs never collide.
    speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.mp3"
    try:
        response = client.audio.speech.create(
            voice="alloy",
            input="The quick brown fox jumped over the lazy dog.",
            **kwargs,
        )
        assert response.encoding
        assert response.content
        assert response.text
        response.write_to_file(speech_file_path)
    finally:
        # The file only exists if write_to_file was reached. Removing it
        # unconditionally would raise FileNotFoundError and hide the real
        # failure raised by the request or the assertions above.
        if speech_file_path.exists():
            os.remove(speech_file_path)
234+
235+
@configure
@pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
def test_tts_hd_streaming(self, client, azure_openai_creds, api_type, **kwargs):
    """Stream speech from the "tts-1-hd" model and read the full body.

    The model name is fixed to "tts-1-hd" here; ``kwargs`` is accepted but
    deliberately not forwarded to the request.
    NOTE(review): the same model name is used for the Azure parametrization
    -- confirm a deployment with that name exists on the test resource.
    """
    streaming_speech = client.audio.speech.with_streaming_response
    with streaming_speech.create(
        voice="echo",
        input="The quick brown fox jumped over the lazy dog.",
        model="tts-1-hd",
    ) as response:
        # Consuming the body is the whole check: the test passes as long as
        # the streamed response can be read without raising.
        response.read()
245+
246+
247+
@configure
@pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
def test_tts_response_format(self, client, azure_openai_creds, api_type, **kwargs):
    """Request FLAC output with the "fable" voice and save it to disk.

    ``kwargs`` is forwarded unchanged to ``audio.speech.create``; it is
    presumably where the test configuration supplies the model/deployment
    name -- confirm against conftest.
    """
    # Unique file name so parallel or repeated runs never collide.
    speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.flac"
    try:
        response = client.audio.speech.create(
            voice="fable",
            input="The quick brown fox jumped over the lazy dog.",
            response_format="flac",
            **kwargs
        )
        assert response.encoding
        assert response.content
        assert response.text
        response.stream_to_file(speech_file_path)  # deprecated
    finally:
        # The file only exists if stream_to_file was reached. Removing it
        # unconditionally would raise FileNotFoundError and hide the real
        # failure raised by the request or the assertions above.
        if speech_file_path.exists():
            os.remove(speech_file_path)
265+
266+
@configure
@pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
def test_tts_speed(self, client, azure_openai_creds, api_type, **kwargs):
    """Generate speech with the "onyx" voice at ``speed=3.0`` and save it.

    ``kwargs`` is forwarded unchanged to ``audio.speech.create``; it is
    presumably where the test configuration supplies the model/deployment
    name -- confirm against conftest.
    """
    # Unique file name so parallel or repeated runs never collide.
    speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.mp3"
    try:
        response = client.audio.speech.create(
            voice="onyx",
            input="The quick brown fox jumped over the lazy dog.",
            speed=3.0,
            **kwargs
        )
        assert response.encoding
        assert response.content
        assert response.text
        response.write_to_file(speech_file_path)
    finally:
        # The file only exists if write_to_file was reached. Removing it
        # unconditionally would raise FileNotFoundError and hide the real
        # failure raised by the request or the assertions above.
        if speech_file_path.exists():
            os.remove(speech_file_path)

sdk/openai/azure-openai/tests/v1_tests/test_audio_async.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55

66
import os
77
import pytest
8+
import pathlib
9+
import uuid
810
from devtools_testutils import AzureRecordedTestCase
9-
from conftest import WHISPER_AZURE, OPENAI, WHISPER_ALL, configure_async
11+
from conftest import WHISPER_AZURE, OPENAI, WHISPER_ALL, configure_async, TTS_OPENAI, TTS_AZURE, TTS_AZURE_AD
1012

1113
audio_test_file = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "..", "./assets/hello.m4a"))
1214
audio_long_test_file = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "..", "./assets/wikipediaOcelot.wav"))
@@ -226,3 +228,74 @@ async def test_translate_options(self, client_async, azure_openai_creds, api_typ
226228
**kwargs,
227229
)
228230
assert result.text == "Hello"
231+
232+
@configure_async
@pytest.mark.asyncio
@pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE, TTS_AZURE_AD])
async def test_tts(self, client_async, azure_openai_creds, api_type, **kwargs):
    """Generate speech with the "alloy" voice via the async client and save it.

    Runs once per TTS api_type (OpenAI, Azure key auth, Azure AD auth).
    ``kwargs`` is forwarded unchanged to ``audio.speech.create``; it is
    presumably where the test configuration supplies the model/deployment
    name -- confirm against conftest.
    """
    # Unique file name so parallel or repeated runs never collide.
    speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.mp3"
    try:
        response = await client_async.audio.speech.create(
            voice="alloy",
            input="The quick brown fox jumped over the lazy dog.",
            **kwargs,
        )
        assert response.encoding
        assert response.content
        assert response.text
        response.write_to_file(speech_file_path)
    finally:
        # The file only exists if write_to_file was reached. Removing it
        # unconditionally would raise FileNotFoundError and hide the real
        # failure raised by the request or the assertions above.
        if speech_file_path.exists():
            os.remove(speech_file_path)
250+
251+
@configure_async
@pytest.mark.asyncio
@pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
async def test_tts_hd(self, client_async, azure_openai_creds, api_type, **kwargs):
    """Stream speech from the "tts-1-hd" model with the async client.

    The model name is fixed to "tts-1-hd" here; ``kwargs`` is accepted but
    deliberately not forwarded to the request.
    NOTE(review): the same model name is used for the Azure parametrization
    -- confirm a deployment with that name exists on the test resource.
    """
    streaming_speech = client_async.audio.speech.with_streaming_response
    async with streaming_speech.create(
        voice="echo",
        input="The quick brown fox jumped over the lazy dog.",
        model="tts-1-hd",
    ) as response:
        # Consuming the body is the whole check: the test passes as long as
        # the streamed response can be read without raising.
        await response.read()
262+
263+
@configure_async
@pytest.mark.asyncio
@pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
async def test_tts_response_format(self, client_async, azure_openai_creds, api_type, **kwargs):
    """Request FLAC output with the "fable" voice via the async client.

    ``kwargs`` is forwarded unchanged to ``audio.speech.create``; it is
    presumably where the test configuration supplies the model/deployment
    name -- confirm against conftest.
    """
    # Unique file name so parallel or repeated runs never collide.
    speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.flac"
    try:
        response = await client_async.audio.speech.create(
            voice="fable",
            input="The quick brown fox jumped over the lazy dog.",
            response_format="flac",
            **kwargs
        )
        assert response.encoding
        assert response.content
        assert response.text
        await response.astream_to_file(speech_file_path)  # deprecated
    finally:
        # The file only exists if astream_to_file was reached. Removing it
        # unconditionally would raise FileNotFoundError and hide the real
        # failure raised by the request or the assertions above.
        if speech_file_path.exists():
            os.remove(speech_file_path)
282+
283+
@configure_async
@pytest.mark.asyncio
@pytest.mark.parametrize("api_type", [TTS_OPENAI, TTS_AZURE])
async def test_tts_speed(self, client_async, azure_openai_creds, api_type, **kwargs):
    """Generate speech with the "onyx" voice at ``speed=3.0`` (async client).

    ``kwargs`` is forwarded unchanged to ``audio.speech.create``; it is
    presumably where the test configuration supplies the model/deployment
    name -- confirm against conftest.
    """
    # Unique file name so parallel or repeated runs never collide.
    speech_file_path = pathlib.Path(__file__).parent / f"{uuid.uuid4()}.mp3"
    try:
        response = await client_async.audio.speech.create(
            voice="onyx",
            input="The quick brown fox jumped over the lazy dog.",
            speed=3.0,
            **kwargs
        )
        assert response.encoding
        assert response.content
        assert response.text
        response.write_to_file(speech_file_path)
    finally:
        # The file only exists if write_to_file was reached. Removing it
        # unconditionally would raise FileNotFoundError and hide the real
        # failure raised by the request or the assertions above.
        if speech_file_path.exists():
            os.remove(speech_file_path)

0 commit comments

Comments
 (0)