Skip to content
36 changes: 35 additions & 1 deletion tests/integration_tests/test_audio.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import base64
import json
import logging
import logging.config
from pathlib import Path
Expand All @@ -14,7 +16,8 @@ def test_audio_speech(logging_conf):
response = client.audio.speech(
model='cogtts',
input='你好呀,欢迎来到智谱开放平台',
voice='female',
voice='tongtong',
stream=False,
response_format='wav',
)
response.stream_to_file(speech_file_path)
Expand All @@ -26,6 +29,37 @@ def test_audio_speech(logging_conf):
except zhipuai.core._errors.APIStatusError as err:
print(err)

def test_audio_speech_streaming(logging_conf):
    """Stream a ``cogtts`` speech request and write the decoded audio to disk.

    Each streamed item exposes a base64-encoded audio fragment in
    ``choices[0].delta.content``. Fragments are decoded and appended to
    ``output.pcm`` until an item reports a non-``None`` ``finish_reason``.
    API errors are printed rather than raised, so the test is best-effort.

    Args:
        logging_conf: Logging configuration dict handed to
            ``logging.config.dictConfig`` (presumably a pytest fixture).
    """
    logging.config.dictConfig(logging_conf)  # type: ignore
    client = ZhipuAI()  # fill in your own API key
    try:
        response = client.audio.speech(
            model='cogtts',
            input='你好呀,欢迎来到智谱开放平台',
            voice='tongtong',
            stream=True,
            response_format='wav',
        )
        with open("output.pcm", "wb") as f:
            for item in response:
                choice = item.choices[0]
                index = choice.index
                finish_reason = choice.finish_reason
                audio_delta = choice.delta.content
                if finish_reason is not None:
                    # The stream signalled completion; stop consuming.
                    break
                if not audio_delta:
                    # The delta content may be absent (None/empty) on a chunk
                    # that carries no audio. Decoding None would raise a
                    # TypeError that the broad handler below would swallow,
                    # silently truncating the output file.
                    continue
                f.write(base64.b64decode(audio_delta))
                print(f"{index}.finish_reason = {finish_reason}, audio_delta = {len(audio_delta)}")

    except zhipuai.core._errors.APIRequestFailedError as err:
        print(err)
    except zhipuai.core._errors.APIInternalError as err:
        print(err)
    except zhipuai.core._errors.APIStatusError as err:
        print(err)
    except Exception as e:
        # NOTE(review): this catch-all also hides genuine test failures;
        # kept to preserve the original best-effort behaviour.
        print(e)


def test_audio_customization(logging_conf):
logging.config.dictConfig(logging_conf)
Expand Down
13 changes: 9 additions & 4 deletions zhipuai/api_resource/audio/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from zhipuai.types.audio import AudioSpeechParams
from ...types.audio import audio_customization_param

from zhipuai.core import BaseAPI, maybe_transform
from zhipuai.core import BaseAPI, maybe_transform, StreamResponse
from zhipuai.core import NOT_GIVEN, Body, Headers, NotGiven, FileTypes
from zhipuai.core import _legacy_response

Expand All @@ -20,6 +20,7 @@
make_request_options,
)
from zhipuai.core import deepcopy_minimal
from ...types.audio.audio_speech_chunk import AudioSpeechChunk

if TYPE_CHECKING:
from zhipuai._client import ZhipuAI
Expand All @@ -46,15 +47,17 @@ def speech(
sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN,
request_id: str = None,
user_id: str = None,
stream: bool = False,
extra_headers: Headers | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> _legacy_response.HttpxBinaryResponseContent:
) -> _legacy_response.HttpxBinaryResponseContent | StreamResponse[AudioSpeechChunk]:
body = deepcopy_minimal(
{
"model": model,
"input": input,
"voice": voice,
"stream": stream,
"response_format": response_format,
"sensitive_word_check": sensitive_word_check,
"request_id": request_id,
Expand All @@ -63,11 +66,13 @@ def speech(
)
return self._post(
"/audio/speech",
body=maybe_transform(body, AudioSpeechParams),
body=body,
options=make_request_options(
extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
),
cast_type=_legacy_response.HttpxBinaryResponseContent
cast_type=_legacy_response.HttpxBinaryResponseContent,
stream= stream or False,
stream_cls=StreamResponse[AudioSpeechChunk]
)

def customization(
Expand Down
32 changes: 32 additions & 0 deletions zhipuai/types/audio/audio_speech_chunk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import List, Optional, Dict, Any

from ...core import BaseModel

__all__ = [
"AudioSpeechChunk",
"AudioError",
"AudioSpeechChoice",
"AudioSpeechDelta"
]


class AudioSpeechDelta(BaseModel):
    """Incremental payload carried by one streamed speech choice."""

    # Chunk payload as a string; may be absent on a given chunk.
    # NOTE(review): presumably base64-encoded audio -- confirm against the API.
    content: Optional[str] = None
    # Role associated with the delta, when the server provides one.
    role: Optional[str] = None


class AudioSpeechChoice(BaseModel):
    """One entry of the ``choices`` list in a streamed speech chunk."""

    # Incremental payload for this choice (required).
    delta: AudioSpeechDelta
    # None while the stream is still producing data; set once it finishes.
    # NOTE(review): the set of possible values is not visible here.
    finish_reason: Optional[str] = None
    # Index of this choice as reported by the server (required).
    index: int

class AudioError(BaseModel):
    """Error details that may accompany a streamed speech chunk.

    Derives from ``BaseModel`` like the other response types in this module
    so that it can be used as a field type of a model and populated from the
    response payload; a plain class cannot be validated or parsed that way.
    """

    # Error code reported by the server, if any.
    code: Optional[str] = None
    # Error description reported by the server, if any.
    message: Optional[str] = None


class AudioSpeechChunk(BaseModel):
    """A single item of a streamed speech response.

    Used as the element type of the stream returned when speech synthesis
    is requested with ``stream=True``.
    """

    # Choices contained in this chunk (required).
    choices: List[AudioSpeechChoice]
    # Identifier of the originating request, when provided.
    request_id: Optional[str] = None
    # Creation time as an integer, when provided.
    # NOTE(review): presumably a Unix timestamp -- confirm units.
    created: Optional[int] = None
    # Error details, when the chunk reports a failure.
    error: Optional[AudioError] = None