diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py
index e4891b5..2ef05d6 100644
--- a/tests/integration_tests/test_audio.py
+++ b/tests/integration_tests/test_audio.py
@@ -1,3 +1,5 @@
+import base64
+import json
 import logging
 import logging.config
 from pathlib import Path
@@ -14,7 +16,8 @@ def test_audio_speech(logging_conf):
         response = client.audio.speech(
             model='cogtts',
             input='你好呀,欢迎来到智谱开放平台',
-            voice='female',
+            voice='tongtong',
+            stream=False,
             response_format='wav',
         )
         response.stream_to_file(speech_file_path)
@@ -26,6 +29,40 @@ def test_audio_speech(logging_conf):
     except zhipuai.core._errors.APIStatusError as err:
         print(err)
 
+def test_audio_speech_streaming(logging_conf):
+    """Stream cogtts speech and write the base64-decoded deltas to output.pcm."""
+    logging.config.dictConfig(logging_conf)  # type: ignore
+    client = ZhipuAI()  # fill in your own API key
+    try:
+        response = client.audio.speech(
+            model='cogtts',
+            input='你好呀,欢迎来到智谱开放平台',
+            voice='tongtong',
+            stream=True,
+            response_format='wav',
+        )
+        with open("output.pcm", "wb") as f:
+            for item in response:
+                choice = item.choices[0]
+                index = choice.index
+                finish_reason = choice.finish_reason
+                audio_delta = choice.delta.content
+                if finish_reason is not None:
+                    break
+                if not audio_delta:
+                    continue  # delta.content is Optional; nothing to decode
+                f.write(base64.b64decode(audio_delta))
+                print(f"{index}.finish_reason = {finish_reason}, audio_delta = {len(audio_delta)}")
+
+    except zhipuai.core._errors.APIRequestFailedError as err:
+        print(err)
+    except zhipuai.core._errors.APIInternalError as err:
+        print(err)
+    except zhipuai.core._errors.APIStatusError as err:
+        print(err)
+    except Exception as e:
+        print(e)
+
 
 def test_audio_customization(logging_conf):
     logging.config.dictConfig(logging_conf)
diff --git a/zhipuai/api_resource/audio/audio.py b/zhipuai/api_resource/audio/audio.py
index aa987cb..d916048 100644
--- a/zhipuai/api_resource/audio/audio.py
+++ b/zhipuai/api_resource/audio/audio.py
@@ -9,7 +9,7 @@
 from zhipuai.types.audio import AudioSpeechParams
 from ...types.audio import audio_customization_param
 
-from zhipuai.core import BaseAPI, maybe_transform
+from zhipuai.core import BaseAPI, maybe_transform, StreamResponse
 from zhipuai.core import NOT_GIVEN, Body, Headers, NotGiven, FileTypes
 from zhipuai.core import _legacy_response
 
@@ -20,6 +20,7 @@
     make_request_options,
 )
 from zhipuai.core import deepcopy_minimal
+from ...types.audio.audio_speech_chunk import AudioSpeechChunk
 
 if TYPE_CHECKING:
     from zhipuai._client import ZhipuAI
@@ -46,15 +47,17 @@ def speech(
         sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN,
         request_id: str = None,
         user_id: str = None,
+        stream: bool = False,
         extra_headers: Headers | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> _legacy_response.HttpxBinaryResponseContent:
+    ) -> _legacy_response.HttpxBinaryResponseContent | StreamResponse[AudioSpeechChunk]:
         body = deepcopy_minimal(
             {
                 "model": model,
                 "input": input,
                 "voice": voice,
+                "stream": stream,
                 "response_format": response_format,
                 "sensitive_word_check": sensitive_word_check,
                 "request_id": request_id,
@@ -63,11 +66,13 @@ def speech(
         )
         return self._post(
             "/audio/speech",
-            body=maybe_transform(body, AudioSpeechParams),
+            body=body,
             options=make_request_options(
                 extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
             ),
-            cast_type=_legacy_response.HttpxBinaryResponseContent
+            cast_type=_legacy_response.HttpxBinaryResponseContent,
+            stream=stream or False,
+            stream_cls=StreamResponse[AudioSpeechChunk],
         )
 
     def customization(
diff --git a/zhipuai/types/audio/audio_speech_chunk.py b/zhipuai/types/audio/audio_speech_chunk.py
new file mode 100644
index 0000000..7788d9d
--- /dev/null
+++ b/zhipuai/types/audio/audio_speech_chunk.py
@@ -0,0 +1,37 @@
+from typing import List, Optional, Dict, Any
+
+from ...core import BaseModel
+
+__all__ = [
+    "AudioSpeechChunk",
+    "AudioError",
+    "AudioSpeechChoice",
+    "AudioSpeechDelta"
+]
+
+
+class AudioSpeechDelta(BaseModel):
+    """Incremental payload carried by one streamed speech choice."""
+    content: Optional[str] = None
+    role: Optional[str] = None
+
+
+class AudioSpeechChoice(BaseModel):
+    """One choice entry inside a streamed speech chunk."""
+    delta: AudioSpeechDelta
+    finish_reason: Optional[str] = None
+    index: int
+
+
+class AudioError(BaseModel):
+    """Error details that may accompany a speech chunk."""
+    code: Optional[str] = None
+    message: Optional[str] = None
+
+
+class AudioSpeechChunk(BaseModel):
+    """One chunk of a streamed ``/audio/speech`` response."""
+    choices: List[AudioSpeechChoice]
+    request_id: Optional[str] = None
+    created: Optional[int] = None
+    error: Optional[AudioError] = None