新增音频合成流式返回

yuhongxiao · yuhongxiao · commit 538fe6e58abe · 2025-07-14T18:32:18.000+08:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -84,15 +84,15 @@ build-backend = "poetry.core.masonry.api"
 #
 # https://github.com/tophat/syrupy
 # --snapshot-warn-unused    Prints a warning on unused snapshots rather than fail the test suite.
-addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused -svv"
+addopts = "--strict-markers --strict-config --durations=5 -svv"
 # Registering custom markers.
 # https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
 markers = [
     "requires: mark tests as requiring a specific library",
     "scheduled: mark tests to run in scheduled testing",
     "compile: mark placeholder test used to compile integration tests without running them"
 ]
-asyncio_mode = "auto"
+# asyncio_mode = "auto"
 
 
 # https://python-poetry.org/docs/repositories/
diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py
@@ -14,7 +14,7 @@ def test_audio_speech(logging_conf):
 		response = client.audio.speech(
 			model='cogtts',
 			input='你好呀,欢迎来到智谱开放平台',
-			voice='female',
+			voice='tongtong',
 			stream=False,
 			response_format='wav',
 		)
@@ -27,6 +27,27 @@ def test_audio_speech(logging_conf):
 	except zhipuai.core._errors.APIStatusError as err:
 		print(err)
 
+def test_audio_speech_streaming(logging_conf):  # Integration test: request speech synthesis with stream=True and print every item received.
+	logging.config.dictConfig(logging_conf)  # type: ignore
+	client = ZhipuAI()  # fill in your own API key
+	try:
+		response = client.audio.speech(
+			model='cogtts',
+			input='你好呀,欢迎来到智谱开放平台',
+			voice='tongtong',
+			stream=True,  # ask for a streamed response rather than a single binary body
+			response_format='wav',
+		)
+		for item in response:  # consume the streamed items one by one
+			print(item)
+
+	except zhipuai.core._errors.APIRequestFailedError as err:  # NOTE(review): API errors are only printed, so they do not fail this test
+		print(err)
+	except zhipuai.core._errors.APIInternalError as err:
+		print(err)
+	except zhipuai.core._errors.APIStatusError as err:
+		print(err)
+
 
 def test_audio_customization(logging_conf):
 	logging.config.dictConfig(logging_conf)
diff --git a/zhipuai/api_resource/audio/audio.py b/zhipuai/api_resource/audio/audio.py
@@ -9,7 +9,7 @@
 from zhipuai.types.audio import AudioSpeechParams
 from ...types.audio import audio_customization_param
 
-from zhipuai.core import BaseAPI, maybe_transform
+from zhipuai.core import BaseAPI, maybe_transform, StreamResponse
 from zhipuai.core import NOT_GIVEN, Body, Headers, NotGiven, FileTypes
 from zhipuai.core import _legacy_response
 
@@ -20,6 +20,7 @@
     make_request_options,
 )
 from zhipuai.core import deepcopy_minimal
+from ...types.audio.audio_speech_chunk import AudioSpeechChunk
 
 if TYPE_CHECKING:
     from zhipuai._client import ZhipuAI
@@ -50,7 +51,7 @@ def speech(
             extra_headers: Headers | None = None,
             extra_body: Body | None = None,
             timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> _legacy_response.HttpxBinaryResponseContent:
+    ) -> _legacy_response.HttpxBinaryResponseContent | StreamResponse[AudioSpeechChunk]:
         body = deepcopy_minimal(
             {
                 "model": model,
@@ -65,11 +66,13 @@ def speech(
         )
         return self._post(
             "/audio/speech",
-            body=maybe_transform(body, AudioSpeechParams),
+            body=body,
             options=make_request_options(
                 extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
             ),
-            cast_type=_legacy_response.HttpxBinaryResponseContent
+            cast_type=_legacy_response.HttpxBinaryResponseContent,
+            stream= stream or False,
+            stream_cls=StreamResponse[AudioSpeechChunk]
         )
 
     def customization(
diff --git a/zhipuai/types/audio/audio_speech_chunk.py b/zhipuai/types/audio/audio_speech_chunk.py
@@ -0,0 +1,32 @@
+from typing import List, Optional, Dict, Any
+
+from ...core import BaseModel
+
+__all__ = [  # names exported by this module
+    "AudioSpeechChunk",
+    "AudioError",
+    "AudioSpeechChoice",
+    "AudioSpeechDelta"
+]
+
+
+class AudioSpeechDelta(BaseModel):  # Payload held in AudioSpeechChoice.delta.
+    content: Optional[str] = None  # optional string; presumably the encoded audio fragment — TODO confirm encoding
+    role: Optional[str] = None  # optional string; presumably the producer role reported by the API — TODO confirm
+
+
+class AudioSpeechChoice(BaseModel):  # One element of AudioSpeechChunk.choices.
+    delta: AudioSpeechDelta  # required payload for this choice
+    finish_reason: Optional[str] = None  # optional; presumably set once synthesis has ended — TODO confirm values
+    index: int  # required; presumably the position of this choice in the list — TODO confirm
+
+class AudioError:
+    code: Optional[str] = None
+    message: Optional[str] = None
+
+
+class AudioSpeechChunk(BaseModel):  # Item type used for StreamResponse[AudioSpeechChunk] in the streaming speech call.
+    choices: List[AudioSpeechChoice]  # required list of choices
+    request_id: Optional[str] = None  # optional request identifier
+    created: Optional[int] = None  # optional integer; presumably a creation timestamp — TODO confirm unit
+    error: Optional[AudioError] = None  # optional error details (code / message)