Skip to content

Commit 0d314d2

Browse files
iceAndFireisFailed, yuhongxiao, 赵嘉琦, tomsun28
authored
feat: tts streaming (#90)
* 接口变更 * 新增音频合成流式返回 * 增加流式音频合成功能 * feat:豆神一期,添加音色,查询音色列表 * feat:APIKEY * feat:APIKEY * Revert "feat:APIKEY" This reverts commit 5775bc5. * Revert "feat:APIKEY" This reverts commit 616b652. * Revert "feat:豆神一期,添加音色,查询音色列表" This reverts commit 7d79be9. * 优化 * 优化测试代码 * 参数优化 --------- Co-authored-by: yuhongxiao <[email protected]> Co-authored-by: 赵嘉琦 <[email protected]> Co-authored-by: tomsun28 <[email protected]>
1 parent 042eae1 commit 0d314d2

File tree

3 files changed

+76
-5
lines changed

3 files changed

+76
-5
lines changed

tests/integration_tests/test_audio.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import base64
2+
import json
13
import logging
24
import logging.config
35
from pathlib import Path
@@ -14,7 +16,8 @@ def test_audio_speech(logging_conf):
1416
response = client.audio.speech(
1517
model='cogtts',
1618
input='你好呀,欢迎来到智谱开放平台',
17-
voice='female',
19+
voice='tongtong',
20+
stream=False,
1821
response_format='wav',
1922
)
2023
response.stream_to_file(speech_file_path)
@@ -26,6 +29,37 @@ def test_audio_speech(logging_conf):
2629
except zhipuai.core._errors.APIStatusError as err:
2730
print(err)
2831

32+
def test_audio_speech_streaming(logging_conf):
    """Stream a ``cogtts`` synthesis and write the decoded audio to ``output.pcm``.

    Each streamed item exposes ``choices[0]`` with an ``index``, an optional
    ``finish_reason`` and a ``delta.content`` string that is base64-decoded
    before being appended to the output file.

    Args:
        logging_conf: Dict-style logging configuration passed to
            ``logging.config.dictConfig``.
    """
    logging.config.dictConfig(logging_conf)  # type: ignore
    client = ZhipuAI()  # fill in your own API key
    try:
        response = client.audio.speech(
            model='cogtts',
            input='你好呀,欢迎来到智谱开放平台',
            voice='tongtong',
            stream=True,
            response_format='wav',
        )
        with open("output.pcm", "wb") as f:
            for item in response:
                choice = item.choices[0]
                index = choice.index
                finish_reason = choice.finish_reason
                audio_delta = choice.delta.content
                # ``content`` is optional on the delta model; skip empty
                # deltas instead of letting b64decode(None) raise.
                if audio_delta:
                    f.write(base64.b64decode(audio_delta))
                print(f"{index}.finish_reason = {finish_reason}, audio_delta = {len(audio_delta or '')}")
                # Check for termination only AFTER writing, so audio carried
                # by the final chunk is not silently dropped.
                if finish_reason is not None:
                    break

    except zhipuai.core._errors.APIRequestFailedError as err:
        print(err)
    except zhipuai.core._errors.APIInternalError as err:
        print(err)
    except zhipuai.core._errors.APIStatusError as err:
        print(err)
    except Exception as e:
        # NOTE(review): this catch-all keeps the sample best-effort, but it
        # also means the test can never fail — consider re-raising.
        print(e)
62+
2963

3064
def test_audio_customization(logging_conf):
3165
logging.config.dictConfig(logging_conf)

zhipuai/api_resource/audio/audio.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from zhipuai.types.audio import AudioSpeechParams
1010
from ...types.audio import audio_customization_param
1111

12-
from zhipuai.core import BaseAPI, maybe_transform
12+
from zhipuai.core import BaseAPI, maybe_transform, StreamResponse
1313
from zhipuai.core import NOT_GIVEN, Body, Headers, NotGiven, FileTypes
1414
from zhipuai.core import _legacy_response
1515

@@ -20,6 +20,7 @@
2020
make_request_options,
2121
)
2222
from zhipuai.core import deepcopy_minimal
23+
from ...types.audio.audio_speech_chunk import AudioSpeechChunk
2324

2425
if TYPE_CHECKING:
2526
from zhipuai._client import ZhipuAI
@@ -46,15 +47,17 @@ def speech(
4647
sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN,
4748
request_id: str = None,
4849
user_id: str = None,
50+
stream: bool = False,
4951
extra_headers: Headers | None = None,
5052
extra_body: Body | None = None,
5153
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
52-
) -> _legacy_response.HttpxBinaryResponseContent:
54+
) -> _legacy_response.HttpxBinaryResponseContent | StreamResponse[AudioSpeechChunk]:
5355
body = deepcopy_minimal(
5456
{
5557
"model": model,
5658
"input": input,
5759
"voice": voice,
60+
"stream": stream,
5861
"response_format": response_format,
5962
"sensitive_word_check": sensitive_word_check,
6063
"request_id": request_id,
@@ -63,11 +66,13 @@ def speech(
6366
)
6467
return self._post(
6568
"/audio/speech",
66-
body=maybe_transform(body, AudioSpeechParams),
69+
body=body,
6770
options=make_request_options(
6871
extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
6972
),
70-
cast_type=_legacy_response.HttpxBinaryResponseContent
73+
cast_type=_legacy_response.HttpxBinaryResponseContent,
74+
stream= stream or False,
75+
stream_cls=StreamResponse[AudioSpeechChunk]
7176
)
7277

7378
def customization(
zhipuai/types/audio/audio_speech_chunk.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from typing import List, Optional, Dict, Any
2+
3+
from ...core import BaseModel
4+
5+
__all__ = [
6+
"AudioSpeechChunk",
7+
"AudioError",
8+
"AudioSpeechChoice",
9+
"AudioSpeechDelta"
10+
]
11+
12+
13+
class AudioSpeechDelta(BaseModel):
    """Incremental payload carried by one streamed speech choice."""

    # Audio fragment as text; the integration test base64-decodes this value
    # before writing it to disk. May be absent on a chunk.
    content: Optional[str] = None
    # Optional role tag — presumably the message role; confirm against the API.
    role: Optional[str] = None
16+
17+
18+
class AudioSpeechChoice(BaseModel):
    """One entry of the ``choices`` list in a streamed speech chunk."""

    # Incremental audio payload for this choice (required).
    delta: AudioSpeechDelta
    # Stays None while streaming continues; the integration test stops
    # iterating once this is set.
    finish_reason: Optional[str] = None
    # Position reported for this choice (required).
    index: int
22+
23+
class AudioError(BaseModel):
    """Error details that may accompany a streamed speech chunk.

    Derives from ``BaseModel`` like the other chunk types so that it can be
    used as the ``error`` field type on ``AudioSpeechChunk``; a plain class
    with bare annotations gets no parsing or validation of its own.
    """

    # Error code, when one is reported.
    code: Optional[str] = None
    # Human-readable error description, when one is reported.
    message: Optional[str] = None
26+
27+
28+
class AudioSpeechChunk(BaseModel):
    """A single item yielded when speech synthesis is requested with ``stream=True``."""

    # Choices contained in this chunk (required); the integration test reads
    # only the first entry.
    choices: List[AudioSpeechChoice]
    # Request identifier, when present.
    request_id: Optional[str] = None
    # Creation time as an integer — presumably a Unix timestamp; confirm
    # against the API.
    created: Optional[int] = None
    # Error details, when present.
    error: Optional[AudioError] = None

0 commit comments

Comments
 (0)