Skip to content

Commit b6d279d

Browse files
author
jinhaiyang
committed
【tts】tts支持
1 parent 1af52ac commit b6d279d

File tree

9 files changed

+228
-2
lines changed

9 files changed

+228
-2
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from zhipuai import ZhipuAI
2+
import zhipuai
3+
4+
import logging
5+
import logging.config
6+
7+
def test_audio_speech(logging_conf):
    """Integration check for ``client.audio.speech``.

    Applies ``logging_conf`` through ``logging.config.dictConfig``, requests
    speech from the ``cogtts`` model and writes the returned bytes to
    ``output.wav`` in the current working directory. The three API error
    types listed below are printed instead of being re-raised.
    """
    logging.config.dictConfig(logging_conf)  # type: ignore
    client = ZhipuAI()  # fill in your own API key
    try:
        response = client.audio.speech(
            model="cogtts",
            input="智谱ai,你好呀",
            voice="female",
            response_format="wav",
        )
        print(response)
        # Persist the binary payload so the audio can be inspected by hand.
        with open("output.wav", "wb") as f:
            f.write(response.content)
        print("文件已保存为 output.wav")
    except (
        zhipuai.core._errors.APIRequestFailedError,
        zhipuai.core._errors.APIInternalError,
        zhipuai.core._errors.APIStatusError,
    ) as err:
        # All three error types are reported the same way.
        print(err)
28+
29+
def test_audio_customization(logging_conf):
    """Integration check for ``client.audio.customization``.

    Applies ``logging_conf`` through ``logging.config.dictConfig``, uploads a
    reference WAV file together with its ``voice_text`` and writes the
    returned bytes to ``output.wav`` in the current working directory. The
    three API error types listed below are printed instead of being re-raised.

    The reference WAV defaults to the original developer-local path; set the
    ``ZHIPUAI_TTS_SAMPLE_WAV`` environment variable to point at a file that
    exists on the machine running the test.
    """
    import os  # local import: only needed to resolve the sample path

    logging.config.dictConfig(logging_conf)
    client = ZhipuAI()  # fill in your own API key
    sample_path = os.environ.get(
        "ZHIPUAI_TTS_SAMPLE_WAV",
        '/Users/jhy/Desktop/tts/test_case_8s.wav',
    )
    with open(sample_path, 'rb') as file:
        try:
            response = client.audio.customization(
                model="cogtts",
                input="智谱ai,你好呀",
                voice_text="这是一条测试用例",
                voice_data=file,
                response_format="wav",
            )
            print(response)
            # Persist the binary payload so the audio can be inspected by hand.
            with open("output.wav", "wb") as f:
                f.write(response.content)
            print("文件已保存为 output.wav")
        except (
            zhipuai.core._errors.APIRequestFailedError,
            zhipuai.core._errors.APIInternalError,
            zhipuai.core._errors.APIStatusError,
        ) as err:
            # All three error types are reported the same way.
            print(err)

tests/integration_tests/test_videos.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def test_videos(logging_conf):
1010
client = ZhipuAI() # 填写您自己的APIKey
1111
try:
1212
response = client.videos.generations(
13-
model="cogvideo",
13+
model="cogvideox",
1414
prompt="一个开船的人",
1515

1616
user_id="1212222"

zhipuai/_client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def __init__(
6262
self.tools = api_resource.Tools(self)
6363
self.videos = api_resource.Videos(self)
6464
self.assistant = api_resource.Assistant(self)
65+
self.audio = api_resource.Audio(self)
6566

6667
@property
6768
@override

zhipuai/api_resource/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434
Assistant,
3535
)
3636

37+
from .audio import (
38+
Audio
39+
)
40+
3741
__all__ = [
3842
'Videos',
3943
'AsyncCompletions',
@@ -48,5 +52,5 @@
4852
'Knowledge',
4953
'Tools',
5054
'Assistant',
51-
55+
'Audio'
5256
]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from zhipuai.api_resource.audio.audio import Audio
2+
3+
__all__= [
4+
"Audio"
5+
]
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, List, Mapping, cast, Optional, Dict
4+
5+
from zhipuai.core._utils import extract_files
6+
7+
from zhipuai.types.sensitive_word_check import SensitiveWordCheckRequest
8+
from zhipuai.types.audio import AudioSpeechParams
9+
from ...types.audio import audio_customization_param
10+
11+
from zhipuai.core import BaseAPI, maybe_transform
12+
from zhipuai.core import NOT_GIVEN, Body, Headers, NotGiven, FileTypes
13+
from zhipuai.core import _legacy_response
14+
15+
import httpx
16+
17+
from zhipuai.core import (
18+
make_request_options,
19+
)
20+
from zhipuai.core import deepcopy_minimal
21+
22+
if TYPE_CHECKING:
23+
from zhipuai._client import ZhipuAI
24+
25+
__all__ = ["Audio"]
26+
27+
28+
class Audio(BaseAPI):
    """API resource exposing the ``/audio/speech`` and ``/audio/customization`` endpoints."""

    def __init__(self, client: "ZhipuAI") -> None:
        super().__init__(client)

    def speech(
        self,
        *,
        model: str,
        input: Optional[str] = None,
        voice: Optional[str] = None,
        response_format: Optional[str] = None,
        sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN,
        request_id: Optional[str] = None,
        user_id: Optional[str] = None,
        extra_headers: Headers | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> _legacy_response.HttpxBinaryResponseContent:
        """POST a text-to-speech request to ``/audio/speech``.

        Args:
            model: Model code.
            input: Text to turn into speech.
            voice: Voice (timbre) of the generated speech.
            response_format: Format of the generated audio file.
            sensitive_word_check: Forwarded unchanged in the request body.
            request_id: Caller-supplied unique request identifier.
            user_id: End-user identifier.
            extra_headers: Extra headers handed to ``make_request_options``.
            extra_body: Extra body fields handed to ``make_request_options``.
            timeout: Request timeout handed to ``make_request_options``.

        Returns:
            The result of ``self._post`` cast to ``HttpxBinaryResponseContent``.
        """
        # Every field is placed in the body as given, including ``None`` defaults.
        body = deepcopy_minimal(
            {
                "model": model,
                "input": input,
                "voice": voice,
                "response_format": response_format,
                "sensitive_word_check": sensitive_word_check,
                "request_id": request_id,
                "user_id": user_id,
            }
        )
        return self._post(
            "/audio/speech",
            body=maybe_transform(body, AudioSpeechParams),
            options=make_request_options(
                extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
            ),
            cast_type=_legacy_response.HttpxBinaryResponseContent,
        )

    def customization(
        self,
        *,
        model: str,
        input: Optional[str] = None,
        voice_text: Optional[str] = None,
        voice_data: Optional[FileTypes] = None,
        response_format: Optional[str] = None,
        sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN,
        request_id: Optional[str] = None,
        user_id: Optional[str] = None,
        extra_headers: Headers | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> _legacy_response.HttpxBinaryResponseContent:
        """POST a speech request with a reference audio file to ``/audio/customization``.

        Args:
            model: Model code.
            input: Text to turn into speech.
            voice_text: Text sent alongside ``voice_data``.
            voice_data: Reference audio file; pulled out of the body by
                ``extract_files`` and sent through the ``files`` argument.
            response_format: Format of the generated audio file.
            sensitive_word_check: Forwarded unchanged in the request body.
            request_id: Caller-supplied unique request identifier.
            user_id: End-user identifier.
            extra_headers: Extra headers handed to ``make_request_options``.
            extra_body: Extra body fields handed to ``make_request_options``.
            timeout: Request timeout handed to ``make_request_options``.

        Returns:
            The result of ``self._post`` cast to ``HttpxBinaryResponseContent``.
        """
        # Every field is placed in the body as given, including ``None`` defaults.
        body = deepcopy_minimal(
            {
                "model": model,
                "input": input,
                "voice_text": voice_text,
                "voice_data": voice_data,
                "response_format": response_format,
                "sensitive_word_check": sensitive_word_check,
                "request_id": request_id,
                "user_id": user_id,
            }
        )
        files = extract_files(cast(Mapping[str, object], body), paths=[["voice_data"]])

        if files:
            # Caller-supplied headers are spread last, so they override this default.
            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
        return self._post(
            "/audio/customization",
            body=maybe_transform(body, audio_customization_param.AudioCustomizationParam),
            files=files,
            options=make_request_options(
                extra_headers=extra_headers, extra_body=extra_body, timeout=timeout
            ),
            cast_type=_legacy_response.HttpxBinaryResponseContent,
        )

zhipuai/types/audio/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2+
from .audio_speech_params import(
3+
AudioSpeechParams
4+
)
5+
6+
from .audio_customization_param import(
7+
AudioCustomizationParam
8+
)
9+
__all__ = ["AudioSpeechParams","AudioCustomizationParam"]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from __future__ import annotations
2+
3+
from typing import List, Optional
4+
5+
from typing_extensions import Literal, Required, TypedDict
6+
__all__ = ["AudioCustomizationParam"]
7+
8+
from ..sensitive_word_check import SensitiveWordCheckRequest
9+
10+
class AudioCustomizationParam(TypedDict, total=False):
    """Request-body keys for an audio customization request.

    Declared with ``total=False``, so every key is optional for type checkers.
    """

    model: str
    """Model code."""
    input: str
    """Text to turn into speech."""
    voice_text: str
    """Text sent together with the reference audio.

    NOTE(review): presumably the transcript of the reference audio rather than
    a voice/timbre name -- confirm against the API documentation.
    """
    response_format: str
    """Format of the generated audio file."""
    sensitive_word_check: Optional[SensitiveWordCheckRequest]
    """Sensitive-word check settings for the request."""
    request_id: str
    """Supplied by the caller and must be unique; identifies each request.
    The platform generates one by default when the caller omits it."""
    user_id: str
    """End-user identifier."""
24+
25+
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from __future__ import annotations
2+
3+
from typing import List, Optional
4+
5+
from typing_extensions import Literal, Required, TypedDict
6+
7+
__all__ = ["AudioSpeechParams"]
8+
9+
from ..sensitive_word_check import SensitiveWordCheckRequest
10+
11+
12+
class AudioSpeechParams(TypedDict, total=False):
    """Request-body keys for a text-to-speech request.

    Declared with ``total=False``, so every key is optional for type checkers.
    """

    model: str
    """Model code."""

    input: str
    """Text to turn into speech."""

    voice: str
    """Voice (timbre) of the generated speech."""

    response_format: str
    """Format of the generated audio file."""

    sensitive_word_check: Optional[SensitiveWordCheckRequest]

    request_id: str
    """Supplied by the caller and must be unique; identifies each request.
    The platform generates one by default when the caller omits it."""

    user_id: str
    """End-user identifier."""

0 commit comments

Comments
 (0)