Skip to content

Commit d8c68b4

Browse files
author
wangyue.demon
committed
fix: move pyaudio from py core dependency to extension
1 parent b0a781a commit d8c68b4

File tree

4 files changed

+63
-47
lines changed

4 files changed

+63
-47
lines changed

pyproject.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,7 @@ database = [
5454
"tos>=2.8.4", # For TOS storage and Viking DB
5555
"mem0ai==0.1.118", # For mem0
5656
]
57-
tts = [
58-
"pyaudio>=0.2.14",
59-
]
57+
speech = []
6058
eval = [
6159
"prometheus-client>=0.22.1", # For exporting data to Prometheus pushgateway
6260
"deepeval>=3.2.6", # For DeepEval-based evaluation

tests/tools/builtin_tools/test_tts.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,7 @@ def test_tts_failure(self, mock_session):
8686
mock_session.return_value.post.assert_called_once()
8787

8888
@patch("builtins.open")
89-
@patch("pyaudio.PyAudio")
90-
def test_handle_server_response_success(self, mock_pyaudio, mock_open):
89+
def test_handle_server_response_success(self, mock_open):
9190
"""Test successful response handling"""
9291
# Setup mock response
9392
mock_response = MagicMock()
@@ -96,15 +95,10 @@ def test_handle_server_response_success(self, mock_pyaudio, mock_open):
9695
json.dumps({"code": 20000000}),
9796
]
9897

99-
# Setup mock audio stream
100-
mock_stream = MagicMock()
101-
mock_pyaudio.return_value.open.return_value = mock_stream
102-
10398
# Call function
10499
handle_server_response(mock_response, "test.pcm")
105100

106101
# Assertions
107-
mock_stream.write.assert_called_with(b"audio_chunk")
108102
mock_open.assert_called_once_with("test.pcm", "wb")
109103

110104
@patch("builtins.open")

veadk/tools/builtin_tools/tts.py

Lines changed: 33 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,9 @@
2424
from google.adk.tools import ToolContext
2525
from veadk.config import getenv, settings
2626
from veadk.utils.logger import get_logger
27-
from veadk.utils.audio_manager import AudioDeviceManager, AudioConfig
2827

2928
logger = get_logger(__name__)
3029

31-
input_audio_config = {
32-
"chunk": 3200,
33-
"format": "pcm",
34-
"channels": 1,
35-
"sample_rate": 16000,
36-
"bit_size": 8,
37-
}
38-
39-
output_audio_config = {
40-
"chunk": 3200,
41-
"format": "pcm",
42-
"channels": 1,
43-
"sample_rate": 24000,
44-
"bit_size": 8,
45-
}
46-
4730

4831
def text_to_speech(text: str, tool_context: ToolContext) -> Dict[str, Any]:
4932
"""TTS provides users with the ability to convert text to speech, turning the text content of LLM into audio.
@@ -57,7 +40,7 @@ def text_to_speech(text: str, tool_context: ToolContext) -> Dict[str, Any]:
5740
A dict with the saved audio path.
5841
"""
5942
url = "https://openspeech.bytedance.com/api/v3/tts/unidirectional"
60-
audio_save_path = ""
43+
temp_dir = getenv("TOOL_VESPEECH_AUDIO_OUTPUT_PATH", tempfile.gettempdir())
6144

6245
app_id = getenv("TOOL_VESPEECH_APP_ID")
6346
speaker = getenv(
@@ -106,10 +89,13 @@ def text_to_speech(text: str, tool_context: ToolContext) -> Dict[str, Any]:
10689
logger.debug(f"Request TTS server with payload: {payload}.")
10790
response = session.post(url, headers=headers, json=payload, stream=True)
10891

92+
os.makedirs(temp_dir, exist_ok=True)
10993
with tempfile.NamedTemporaryFile(
110-
suffix=".pcm", delete=False, dir=tempfile.gettempdir()
94+
suffix=".pcm", delete=False, dir=temp_dir
11195
) as tmp:
11296
audio_save_path = tmp.name # e.g. /tmp/tts_12345.pcm
97+
logger.debug(f"Created temporary file: {audio_save_path}")
98+
11399
handle_server_response(response, audio_save_path)
114100

115101
except Exception as e:
@@ -122,8 +108,6 @@ def text_to_speech(text: str, tool_context: ToolContext) -> Dict[str, Any]:
122108
f"Execution Error: {e}"
123109
}
124110
finally:
125-
if audio_save_path and os.path.exists(audio_save_path):
126-
os.remove(audio_save_path)
127111
if response:
128112
response.close()
129113
session.close()
@@ -150,18 +134,29 @@ def handle_server_response(
150134
audio_queue = queue.Queue()
151135
total_audio_size = 0
152136

153-
audio_device = AudioDeviceManager(
154-
AudioConfig(**input_audio_config), AudioConfig(**output_audio_config)
155-
)
156-
157-
# init output stream
158-
output_stream = audio_device.open_output_stream()
137+
output_stream, player_thread = None, None
159138
stop_event = threading.Event()
160-
player_thread = threading.Thread(
161-
target=_audio_player_thread, args=(audio_queue, output_stream, stop_event)
162-
)
163-
player_thread.daemon = True
164-
player_thread.start()
139+
try:
140+
from veadk.utils.audio_manager import (
141+
AudioDeviceManager,
142+
AudioConfig,
143+
input_audio_config,
144+
output_audio_config,
145+
)
146+
147+
audio_device = AudioDeviceManager(
148+
AudioConfig(**input_audio_config), AudioConfig(**output_audio_config)
149+
)
150+
151+
# init output stream
152+
output_stream = audio_device.open_output_stream()
153+
player_thread = threading.Thread(
154+
target=_audio_player_thread, args=(audio_queue, output_stream, stop_event)
155+
)
156+
player_thread.daemon = True
157+
player_thread.start()
158+
except Exception as e:
159+
logger.error(f"Failed to initialize audio device: {e}")
165160

166161
try:
167162
for chunk in response.iter_lines(decode_unicode=True):
@@ -194,10 +189,12 @@ def handle_server_response(
194189
logger.error(f"handle tts failed: {e}, response: {response}")
195190
raise
196191
finally:
197-
audio_queue.join()
198-
stop_event.set()
199-
player_thread.join()
200-
output_stream.close()
192+
if output_stream:
193+
audio_queue.join()
194+
stop_event.set()
195+
if player_thread and player_thread.is_alive():
196+
player_thread.join()
197+
output_stream.close()
201198

202199

203200
def _audio_player_thread(audio_queue, output_stream, stop_event):

veadk/utils/audio_manager.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,29 @@
1515
from dataclasses import dataclass
1616
from typing import Optional
1717

18-
import pyaudio
18+
try:
    import pyaudio

    PYAUDIO_AVAILABLE = True
except ImportError:
    # pyaudio is optional; keep this module importable when it is absent.
    pyaudio = None
    PYAUDIO_AVAILABLE = False

# Sample-format code for 16-bit PCM, resolved once for both configs.
# When pyaudio is missing the name is bound to None, so reading
# `pyaudio.paInt16` directly would raise AttributeError at import time and
# defeat the guarded import above. 8 is the value of PortAudio's paInt16
# constant, so the configs are identical with or without pyaudio installed.
_SAMPLE_FORMAT_INT16 = pyaudio.paInt16 if PYAUDIO_AVAILABLE else 8

# Default capture-side settings: mono PCM at 16 kHz, 3200-frame chunks.
input_audio_config = {
    "chunk": 3200,
    "format": "pcm",
    "channels": 1,
    "sample_rate": 16000,
    "bit_size": _SAMPLE_FORMAT_INT16,
}

# Default playback-side settings: mono PCM at 24 kHz, 3200-frame chunks.
output_audio_config = {
    "chunk": 3200,
    "format": "pcm",
    "channels": 1,
    "sample_rate": 24000,
    "bit_size": _SAMPLE_FORMAT_INT16,
}
1941

2042

2143
@dataclass
@@ -33,6 +55,11 @@ class AudioDeviceManager:
3355
"""audio device manager, handle audio input/output"""
3456

3557
def __init__(self, input_config: AudioConfig, output_config: AudioConfig):
58+
if not PYAUDIO_AVAILABLE:
59+
raise RuntimeError(
60+
"pyaudio is not installed. Please install it via: "
61+
"pip install veadk-python[speech]"
62+
)
3663
self.input_config = input_config
3764
self.output_config = output_config
3865
self.pyaudio = pyaudio.PyAudio()

0 commit comments

Comments
 (0)