fix: move pyaudio from the core dependencies to the optional `tts` extra

wangyue.demon · wangyue.demon · commit b0a781afcc82 · 2025-11-10T11:48:11.000+08:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,7 +36,6 @@ dependencies = [
     "pymysql>=1.1.1", # For MySQL database (short term memory)
     "opensearch-py==2.8.0",
     "filetype>=1.2.0",
-    "pyaudio>=0.2.14",
 ]
 
 [project.scripts]
@@ -55,6 +54,9 @@ database = [
     "tos>=2.8.4",                   # For TOS storage and Viking DB
     "mem0ai==0.1.118",              # For mem0
 ]
+tts = [
+    "pyaudio>=0.2.14",              # For audio stream input/output (veadk/utils/audio_manager.py)
+]
 eval = [
     "prometheus-client>=0.22.1",    # For exporting data to Prometheus pushgateway
     "deepeval>=3.2.6",              # For DeepEval-based evaluation
diff --git a/tests/auth/veauth/test_speech_veauth.py b/tests/auth/veauth/test_speech_veauth.py
@@ -40,10 +40,9 @@ def test_get_speech_token_with_env_vars(monkeypatch):
             request_body={
                 "ProjectName": "default",
                 "OnlyAvailable": True,
-                "Filter": {},
             },
             header={"X-Security-Token": ""},
-            action="ListApiKeys",
+            action="ListAPIKeys",
             ak="test_access_key",
             sk="test_secret_key",
             service="speech_saas_prod",
@@ -85,10 +84,9 @@ def test_get_speech_token_with_vefaas_iam(monkeypatch):
             request_body={
                 "ProjectName": "default",
                 "OnlyAvailable": True,
-                "Filter": {},
             },
             header={"X-Security-Token": "vefaas_session_token"},
-            action="ListApiKeys",
+            action="ListAPIKeys",
             ak="vefaas_access_key",
             sk="vefaas_secret_key",
             service="speech_saas_prod",
diff --git a/veadk/tools/builtin_tools/tts.py b/veadk/tools/builtin_tools/tts.py
@@ -18,7 +18,6 @@
 import base64
 import time
 import queue
-import pyaudio
 import threading
 import tempfile
 from typing import Dict, Any
@@ -34,15 +33,15 @@
     "format": "pcm",
     "channels": 1,
     "sample_rate": 16000,
-    "bit_size": pyaudio.paInt16,
+    "bit_size": 8,
 }
 
 output_audio_config = {
     "chunk": 3200,
     "format": "pcm",
     "channels": 1,
     "sample_rate": 24000,
-    "bit_size": pyaudio.paInt16,
+    "bit_size": 8,  # PyAudio sample-format code (value of pyaudio.paInt16, i.e. 16-bit PCM), not a bit count; inlined so this module does not import pyaudio
 }
 
 
diff --git a/veadk/utils/audio_manager.py b/veadk/utils/audio_manager.py
@@ -1,12 +1,27 @@
+# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from dataclasses import dataclass
-from typing import Optional, Dict, Any
+from typing import Optional
 
 import pyaudio
 
 
 @dataclass
 class AudioConfig:
     """audio config"""
+
     format: str
     bit_size: int
     channels: int
@@ -31,7 +46,7 @@ def open_input_stream(self) -> pyaudio.Stream:
             channels=self.input_config.channels,
             rate=self.input_config.sample_rate,
             input=True,
-            frames_per_buffer=self.input_config.chunk
+            frames_per_buffer=self.input_config.chunk,
         )
         return self.input_stream
 
@@ -41,7 +56,7 @@ def open_output_stream(self) -> pyaudio.Stream:
             channels=self.output_config.channels,
             rate=self.output_config.sample_rate,
             output=True,
-            frames_per_buffer=self.output_config.chunk
+            frames_per_buffer=self.output_config.chunk,
         )
         return self.output_stream
 
@@ -51,4 +66,3 @@ def cleanup(self) -> None:
                 stream.stop_stream()
                 stream.close()
         self.pyaudio.terminate()
-