Skip to content

Commit da1fa24

Browse files
songguocola authored and kevinlin09 committed
feat(model/qwen3-livetranslate&asr-realtime):add input params
1 parent 7a3767c commit da1fa24

File tree

1 file changed

+45
-1
lines changed

1 file changed

+45
-1
lines changed

dashscope/audio/qwen_omni/omni_realtime.py

Lines changed: 45 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import platform
55
import threading
66
import time
7+
from dataclasses import field, dataclass
78
from typing import List, Optional
89
import uuid
910
from enum import Enum, unique
@@ -29,6 +30,26 @@ def on_event(self, message: str) -> None:
2930
pass
3031

3132

33+
@dataclass
class TranslationParams:
    """Translation settings accepted by ``update_session``.

    When an instance is passed as ``translation_params``, ``update_session``
    stores ``language`` in the session config as
    ``config['translation']['language']``. According to that method's
    docstring, the setting is only effective with the
    qwen3-livetranslate-flash-realtime model (or later models) and should
    not be set for other models.

    Attributes:
        language: Language to use for translation. Defaults to ``None``,
            which is forwarded unchanged. The set of accepted values is
            defined by the service and is not visible in this file.
    """
    # Optional because the default is None; a bare `str` annotation would
    # mislead type checkers and readers.
    language: Optional[str] = None
39+
40+
41+
@dataclass
class TranscriptionParams:
    """Transcription (ASR) settings accepted by ``update_session``.

    When an instance is passed as ``transcription_params``,
    ``update_session`` writes these values into the top level of the
    session config under the keys ``'language'``, ``'corpus'``,
    ``'input_audio_format'`` and ``'sample_rate'``. According to that
    method's docstring, the settings are only effective with the
    qwen3-asr-flash-realtime model (or later models) and should not be set
    for other models.

    Attributes:
        language: Language of the audio to transcribe. Defaults to
            ``None``. Accepted values are defined by the service and are
            not visible in this file.
        sample_rate: Sample rate of the input audio (presumably in Hz;
            confirm against the service documentation). Defaults to 16000.
        input_audio_format: Name of the input audio format. Defaults to
            ``"pcm"``.
        corpus: Corpus payload sent as-is under ``'corpus'`` when not
            ``None``. Its schema is not visible in this file.
        corpus_text: Plain-text corpus. When written to the config it is
            sent as ``{"text": corpus_text}`` under the same ``'corpus'``
            key, so it replaces a ``corpus`` dict supplied alongside it.
    """
    # Fields that default to None are Optional so the annotations match the
    # values callers actually get.
    language: Optional[str] = None
    sample_rate: int = 16000
    input_audio_format: str = "pcm"
    corpus: Optional[dict] = None
    corpus_text: Optional[str] = None
51+
52+
3253
@unique
3354
class AudioFormat(Enum):
3455
# format, sample_rate, channels, bit_rate, name
@@ -171,7 +192,7 @@ def __send_str(self, data: str, enable_log: bool = True):
171192

172193
def update_session(self,
173194
output_modalities: List[MultiModality],
174-
voice: str,
195+
voice: str = None,
175196
input_audio_format: AudioFormat = AudioFormat.
176197
PCM_16000HZ_MONO_16BIT,
177198
output_audio_format: AudioFormat = AudioFormat.
@@ -184,6 +205,8 @@ def update_session(self,
184205
turn_detection_threshold: float = 0.2,
185206
turn_detection_silence_duration_ms: int = 800,
186207
turn_detection_param: dict = None,
208+
translation_params: TranslationParams = None,
209+
transcription_params: TranscriptionParams = None,
187210
**kwargs) -> None:
188211
'''
189212
update session configuration, should be used before create response
@@ -206,6 +229,13 @@ def update_session(self,
206229
In a quiet environment, it may be necessary to decrease the threshold to improve sensitivity
207230
turn_detection_silence_duration_ms: int
208231
duration of silence in milliseconds to detect turn, range [200, 6000]
232+
translation_params: TranslationParams
233+
translation params, include language. Only effective with qwen3-livetranslate-flash-realtime model or
234+
further models. Do not set this parameter for other models.
235+
transcription_params: TranscriptionParams
236+
transcription params, include language, sample_rate, input_audio_format, corpus.
237+
Only effective with qwen3-asr-flash-realtime model or
238+
further models. Do not set this parameter for other models.
209239
'''
210240
self.config = {
211241
'modalities': [m.value for m in output_modalities],
@@ -230,6 +260,20 @@ def update_session(self,
230260
self.config['turn_detection'].update(turn_detection_param)
231261
else:
232262
self.config['turn_detection'] = None
263+
if translation_params is not None:
264+
self.config['translation'] = {
265+
'language': translation_params.language
266+
}
267+
if transcription_params is not None:
268+
self.config['language'] = transcription_params.language
269+
if transcription_params.corpus is not None:
270+
self.config['corpus'] = transcription_params.corpus
271+
if transcription_params.corpus_text is not None:
272+
self.config['corpus'] = {
273+
"text": transcription_params.corpus_text
274+
}
275+
self.config['input_audio_format'] = transcription_params.input_audio_format
276+
self.config['sample_rate']= transcription_params.sample_rate
233277
self.config.update(kwargs)
234278
self.__send_str(
235279
json.dumps({

0 commit comments

Comments
 (0)