Skip to content

Commit dae15c8

Browse files
committed
feat: Support stt model params setting
1 parent e1b3f47 commit dae15c8

File tree

11 files changed

+238
-15
lines changed

11 files changed

+238
-15
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Generated by Django 5.2.4 on 2025-09-16 08:10
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Add the ``stt_model_params_setting`` JSON field to two application models.

    The field is added with ``default=dict`` to both ``application`` and
    ``applicationversion``.
    """

    dependencies = [('application', '0002_application_simple_mcp')]

    # Both models receive an identical field definition, so the operations
    # are generated from the list of target model names.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name='stt_model_params_setting',
            field=models.JSONField(default=dict, verbose_name='STT模型参数相关设置'),
        )
        for target_model in ('application', 'applicationversion')
    ]

apps/application/models/application.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class Application(AppModelMixin):
7272
model_setting = models.JSONField(verbose_name="模型参数相关设置", default=get_model_setting_dict)
7373
model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
7474
tts_model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
75+
stt_model_params_setting = models.JSONField(verbose_name="STT模型参数相关设置", default=dict)
7576
problem_optimization = models.BooleanField(verbose_name="问题优化", default=False)
7677
icon = models.CharField(max_length=256, verbose_name="应用icon", default="./favicon.ico")
7778
work_flow = models.JSONField(verbose_name="工作流数据", default=dict)
@@ -145,6 +146,7 @@ class ApplicationVersion(AppModelMixin):
145146
model_setting = models.JSONField(verbose_name="模型参数相关设置", default=get_model_setting_dict)
146147
model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
147148
tts_model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
149+
stt_model_params_setting = models.JSONField(verbose_name="STT模型参数相关设置", default=dict)
148150
problem_optimization = models.BooleanField(verbose_name="问题优化", default=False)
149151
icon = models.CharField(max_length=256, verbose_name="应用icon", default="./favicon.ico")
150152
work_flow = models.JSONField(verbose_name="工作流数据", default=dict)

apps/application/serializers/application.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,7 @@ def reset_application_version(application_version, application):
700700
'user_id': 'user_id', 'model_id': 'model_id', 'knowledge_setting': 'knowledge_setting',
701701
'model_setting': 'model_setting', 'model_params_setting': 'model_params_setting',
702702
'tts_model_params_setting': 'tts_model_params_setting',
703+
'stt_model_params_setting': 'stt_model_params_setting',
703704
'problem_optimization': 'problem_optimization', 'icon': 'icon', 'work_flow': 'work_flow',
704705
'problem_optimization_prompt': 'problem_optimization_prompt', 'tts_model_id': 'tts_model_id',
705706
'stt_model_id': 'stt_model_id', 'tts_model_enable': 'tts_model_enable',
@@ -785,6 +786,8 @@ def update_work_flow_model(instance):
785786
instance['stt_autosend'] = node_data['stt_autosend']
786787
if 'tts_model_params_setting' in node_data:
787788
instance['tts_model_params_setting'] = node_data['tts_model_params_setting']
789+
if 'stt_model_params_setting' in node_data:
790+
instance['stt_model_params_setting'] = node_data['stt_model_params_setting']
788791
if 'file_upload_enable' in node_data:
789792
instance['file_upload_enable'] = node_data['file_upload_enable']
790793
if 'file_upload_setting' in node_data:
@@ -830,7 +833,7 @@ def edit(self, instance: Dict, with_valid=True):
830833
'knowledge_setting', 'model_setting', 'problem_optimization', 'dialogue_number',
831834
'stt_model_id', 'tts_model_id', 'tts_model_enable', 'stt_model_enable', 'tts_type',
832835
'tts_autoplay', 'stt_autosend', 'file_upload_enable', 'file_upload_setting',
833-
'api_key_is_active', 'icon', 'work_flow', 'model_params_setting', 'tts_model_params_setting',
836+
'api_key_is_active', 'icon', 'work_flow', 'model_params_setting', 'tts_model_params_setting', 'stt_model_params_setting',
834837
'mcp_enable', 'mcp_tool_ids', 'mcp_servers', 'mcp_source', 'tool_enable', 'tool_ids', 'mcp_output_enable',
835838
'problem_optimization_prompt', 'clean_time', 'folder_id']
836839
for update_key in update_keys:

apps/models_provider/impl/aliyun_bai_lian_model_provider/model/asr_stt.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def speech_to_text(self, audio_file):
6060
model=self.model,
6161
messages=messages,
6262
result_format="message",
63+
**self.params
6364
)
6465
if response.status_code == 200:
6566
text = response["output"]["choices"][0]["message"].content[0]["text"]

apps/models_provider/impl/aliyun_bai_lian_model_provider/model/omni_stt.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ def speech_to_text(self, audio_file):
7777
# stream 必须设置为 True,否则会报错
7878
stream=True,
7979
stream_options={"include_usage": True},
80+
extra_body=self.params
8081
)
8182
result = []
8283
for chunk in completion:

apps/models_provider/impl/tencent_model_provider/model/stt.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def speech_to_text(self, audio_file):
6969
"SourceType": 1,
7070
"VoiceFormat": "mp3",
7171
"Data": _v.decode(),
72+
**self.params
7273
}
7374
req.from_json_string(json.dumps(params))
7475

apps/models_provider/impl/xf_model_provider/model/zh_en_stt.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,36 +22,48 @@
2222
ssl_context.verify_mode = ssl.CERT_NONE
2323

2424

25+
def deep_merge_dict(target_dict, source_dict):
    """Recursively merge ``source_dict`` on top of ``target_dict``.

    Keys present in both inputs whose values are both dicts are merged
    recursively; for every other key the value from ``source_dict`` wins.
    Neither input is modified, and the returned dict shares no nested
    mutable objects with either input, so it can be mutated safely.

    :param target_dict: base mapping; anything that is not a dict is treated
        as an empty dict.
    :param source_dict: mapping whose entries override ``target_dict``.
    :return: a new merged dict, or ``source_dict`` itself when it is not a
        dict (matching the behaviour callers already rely on).
    """
    import copy  # local import so the helper does not depend on file-level imports

    def _merge_into(dest, src):
        # ``dest`` is already a private copy, so it can be updated in place.
        for key, value in src.items():
            if isinstance(value, dict) and isinstance(dest.get(key), dict):
                _merge_into(dest[key], value)
            else:
                # Copy so later edits to the result never leak into ``src``.
                dest[key] = copy.deepcopy(value)
        return dest

    if not isinstance(source_dict, dict):
        return source_dict
    base = copy.deepcopy(target_dict) if isinstance(target_dict, dict) else {}
    return _merge_into(base, source_dict)
36+
37+
2538
class XFZhEnSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
2639
spark_app_id: str
2740
spark_api_key: str
2841
spark_api_secret: str
2942
spark_api_url: str
43+
params: dict
3044

3145
def __init__(self, **kwargs):
    """Initialise the model from keyword arguments.

    Reads ``spark_api_url``, ``spark_app_id``, ``spark_api_key`` and
    ``spark_api_secret`` from ``kwargs`` (each is ``None`` when absent) and
    stores the optional ``params`` mapping that is merged into outgoing
    frames.

    :param kwargs: forwarded unchanged to the parent initialiser.
    """
    super().__init__(**kwargs)
    self.spark_api_url = kwargs.get('spark_api_url')
    self.spark_app_id = kwargs.get('spark_app_id')
    self.spark_api_key = kwargs.get('spark_api_key')
    self.spark_api_secret = kwargs.get('spark_api_secret')
    # send_audio() calls self.params.items() unconditionally, so a missing or
    # None ``params`` must become an empty dict rather than raise AttributeError.
    self.params = kwargs.get('params') or {}
3752

3853
@staticmethod
3954
def is_cache_model():
4055
return False
4156

4257
@staticmethod
4358
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
44-
optional_params = {}
45-
if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
46-
optional_params['max_tokens'] = model_kwargs['max_tokens']
47-
if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
48-
optional_params['temperature'] = model_kwargs['temperature']
59+
4960
return XFZhEnSparkSpeechToText(
5061
spark_app_id=model_credential.get('spark_app_id'),
5162
spark_api_key=model_credential.get('spark_api_key'),
5263
spark_api_secret=model_credential.get('spark_api_secret'),
5364
spark_api_url=model_credential.get('spark_api_url'),
54-
**optional_params
65+
params=model_kwargs,
66+
**model_kwargs
5567
)
5668

5769
# 生成url
@@ -106,6 +118,10 @@ async def handle():
106118
maxkb_logger.error(f"语音识别错误: {str(err)}: {traceback.format_exc()}")
107119
return ""
108120

121+
def merge_params_to_frame(self, frame, params):
    """Return ``frame`` with ``params`` merged on top via ``deep_merge_dict``.

    :param frame: the frame dict to use as the merge base.
    :param params: overrides passed to ``deep_merge_dict`` as the source.
    :return: whatever ``deep_merge_dict(frame, params)`` returns.
    """
    merged_frame = deep_merge_dict(frame, params)
    return merged_frame
124+
109125
async def send_audio(self, ws, audio_file):
110126
"""发送音频数据"""
111127
chunk_size = 4000
@@ -123,8 +139,11 @@ async def send_audio(self, ws, audio_file):
123139
"header": {"app_id": self.spark_app_id, "status": 0},
124140
"parameter": {
125141
"iat": {
126-
"domain": "slm", "language": "zh_cn", "accent": "mandarin",
127-
"eos": 10000, "vinfo": 1,
142+
"domain": "slm",
143+
"language": "zh_cn",
144+
"accent": "mandarin",
145+
"eos": 10000,
146+
"vinfo": 1,
128147
"result": {"encoding": "utf8", "compress": "raw", "format": "json"}
129148
}
130149
},
@@ -135,6 +154,9 @@ async def send_audio(self, ws, audio_file):
135154
}
136155
}
137156
}
157+
frame = self.merge_params_to_frame(frame,{key: value for key, value in self.params.items() if
158+
not ['model_id', 'use_local', 'streaming'].__contains__(key)})
159+
138160
# 中间帧
139161
else:
140162
frame = {
@@ -147,6 +169,9 @@ async def send_audio(self, ws, audio_file):
147169
}
148170
}
149171

172+
frame = self.merge_params_to_frame(frame,{key: value for key, value in self.params.items() if
173+
not ['model_id', 'use_local', 'streaming','parameter'].__contains__(key)})
174+
150175
await ws.send(json.dumps(frame))
151176
seq += 1
152177

@@ -160,17 +185,19 @@ async def send_audio(self, ws, audio_file):
160185
}
161186
}
162187
}
163-
await ws.send(json.dumps(end_frame))
164188

189+
end_frame = self.merge_params_to_frame(end_frame,{key: value for key, value in self.params.items() if
190+
not ['model_id', 'use_local', 'streaming','parameter'].__contains__(key)})
165191

166-
# 接受信息处理器
192+
await ws.send(json.dumps(end_frame))
193+
194+
# 接受信息处理器
167195
async def handle_message(self, ws):
168196
result_text = ""
169197
while True:
170198
try:
171199
message = await asyncio.wait_for(ws.recv(), timeout=30.0)
172200
data = json.loads(message)
173-
174201
if data['header']['code'] != 0:
175202
raise Exception("")
176203

ui/src/api/type/application.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ interface ApplicationFormType {
1717
work_flow?: any
1818
model_params_setting?: any
1919
tts_model_params_setting?: any
20+
stt_model_params_setting?: any
2021
stt_model_id?: string
2122
tts_model_id?: string
2223
stt_model_enable?: boolean

ui/src/views/application/ApplicationSetting.vue

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -482,14 +482,28 @@
482482
</div>
483483
</div>
484484
</template>
485-
<ModelSelect
485+
<div class="flex-between w-full">
486+
<ModelSelect
486487
v-show="applicationForm.stt_model_enable"
487488
v-model="applicationForm.stt_model_id"
488489
:placeholder="$t('views.application.form.voiceInput.placeholder')"
489490
:options="sttModelOptions"
491+
@change="sttModelChange"
490492
:model-type="'STT'"
491-
>
492-
</ModelSelect>
493+
>
494+
</ModelSelect>
495+
496+
<el-button
497+
v-if="applicationForm.stt_model_enable"
498+
@click="openSTTParamSettingDialog"
499+
:disabled="!applicationForm.stt_model_id"
500+
class="ml-8"
501+
>
502+
<el-icon>
503+
<Operation />
504+
</el-icon>
505+
</el-button>
506+
</div>
493507
</el-form-item>
494508
<el-form-item
495509
prop="tts_model_id"
@@ -583,6 +597,7 @@
583597
<AIModeParamSettingDialog ref="AIModeParamSettingDialogRef" @refresh="refreshForm" />
584598
<GeneratePromptDialog @replace="replace" ref="GeneratePromptDialogRef" />
585599
<TTSModeParamSettingDialog ref="TTSModeParamSettingDialogRef" @refresh="refreshTTSForm" />
600+
<STTModeParamSettingDialog ref="STTModeParamSettingDialogRef" @refresh="refreshSTTForm" />
586601
<ParamSettingDialog ref="ParamSettingDialogRef" @refresh="refreshParam" />
587602
<AddKnowledgeDialog
588603
ref="AddKnowledgeDialogRef"
@@ -612,6 +627,7 @@ import { relatedObject } from '@/utils/array'
612627
import { MsgSuccess, MsgWarning } from '@/utils/message'
613628
import { t } from '@/locales'
614629
import TTSModeParamSettingDialog from './component/TTSModeParamSettingDialog.vue'
630+
import STTModeParamSettingDialog from './component/STTModelParamSettingDialog.vue'
615631
import ReasoningParamSettingDialog from './component/ReasoningParamSettingDialog.vue'
616632
import permissionMap from '@/permission'
617633
import { EditionConst } from '@/utils/permission/data'
@@ -652,6 +668,7 @@ const optimizationPrompt =
652668
const AIModeParamSettingDialogRef = ref<InstanceType<typeof AIModeParamSettingDialog>>()
653669
const ReasoningParamSettingDialogRef = ref<InstanceType<typeof ReasoningParamSettingDialog>>()
654670
const TTSModeParamSettingDialogRef = ref<InstanceType<typeof TTSModeParamSettingDialog>>()
671+
const STTModeParamSettingDialogRef = ref<InstanceType<typeof STTModeParamSettingDialog>>()
655672
const ParamSettingDialogRef = ref<InstanceType<typeof ParamSettingDialog>>()
656673
const GeneratePromptDialogRef = ref<InstanceType<typeof GeneratePromptDialog>>()
657674
@@ -756,6 +773,7 @@ const submit = async (formEl: FormInstance | undefined) => {
756773
if (!formEl) return
757774
await formEl.validate((valid, fields) => {
758775
if (valid) {
776+
console.log(applicationForm.value)
759777
loadSharedApi({ type: 'application', systemType: apiType.value })
760778
.putApplication(id, applicationForm.value, loading)
761779
.then(() => {
@@ -806,6 +824,17 @@ const openTTSParamSettingDialog = () => {
806824
}
807825
}
808826
827+
// Open the STT parameter dialog for the currently selected STT model.
const openSTTParamSettingDialog = () => {
  const sttModelId = applicationForm.value.stt_model_id
  // Do nothing when no STT model is selected.
  if (!sttModelId) {
    return
  }
  STTModeParamSettingDialogRef.value?.open(
    sttModelId,
    id,
    applicationForm.value.stt_model_params_setting,
  )
}
836+
837+
809838
const openParamSettingDialog = () => {
810839
ParamSettingDialogRef.value?.open(applicationForm.value)
811840
}
@@ -905,6 +934,10 @@ function refreshTTSForm(data: any) {
905934
applicationForm.value.tts_model_params_setting = data
906935
}
907936
937+
function refreshSTTForm(data: any) {
938+
applicationForm.value.stt_model_params_setting = data
939+
}
940+
908941
function removeKnowledge(id: any) {
909942
if (applicationForm.value.knowledge_id_list) {
910943
applicationForm.value.knowledge_id_list.splice(
@@ -1022,6 +1055,14 @@ function ttsModelChange() {
10221055
}
10231056
}
10241057
1058+
// Handle a change of the selected STT model.
function sttModelChange() {
  const sttModelId = applicationForm.value.stt_model_id
  if (!sttModelId) {
    // No model selected: clear the stored STT parameter settings.
    refreshSTTForm({})
    return
  }
  // NOTE(review): reset_default presumably reloads the defaults for the new model; confirm in the dialog component.
  STTModeParamSettingDialogRef.value?.reset_default(sttModelId, id)
}
1065+
10251066
function ttsModelEnableChange() {
10261067
if (!applicationForm.value.tts_model_enable) {
10271068
applicationForm.value.tts_model_id = undefined

0 commit comments

Comments
 (0)