Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 5.2.4 on 2025-09-16 08:10

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``stt_model_params_setting`` JSON column to both application tables."""

    dependencies = [
        ('application', '0002_application_simple_mcp'),
    ]

    # The live application model and its version snapshot model receive the
    # same column definition, so one AddField is generated per model name.
    operations = [
        migrations.AddField(
            model_name=target_model,
            name='stt_model_params_setting',
            field=models.JSONField(default=dict, verbose_name='STT模型参数相关设置'),
        )
        for target_model in ('application', 'applicationversion')
    ]
2 changes: 2 additions & 0 deletions apps/application/models/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class Application(AppModelMixin):
model_setting = models.JSONField(verbose_name="模型参数相关设置", default=get_model_setting_dict)
model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
tts_model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
stt_model_params_setting = models.JSONField(verbose_name="STT模型参数相关设置", default=dict)
problem_optimization = models.BooleanField(verbose_name="问题优化", default=False)
icon = models.CharField(max_length=256, verbose_name="应用icon", default="./favicon.ico")
work_flow = models.JSONField(verbose_name="工作流数据", default=dict)
Expand Down Expand Up @@ -145,6 +146,7 @@ class ApplicationVersion(AppModelMixin):
model_setting = models.JSONField(verbose_name="模型参数相关设置", default=get_model_setting_dict)
model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
tts_model_params_setting = models.JSONField(verbose_name="模型参数相关设置", default=dict)
stt_model_params_setting = models.JSONField(verbose_name="STT模型参数相关设置", default=dict)
problem_optimization = models.BooleanField(verbose_name="问题优化", default=False)
icon = models.CharField(max_length=256, verbose_name="应用icon", default="./favicon.ico")
work_flow = models.JSONField(verbose_name="工作流数据", default=dict)
Expand Down
5 changes: 4 additions & 1 deletion apps/application/serializers/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@ def reset_application_version(application_version, application):
'user_id': 'user_id', 'model_id': 'model_id', 'knowledge_setting': 'knowledge_setting',
'model_setting': 'model_setting', 'model_params_setting': 'model_params_setting',
'tts_model_params_setting': 'tts_model_params_setting',
'stt_model_params_setting': 'stt_model_params_setting',
'problem_optimization': 'problem_optimization', 'icon': 'icon', 'work_flow': 'work_flow',
'problem_optimization_prompt': 'problem_optimization_prompt', 'tts_model_id': 'tts_model_id',
'stt_model_id': 'stt_model_id', 'tts_model_enable': 'tts_model_enable',
Expand Down Expand Up @@ -785,6 +786,8 @@ def update_work_flow_model(instance):
instance['stt_autosend'] = node_data['stt_autosend']
if 'tts_model_params_setting' in node_data:
instance['tts_model_params_setting'] = node_data['tts_model_params_setting']
if 'stt_model_params_setting' in node_data:
instance['stt_model_params_setting'] = node_data['stt_model_params_setting']
if 'file_upload_enable' in node_data:
instance['file_upload_enable'] = node_data['file_upload_enable']
if 'file_upload_setting' in node_data:
Expand Down Expand Up @@ -830,7 +833,7 @@ def edit(self, instance: Dict, with_valid=True):
'knowledge_setting', 'model_setting', 'problem_optimization', 'dialogue_number',
'stt_model_id', 'tts_model_id', 'tts_model_enable', 'stt_model_enable', 'tts_type',
'tts_autoplay', 'stt_autosend', 'file_upload_enable', 'file_upload_setting',
'api_key_is_active', 'icon', 'work_flow', 'model_params_setting', 'tts_model_params_setting',
'api_key_is_active', 'icon', 'work_flow', 'model_params_setting', 'tts_model_params_setting', 'stt_model_params_setting',
'mcp_enable', 'mcp_tool_ids', 'mcp_servers', 'mcp_source', 'tool_enable', 'tool_ids', 'mcp_output_enable',
'problem_optimization_prompt', 'clean_time', 'folder_id']
for update_key in update_keys:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def speech_to_text(self, audio_file):
model=self.model,
messages=messages,
result_format="message",
**self.params
)
if response.status_code == 200:
text = response["output"]["choices"][0]["message"].content[0]["text"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def speech_to_text(self, audio_file):
# stream 必须设置为 True,否则会报错
stream=True,
stream_options={"include_usage": True},
extra_body=self.params
)
result = []
for chunk in completion:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def speech_to_text(self, audio_file):
"SourceType": 1,
"VoiceFormat": "mp3",
"Data": _v.decode(),
**self.params
}
req.from_json_string(json.dumps(params))

Expand Down
49 changes: 38 additions & 11 deletions apps/models_provider/impl/xf_model_provider/model/zh_en_stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,36 +22,48 @@
ssl_context.verify_mode = ssl.CERT_NONE


def deep_merge_dict(target_dict, source_dict):
    """Return a new dict holding ``target_dict`` overlaid with ``source_dict``.

    Keys whose values are dicts on both sides are merged recursively; for any
    other key the value from ``source_dict`` wins. Neither argument is
    modified.

    :param target_dict: base mapping; anything that is not a dict is treated
        as an empty base.
    :param source_dict: overriding mapping; if it is not a dict it is returned
        unchanged.
    :return: the merged dict, or ``source_dict`` itself when it is not a dict.
    """
    if not isinstance(source_dict, dict):
        return source_dict

    merged = target_dict.copy() if isinstance(target_dict, dict) else {}
    for name, incoming in source_dict.items():
        existing = merged.get(name)
        both_are_dicts = isinstance(existing, dict) and isinstance(incoming, dict)
        # Only dict-on-dict collisions are merged; everything else is replaced.
        merged[name] = deep_merge_dict(existing, incoming) if both_are_dicts else incoming
    return merged


class XFZhEnSparkSpeechToText(MaxKBBaseModel, BaseSpeechToText):
spark_app_id: str
spark_api_key: str
spark_api_secret: str
spark_api_url: str
params: dict

def __init__(self, **kwargs):
    """Store the Spark connection settings and the extra request parameters.

    All keyword arguments are forwarded to the parent initialiser first.

    :param kwargs: expected keys are ``spark_api_url``, ``spark_app_id``,
        ``spark_api_key``, ``spark_api_secret`` and, optionally, ``params``
        (a dict of extra settings merged into every frame that is sent).
    """
    super().__init__(**kwargs)
    self.spark_api_url = kwargs.get('spark_api_url')
    self.spark_app_id = kwargs.get('spark_app_id')
    self.spark_api_key = kwargs.get('spark_api_key')
    self.spark_api_secret = kwargs.get('spark_api_secret')
    # send_audio iterates self.params.items(), so a missing or None value
    # must become an empty dict instead of raising AttributeError later.
    self.params = kwargs.get('params') or {}

# Reports whether this model is a "cache model"; this implementation always returns False.
# NOTE(review): presumably the provider framework reads this flag to decide whether
# instances may be reused between calls — confirm against the caller.
@staticmethod
def is_cache_model():
return False

@staticmethod
def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
    """Create a speech-to-text client from stored credentials and call settings.

    :param model_type: accepted for interface compatibility; not used here.
    :param model_name: accepted for interface compatibility; not used here.
    :param model_credential: mapping that provides ``spark_app_id``,
        ``spark_api_key``, ``spark_api_secret`` and ``spark_api_url``.
    :param model_kwargs: extra settings. The whole dict is kept as ``params``
        and is also spread into the constructor as keyword arguments.
    :return: a configured ``XFZhEnSparkSpeechToText`` instance.
    """
    # The earlier max_tokens/temperature filtering is dropped on purpose:
    # spreading both that filtered dict and model_kwargs would supply the same
    # keyword twice, and the settings already travel inside model_kwargs.
    return XFZhEnSparkSpeechToText(
        spark_app_id=model_credential.get('spark_app_id'),
        spark_api_key=model_credential.get('spark_api_key'),
        spark_api_secret=model_credential.get('spark_api_secret'),
        spark_api_url=model_credential.get('spark_api_url'),
        params=model_kwargs,
        **model_kwargs
    )

# 生成url
Expand Down Expand Up @@ -106,6 +118,10 @@ async def handle():
maxkb_logger.error(f"语音识别错误: {str(err)}: {traceback.format_exc()}")
return ""

# Return `frame` with `params` deep-merged on top of it by delegating to deep_merge_dict.
# The merge helper works on a copy, so callers must use the returned dict.
def merge_params_to_frame(self, frame,params):

return deep_merge_dict(frame, params)

async def send_audio(self, ws, audio_file):
"""发送音频数据"""
chunk_size = 4000
Expand All @@ -123,8 +139,11 @@ async def send_audio(self, ws, audio_file):
"header": {"app_id": self.spark_app_id, "status": 0},
"parameter": {
"iat": {
"domain": "slm", "language": "zh_cn", "accent": "mandarin",
"eos": 10000, "vinfo": 1,
"domain": "slm",
"language": "zh_cn",
"accent": "mandarin",
"eos": 10000,
"vinfo": 1,
"result": {"encoding": "utf8", "compress": "raw", "format": "json"}
}
},
Expand All @@ -135,6 +154,9 @@ async def send_audio(self, ws, audio_file):
}
}
}
frame = self.merge_params_to_frame(frame,{key: value for key, value in self.params.items() if
not ['model_id', 'use_local', 'streaming'].__contains__(key)})

# 中间帧
else:
frame = {
Expand All @@ -147,6 +169,9 @@ async def send_audio(self, ws, audio_file):
}
}

frame = self.merge_params_to_frame(frame,{key: value for key, value in self.params.items() if
not ['model_id', 'use_local', 'streaming','parameter'].__contains__(key)})

await ws.send(json.dumps(frame))
seq += 1

Expand All @@ -160,17 +185,19 @@ async def send_audio(self, ws, audio_file):
}
}
}
await ws.send(json.dumps(end_frame))

end_frame = self.merge_params_to_frame(end_frame,{key: value for key, value in self.params.items() if
not ['model_id', 'use_local', 'streaming','parameter'].__contains__(key)})

# 接受信息处理器
await ws.send(json.dumps(end_frame))

# 接受信息处理器
async def handle_message(self, ws):
result_text = ""
while True:
try:
message = await asyncio.wait_for(ws.recv(), timeout=30.0)
data = json.loads(message)

if data['header']['code'] != 0:
raise Exception("")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The provided code has several areas that require consideration for both correctness and performance optimizations. Here are the main points to address:

  1. Imports: Ensure all necessary imports at the beginning of the script, especially those related to handling WebSocket connections (asyncio), JSON encoding/decoding (json), and SSL context setup.

  2. Deep Merge Function: The deep_merge_dict function seems correct but can be optimized slightly. Consider using recursion directly without returning the dictionary explicitly in some cases if it's only needed internally.

  3. Optional Initialization Parameters: In new_instance, optional parameters are copied into the optional_params dictionary only after checking that each one exists and is not None. This could be simplified if these values are always present or are given defaults elsewhere.

  4. Handling Empty Frames: There might be edge cases where frames can become empty after processing (e.g., no tokens passed). Adding checks around this could prevent invalid operations when sending back frames to the client.

  5. JSON Encoding Issues: Make sure there aren't any issues with whitespace formatting or additional trailing commas in the JSON strings being sent, as this can cause parsing errors.

  6. Exception Handling: handle_message raises Exception("") when the response header code is non-zero. Provide a descriptive error message (for example, one that includes the returned code) instead of an empty string, and log it appropriately.

  7. Code Readability: Although brief, consider adding comments explaining complex logic blocks or variable names for clarity when reviewing again after a refactoring pass.

  8. Parameter Merging Logic: Adjust how parameters from self.params are merged into frames to ensure only valid keys specific to SparkAPI are used.

  9. WS Connection Management: Ensure that the connection states handle exceptions gracefully during receive/send operations due to timeouts or disconnections. You might want to add retries or close/reconnect mechanisms depending on application needs.

By addressing these points, you'll improve robustness, readability, efficiency, and reliability of the implemented streaming speech-to-text service framework.

Expand Down
1 change: 1 addition & 0 deletions ui/src/api/type/application.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ interface ApplicationFormType {
work_flow?: any
model_params_setting?: any
tts_model_params_setting?: any
stt_model_params_setting?: any
stt_model_id?: string
tts_model_id?: string
stt_model_enable?: boolean
Expand Down
47 changes: 44 additions & 3 deletions ui/src/views/application/ApplicationSetting.vue
Original file line number Diff line number Diff line change
Expand Up @@ -482,14 +482,28 @@
</div>
</div>
</template>
<ModelSelect
<div class="flex-between w-full">
<ModelSelect
v-show="applicationForm.stt_model_enable"
v-model="applicationForm.stt_model_id"
:placeholder="$t('views.application.form.voiceInput.placeholder')"
:options="sttModelOptions"
@change="sttModelChange"
:model-type="'STT'"
>
</ModelSelect>
>
</ModelSelect>

<el-button
v-if="applicationForm.stt_model_enable"
@click="openSTTParamSettingDialog"
:disabled="!applicationForm.stt_model_id"
class="ml-8"
>
<el-icon>
<Operation />
</el-icon>
</el-button>
</div>
</el-form-item>
<el-form-item
prop="tts_model_id"
Expand Down Expand Up @@ -583,6 +597,7 @@
<AIModeParamSettingDialog ref="AIModeParamSettingDialogRef" @refresh="refreshForm" />
<GeneratePromptDialog @replace="replace" ref="GeneratePromptDialogRef" />
<TTSModeParamSettingDialog ref="TTSModeParamSettingDialogRef" @refresh="refreshTTSForm" />
<STTModeParamSettingDialog ref="STTModeParamSettingDialogRef" @refresh="refreshSTTForm" />
<ParamSettingDialog ref="ParamSettingDialogRef" @refresh="refreshParam" />
<AddKnowledgeDialog
ref="AddKnowledgeDialogRef"
Expand Down Expand Up @@ -612,6 +627,7 @@ import { relatedObject } from '@/utils/array'
import { MsgSuccess, MsgWarning } from '@/utils/message'
import { t } from '@/locales'
import TTSModeParamSettingDialog from './component/TTSModeParamSettingDialog.vue'
import STTModeParamSettingDialog from './component/STTModelParamSettingDialog.vue'
import ReasoningParamSettingDialog from './component/ReasoningParamSettingDialog.vue'
import permissionMap from '@/permission'
import { EditionConst } from '@/utils/permission/data'
Expand Down Expand Up @@ -652,6 +668,7 @@ const optimizationPrompt =
const AIModeParamSettingDialogRef = ref<InstanceType<typeof AIModeParamSettingDialog>>()
const ReasoningParamSettingDialogRef = ref<InstanceType<typeof ReasoningParamSettingDialog>>()
const TTSModeParamSettingDialogRef = ref<InstanceType<typeof TTSModeParamSettingDialog>>()
const STTModeParamSettingDialogRef = ref<InstanceType<typeof STTModeParamSettingDialog>>()
const ParamSettingDialogRef = ref<InstanceType<typeof ParamSettingDialog>>()
const GeneratePromptDialogRef = ref<InstanceType<typeof GeneratePromptDialog>>()

Expand Down Expand Up @@ -756,6 +773,7 @@ const submit = async (formEl: FormInstance | undefined) => {
if (!formEl) return
await formEl.validate((valid, fields) => {
if (valid) {
console.log(applicationForm.value)
loadSharedApi({ type: 'application', systemType: apiType.value })
.putApplication(id, applicationForm.value, loading)
.then(() => {
Expand Down Expand Up @@ -806,6 +824,17 @@ const openTTSParamSettingDialog = () => {
}
}

// Open the STT parameter dialog for the currently selected STT model.
// Does nothing while no STT model is selected.
const openSTTParamSettingDialog = () => {
  const sttModelId = applicationForm.value.stt_model_id
  if (!sttModelId) {
    return
  }
  const currentSetting = applicationForm.value.stt_model_params_setting
  STTModeParamSettingDialogRef.value?.open(sttModelId, id, currentSetting)
}


// Open the parameter-setting dialog, handing it the current application form data.
const openParamSettingDialog = () => {
ParamSettingDialogRef.value?.open(applicationForm.value)
}
Expand Down Expand Up @@ -905,6 +934,10 @@ function refreshTTSForm(data: any) {
applicationForm.value.tts_model_params_setting = data
}

// Store the STT parameter settings on the application form.
// Wired to the STT dialog's `refresh` event and also called by sttModelChange to clear the settings.
function refreshSTTForm(data: any) {
applicationForm.value.stt_model_params_setting = data
}

function removeKnowledge(id: any) {
if (applicationForm.value.knowledge_id_list) {
applicationForm.value.knowledge_id_list.splice(
Expand Down Expand Up @@ -1022,6 +1055,14 @@ function ttsModelChange() {
}
}

// React to a change of the selected STT model: ask the dialog to reset the
// parameters for the newly chosen model, or clear them when nothing is selected.
function sttModelChange() {
  const sttModelId = applicationForm.value.stt_model_id
  if (!sttModelId) {
    refreshSTTForm({})
    return
  }
  STTModeParamSettingDialogRef.value?.reset_default(sttModelId, id)
}

function ttsModelEnableChange() {
if (!applicationForm.value.tts_model_enable) {
applicationForm.value.tts_model_id = undefined
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no obvious irregularities or issues with the provided code snippet. However, here are some points and suggestions for improvement:

Points to Consider:

  1. Accessibility: The @click event handler on the button that opens the dialog should be more descriptive of its purpose.
  2. Code Reusability: The method refreshSTTForm() can be reused across different parts if there is more context where it might be used.
  3. Conditional Rendering Optimization: Ensure that the elements under <div class="flex-between w-full"> are only rendered when applicationForm.stt_model_enable is true.

Specific Suggestions:

  1. Descriptive Event Handler:

    @click="openSTTParamSettingDialogWithDescription"
  2. Reusable Method:

    function refreshSttParams(setting: any) {
      applicationForm.value.stt_model_params_setting = setting
    }
  3. Optimization:

    <template v-if="applicationForm.stt_model_enable">
      <div class="flex-between w-full">
        <!-- existing content -->
      </div>
    </template>

If you have specific use cases or additional improvements in mind, please let me know!

Expand Down
Loading
Loading