diff --git a/apps/locales/en_US/LC_MESSAGES/django.po b/apps/locales/en_US/LC_MESSAGES/django.po index 2a60f5396b8..619312e4533 100644 --- a/apps/locales/en_US/LC_MESSAGES/django.po +++ b/apps/locales/en_US/LC_MESSAGES/django.po @@ -8814,4 +8814,55 @@ msgid "Real-time speech recognition - Fun-ASR/Paraformer" msgstr "" msgid "Qwen-Omni" +msgstr "" + +msgid "Super-humanoid: Lingxiaoxuan Flow" +msgstr "" + +msgid "Super-humanoid: Lingyuyan Flow" +msgstr "" + +msgid "Super-humanoid: Lingfeiyi Flow" +msgstr "" + +msgid "Super-humanoid: Lingxiaoyue Flow" +msgstr "" + +msgid "Super-humanoid: Sun Dasheng Flow" +msgstr "" + +msgid "Super-humanoid: Lingyuzhao Flow" +msgstr "" + +msgid "Super-humanoid: Lingxiaotang Flow" +msgstr "" + +msgid "Super-humanoid: Lingxiaorong Flow" +msgstr "" + +msgid "Super-humanoid: Xinyun Flow" +msgstr "" + +msgid "Super-humanoid: Grant (EN)" +msgstr "" + +msgid "Super-humanoid: Lila (EN)" +msgstr "" + +msgid "Super-humanoid: Lingwanwan Pro" +msgstr "" + +msgid "Super-humanoid: Yiyi Pro" +msgstr "" + +msgid "Super-humanoid: Huifangnv Pro" +msgstr "" + +msgid "Super-humanoid: Lingxiaoying Pro" +msgstr "" + +msgid "Super-humanoid: Lingfeibo Pro" +msgstr "" + +msgid "Super-humanoid: Lingyuyan Pro" msgstr "" \ No newline at end of file diff --git a/apps/locales/zh_CN/LC_MESSAGES/django.po b/apps/locales/zh_CN/LC_MESSAGES/django.po index c4674986f0f..07f9994aacb 100644 --- a/apps/locales/zh_CN/LC_MESSAGES/django.po +++ b/apps/locales/zh_CN/LC_MESSAGES/django.po @@ -8940,4 +8940,55 @@ msgid "Real-time speech recognition - Fun-ASR/Paraformer" msgstr "实时语音识别-Fun-ASR/Paraformer" msgid "Qwen-Omni" -msgstr "多模态" \ No newline at end of file +msgstr "多模态" + +msgid "Super-humanoid: Lingxiaoxuan Flow" +msgstr "聆小璇" + +msgid "Super-humanoid: Lingyuyan Flow" +msgstr "聆玉言" + +msgid "Super-humanoid: Lingfeiyi Flow" +msgstr "聆飞逸" + +msgid "Super-humanoid: Lingxiaoyue Flow" +msgstr "聆小玥" + +msgid "Super-humanoid: Sun Dasheng Flow" +msgstr "孙大圣" + +msgid "Super-humanoid: Lingyuzhao Flow" +msgstr "聆玉昭" + +msgid "Super-humanoid: Lingxiaotang Flow" +msgstr "聆小糖" + +msgid "Super-humanoid: Lingxiaorong Flow" +msgstr "聆小蓉" + +msgid "Super-humanoid: Xinyun Flow" +msgstr "心云" + +msgid "Super-humanoid: Grant (EN)" +msgstr "Grant" + +msgid "Super-humanoid: Lila (EN)" +msgstr "Lila" + +msgid "Super-humanoid: Lingwanwan Pro" +msgstr "聆万万" + +msgid "Super-humanoid: Yiyi Pro" +msgstr "依依" + +msgid "Super-humanoid: Huifangnv Pro" +msgstr "惠芳女" + +msgid "Super-humanoid: Lingxiaoying Pro" +msgstr "聆小颖" + +msgid "Super-humanoid: Lingfeibo Pro" +msgstr "聆飞博" + +msgid "Super-humanoid: Lingyuyan Pro" +msgstr "聆玉言" \ No newline at end of file diff --git a/apps/locales/zh_Hant/LC_MESSAGES/django.po b/apps/locales/zh_Hant/LC_MESSAGES/django.po index 5f49094abb2..90d1b5ec682 100644 --- a/apps/locales/zh_Hant/LC_MESSAGES/django.po +++ b/apps/locales/zh_Hant/LC_MESSAGES/django.po @@ -8940,4 +8940,55 @@ msgid "Real-time speech recognition - Fun-ASR/Paraformer" msgstr "實時語音識別-Fun-ASR/Paraformer" msgid "Qwen-Omni" -msgstr "多模態" \ No newline at end of file +msgstr "多模態" + +msgid "Super-humanoid: Lingxiaoxuan Flow" +msgstr "聆小璇" + +msgid "Super-humanoid: Lingyuyan Flow" +msgstr "聆玉言" + +msgid "Super-humanoid: Lingfeiyi Flow" +msgstr "聆飛逸" + +msgid "Super-humanoid: Lingxiaoyue Flow" +msgstr "聆小玥" + +msgid "Super-humanoid: Sun Dasheng Flow" +msgstr "孫大聖" + +msgid "Super-humanoid: Lingyuzhao Flow" +msgstr "聆玉昭" + +msgid "Super-humanoid: Lingxiaotang Flow" +msgstr "聆小糖" + +msgid "Super-humanoid: Lingxiaorong Flow" +msgstr "聆小蓉" + +msgid "Super-humanoid: Xinyun Flow" +msgstr "心雲" + +msgid "Super-humanoid: Grant (EN)" +msgstr "Grant" + +msgid "Super-humanoid: Lila (EN)" +msgstr "Lila" + +msgid "Super-humanoid: Lingwanwan Pro" +msgstr "聆萬萬" + +msgid "Super-humanoid: Yiyi Pro" +msgstr "依依" + +msgid "Super-humanoid: Huifangnv Pro" +msgstr "惠芳女" + +msgid "Super-humanoid: Lingxiaoying Pro" +msgstr "聆小穎" + +msgid "Super-humanoid: Lingfeibo Pro" +msgstr "聆飛博" + +msgid "Super-humanoid: Lingyuyan Pro" +msgstr "聆玉言" \ No newline at end of file diff --git a/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/stt/default_stt.py b/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/stt/default_stt.py index 06f410b69b4..85a1802e2ad 100644 --- a/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/stt/default_stt.py +++ b/apps/models_provider/impl/aliyun_bai_lian_model_provider/credential/stt/default_stt.py @@ -39,7 +39,6 @@ def is_valid(self, raise_exception: bool = False ) -> bool: model_type_list = provider.get_model_type_list() - model_type_list = provider.get_model_type_list() if not any(mt.get('value') == model_type for mt in model_type_list): raise AppApiException( ValidCode.valid_error.value, diff --git a/apps/models_provider/impl/xf_model_provider/credential/default_tts.py b/apps/models_provider/impl/xf_model_provider/credential/default_tts.py deleted file mode 100644 index 6d4a10a8d43..00000000000 --- a/apps/models_provider/impl/xf_model_provider/credential/default_tts.py +++ /dev/null @@ -1,133 +0,0 @@ -# coding=utf-8 -""" -讯飞 TTS 工厂类 Credential,根据 api_version 路由到具体 Credential -""" -from typing import Dict - -from django.utils.translation import gettext_lazy as _, gettext - -from common import forms -from common.exception.app_exception import AppApiException -from common.forms import BaseForm, TooltipLabel -from models_provider.base_model_provider import BaseModelCredential, ValidCode -from common.utils.logger import maxkb_logger - - -class XunFeiDefaultTTSModelCredential(BaseForm, BaseModelCredential): - """讯飞 TTS 工厂类 Credential,根据 api_version 参数路由到具体实现""" - - api_version = forms.SingleSelect( - _("API Version"), required=True, - text_field='label', - value_field='value', - default_value='online', - option_list=[ - {'label': _('Online TTS'), 'value': 'online'}, - {'label': _('Super Humanoid TTS'), 'value': 'super_humanoid'} - ]) - - spark_api_url = forms.TextInputField('API URL', required=True, - default_value='wss://tts-api.xfyun.cn/v2/tts', - relation_show_field_dict={"api_version": ["online"]}) - spark_api_url_super = forms.TextInputField('API URL', required=True, - default_value='wss://cbm01.cn-huabei-1.xf-yun.com/v1/private/mcd9m97e6', - relation_show_field_dict={"api_version": ["super_humanoid"]}) - - # vcn 选择放在 credential 中,根据 api_version 联动显示 - vcn_online = forms.SingleSelect( - TooltipLabel(_('Speaker'), _('Speaker selection for standard TTS service')), - required=True, default_value='xiaoyan', - text_field='value', - value_field='value', - option_list=[ - {'text': _('iFlytek Xiaoyan'), 'value': 'xiaoyan'}, - {'text': _('iFlytek Xujiu'), 'value': 'aisjiuxu'}, - {'text': _('iFlytek Xiaoping'), 'value': 'aisxping'}, - {'text': _('iFlytek Xiaojing'), 'value': 'aisjinger'}, - {'text': _('iFlytek Xuxiaobao'), 'value': 'aisbabyxu'}, - ], - relation_show_field_dict={"api_version": ["online"]}) - - vcn_super = forms.SingleSelect( - TooltipLabel(_('Speaker'), _('Speaker selection for super-humanoid TTS service')), - required=True, default_value='x5_lingxiaoxuan_flow', - text_field='value', - value_field='value', - option_list=[ - {'text': _('Super-humanoid: Lingxiaoxuan Flow'), 'value': 'x5_lingxiaoxuan_flow'}, - {'text': _('Super-humanoid: Lingyuyan Flow'), 'value': 'x5_lingyuyan_flow'}, - {'text': _('Super-humanoid: Lingfeiyi Flow'), 'value': 'x5_lingfeiyi_flow'}, - {'text': _('Super-humanoid: Lingxiaoyue Flow'), 'value': 'x5_lingxiaoyue_flow'}, - {'text': _('Super-humanoid: Sun Dasheng Flow'), 'value': 'x5_sundasheng_flow'}, - {'text': _('Super-humanoid: Lingyuzhao Flow'), 'value': 'x5_lingyuzhao_flow'}, - {'text': _('Super-humanoid: Lingxiaotang Flow'), 'value': 'x5_lingxiaotang_flow'}, - {'text': _('Super-humanoid: Lingxiaorong Flow'), 'value': 'x5_lingxiaorong_flow'}, - {'text': _('Super-humanoid: Xinyun Flow'), 'value': 'x5_xinyun_flow'}, - {'text': _('Super-humanoid: Grant (EN)'), 'value': 'x5_EnUs_Grant_flow'}, - {'text': _('Super-humanoid: Lila (EN)'), 'value': 'x5_EnUs_Lila_flow'}, - {'text': _('Super-humanoid: Lingwanwan Pro'), 'value': 'x6_lingwanwan_pro'}, - {'text': _('Super-humanoid: Yiyi Pro'), 'value': 'x6_yiyi_pro'}, - {'text': _('Super-humanoid: Huifangnv Pro'), 'value': 'x6_huifangnv_pro'}, - {'text': _('Super-humanoid: Lingxiaoying Pro'), 'value': 'x6_lingxiaoying_pro'}, - {'text': _('Super-humanoid: Lingfeibo Pro'), 'value': 'x6_lingfeibo_pro'}, - {'text': _('Super-humanoid: Lingyuyan Pro'), 'value': 'x6_lingyuyan_pro'}, - ], - relation_show_field_dict={"api_version": ["super_humanoid"]}) - - spark_app_id = forms.TextInputField('APP ID', required=True) - spark_api_key = forms.PasswordInputField("API Key", required=True) - spark_api_secret = forms.PasswordInputField('API Secret', required=True) - - def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], model_params, provider, - raise_exception=False): - model_type_list = provider.get_model_type_list() - if not any(list(filter(lambda mt: mt.get('value') == model_type, model_type_list))): - raise AppApiException(ValidCode.valid_error.value, - gettext('{model_type} Model type is not supported').format(model_type=model_type)) - - api_version = model_credential.get('api_version', 'online') - if api_version == 'super_humanoid': - required_keys = ['spark_api_url_super', 'spark_app_id', 'spark_api_key', 'spark_api_secret'] - else: - required_keys = ['spark_api_url', 'spark_app_id', 'spark_api_key', 'spark_api_secret'] - - for key in required_keys: - if key not in model_credential: - if raise_exception: - raise AppApiException(ValidCode.valid_error.value, gettext('{key} is required').format(key=key)) - else: - return False - try: - model = provider.get_model(model_type, model_name, model_credential, **model_params) - model.check_auth() - except Exception as e: - maxkb_logger.error(f'Exception: {e}', exc_info=True) - if isinstance(e, AppApiException): - raise e - if raise_exception: - raise AppApiException(ValidCode.valid_error.value, - gettext( - 'Verification failed, please check whether the parameters are correct: {error}').format( - error=str(e))) - else: - return False - return True - - def encryption_dict(self, model: Dict[str, object]): - return {**model, 'spark_api_secret': super().encryption(model.get('spark_api_secret', ''))} - - def get_model_params_setting_form(self, model_name): - # params 只包含通用参数,vcn 已在 credential 中 - return XunFeiDefaultTTSModelParams() - - -class XunFeiDefaultTTSModelParams(BaseForm): - """工厂类的参数表单,只包含通用参数""" - - speed = forms.SliderField( - TooltipLabel(_('speaking speed'), _('Speech speed, optional value: [0-100], default is 50')), - required=True, default_value=50, - _min=1, - _max=100, - _step=5, - precision=1) diff --git a/apps/models_provider/impl/xf_model_provider/credential/tts/__init__.py b/apps/models_provider/impl/xf_model_provider/credential/tts/__init__.py new file mode 100644 index 00000000000..18197d857fc --- /dev/null +++ b/apps/models_provider/impl/xf_model_provider/credential/tts/__init__.py @@ -0,0 +1,11 @@ +# coding=utf-8 +""" + @project: MaxKB + @Author:niu + @file: __init__.py.py + @date:2025/12/10 14:13 + @desc: +""" +from .tts import * +from .default_tts import * +from .super_humanoid_tts import * \ No newline at end of file diff --git a/apps/models_provider/impl/xf_model_provider/credential/tts/default_tts.py b/apps/models_provider/impl/xf_model_provider/credential/tts/default_tts.py new file mode 100644 index 00000000000..7c65b18736c --- /dev/null +++ b/apps/models_provider/impl/xf_model_provider/credential/tts/default_tts.py @@ -0,0 +1,82 @@ +# coding=utf-8 +""" +讯飞 TTS 工厂类 Credential,根据 api_version 路由到具体 Credential +""" +from typing import Dict + +from django.utils.translation import gettext_lazy as _, gettext + +from common import forms +from common.exception.app_exception import AppApiException +from common.forms import BaseForm, TooltipLabel +from models_provider.base_model_provider import BaseModelCredential, ValidCode +from common.utils.logger import maxkb_logger + + +class XunFeiDefaultTTSModelCredential(BaseForm, BaseModelCredential): + """讯飞 TTS 工厂类 Credential,根据 api_version 参数路由到具体实现""" + + api_version = forms.SingleSelect( + _("API Version"), required=True, + text_field='label', + value_field='value', + default_value='online', + option_list=[ + {'label': _('Online TTS'), 'value': 'online'}, + {'label': _('Super Humanoid TTS'), 'value': 'super_humanoid'} + ]) + + spark_api_url = forms.TextInputField(_('API URL'), required=True) + spark_app_id = forms.TextInputField('APP ID', required=True) + spark_api_key = forms.PasswordInputField("API Key", required=True) + spark_api_secret = forms.PasswordInputField('API Secret', required=True) + + def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], model_params, provider, + raise_exception=False): + model_type_list = provider.get_model_type_list() + if not any(list(filter(lambda mt: mt.get('value') == model_type, model_type_list))): + raise AppApiException(ValidCode.valid_error.value, + gettext('{model_type} Model type is not supported').format(model_type=model_type)) + + api_version = model_credential.get('api_version', 'online') + + for key in ['spark_api_url', 'spark_app_id', 'spark_api_key', 'spark_api_secret']: + if key not in model_credential: + if raise_exception: + raise AppApiException(ValidCode.valid_error.value, gettext('{key} is required').format(key=key)) + else: + return False + try: + model = provider.get_model(model_type, model_name, model_credential, **model_params) + model.check_auth() + except Exception as e: + maxkb_logger.error(f'Exception: {e}', exc_info=True) + if isinstance(e, AppApiException): + raise e + if raise_exception: + raise AppApiException(ValidCode.valid_error.value, + gettext( + 'Verification failed, please check whether the parameters are correct: {error}').format( + error=str(e))) + else: + return False + return True + + def encryption_dict(self, model: Dict[str, object]): + return {**model, 'spark_api_secret': super().encryption(model.get('spark_api_secret', ''))} + + def get_model_params_setting_form(self, model_name): + # params 只包含通用参数,vcn 已在 credential 中 + return XunFeiDefaultTTSModelParams() + + +class XunFeiDefaultTTSModelParams(BaseForm): + """工厂类的参数表单,只包含通用参数""" + + speed = forms.SliderField( + TooltipLabel(_('speaking speed'), _('Speech speed, optional value: [0-100], default is 50')), + required=True, default_value=50, + _min=1, + _max=100, + _step=5, + precision=1) diff --git a/apps/models_provider/impl/xf_model_provider/credential/super_humanoid_tts.py b/apps/models_provider/impl/xf_model_provider/credential/tts/super_humanoid_tts.py similarity index 67% rename from apps/models_provider/impl/xf_model_provider/credential/super_humanoid_tts.py rename to apps/models_provider/impl/xf_model_provider/credential/tts/super_humanoid_tts.py index be0fb068f8c..741b9845489 100644 --- a/apps/models_provider/impl/xf_model_provider/credential/super_humanoid_tts.py +++ b/apps/models_provider/impl/xf_model_provider/credential/tts/super_humanoid_tts.py @@ -17,26 +17,26 @@ class XunFeiSuperHumanoidTTSModelParams(BaseForm): vcn = forms.SingleSelect( TooltipLabel(_('Speaker'), _('Speaker selection for super-humanoid TTS service')), required=True, default_value='x5_lingxiaoxuan_flow', - text_field='value', + text_field='label', value_field='value', option_list=[ - {'text': _('Super-humanoid: Lingxiaoxuan Flow'), 'value': 'x5_lingxiaoxuan_flow'}, - {'text': _('Super-humanoid: Lingyuyan Flow'), 'value': 'x5_lingyuyan_flow'}, - {'text': _('Super-humanoid: Lingfeiyi Flow'), 'value': 'x5_lingfeiyi_flow'}, - {'text': _('Super-humanoid: Lingxiaoyue Flow'), 'value': 'x5_lingxiaoyue_flow'}, - {'text': _('Super-humanoid: Sun Dasheng Flow'), 'value': 'x5_sundasheng_flow'}, - {'text': _('Super-humanoid: Lingyuzhao Flow'), 'value': 'x5_lingyuzhao_flow'}, - {'text': _('Super-humanoid: Lingxiaotang Flow'), 'value': 'x5_lingxiaotang_flow'}, - {'text': _('Super-humanoid: Lingxiaorong Flow'), 'value': 'x5_lingxiaorong_flow'}, - {'text': _('Super-humanoid: Xinyun Flow'), 'value': 'x5_xinyun_flow'}, - {'text': _('Super-humanoid: Grant (EN)'), 'value': 'x5_EnUs_Grant_flow'}, - {'text': _('Super-humanoid: Lila (EN)'), 'value': 'x5_EnUs_Lila_flow'}, - {'text': _('Super-humanoid: Lingwanwan Pro'), 'value': 'x6_lingwanwan_pro'}, - {'text': _('Super-humanoid: Yiyi Pro'), 'value': 'x6_yiyi_pro'}, - {'text': _('Super-humanoid: Huifangnv Pro'), 'value': 'x6_huifangnv_pro'}, - {'text': _('Super-humanoid: Lingxiaoying Pro'), 'value': 'x6_lingxiaoying_pro'}, - {'text': _('Super-humanoid: Lingfeibo Pro'), 'value': 'x6_lingfeibo_pro'}, - {'text': _('Super-humanoid: Lingyuyan Pro'), 'value': 'x6_lingyuyan_pro'}, + {'label': _('Super-humanoid: Lingxiaoxuan Flow'), 'value': 'x5_lingxiaoxuan_flow'}, + {'label': _('Super-humanoid: Lingyuyan Flow'), 'value': 'x5_lingyuyan_flow'}, + {'label': _('Super-humanoid: Lingfeiyi Flow'), 'value': 'x5_lingfeiyi_flow'}, + {'label': _('Super-humanoid: Lingxiaoyue Flow'), 'value': 'x5_lingxiaoyue_flow'}, + {'label': _('Super-humanoid: Sun Dasheng Flow'), 'value': 'x5_sundasheng_flow'}, + {'label': _('Super-humanoid: Lingyuzhao Flow'), 'value': 'x5_lingyuzhao_flow'}, + {'label': _('Super-humanoid: Lingxiaotang Flow'), 'value': 'x5_lingxiaotang_flow'}, + {'label': _('Super-humanoid: Lingxiaorong Flow'), 'value': 'x5_lingxiaorong_flow'}, + {'label': _('Super-humanoid: Xinyun Flow'), 'value': 'x5_xinyun_flow'}, + {'label': _('Super-humanoid: Grant (EN)'), 'value': 'x5_EnUs_Grant_flow'}, + {'label': _('Super-humanoid: Lila (EN)'), 'value': 'x5_EnUs_Lila_flow'}, + {'label': _('Super-humanoid: Lingwanwan Pro'), 'value': 'x6_lingwanwan_pro'}, + {'label': _('Super-humanoid: Yiyi Pro'), 'value': 'x6_yiyi_pro'}, + {'label': _('Super-humanoid: Huifangnv Pro'), 'value': 'x6_huifangnv_pro'}, + {'label': _('Super-humanoid: Lingxiaoying Pro'), 'value': 'x6_lingxiaoying_pro'}, + {'label': _('Super-humanoid: Lingfeibo Pro'), 'value': 'x6_lingfeibo_pro'}, + {'label': _('Super-humanoid: Lingyuyan Pro'), 'value': 'x6_lingyuyan_pro'}, ]) speed = forms.SliderField( diff --git a/apps/models_provider/impl/xf_model_provider/credential/tts.py b/apps/models_provider/impl/xf_model_provider/credential/tts/tts.py similarity index 100% rename from apps/models_provider/impl/xf_model_provider/credential/tts.py rename to apps/models_provider/impl/xf_model_provider/credential/tts/tts.py diff --git a/apps/models_provider/impl/xf_model_provider/model/default_tts.py b/apps/models_provider/impl/xf_model_provider/model/default_tts.py deleted file mode 100644 index 9493264c7cf..00000000000 --- a/apps/models_provider/impl/xf_model_provider/model/default_tts.py +++ /dev/null @@ -1,58 +0,0 @@ -# coding=utf-8 -""" - @project: MaxKB - @Author: - @file: default_tts.py - @date:2025/12/9 - @desc: 讯飞 TTS 工厂类,根据 api_version 路由到具体实现 -""" -from typing import Dict - -from models_provider.base_model_provider import MaxKBBaseModel -from models_provider.impl.base_tts import BaseTextToSpeech - - -class XFSparkDefaultTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): - """讯飞 TTS 工厂类,根据 api_version 参数路由到具体实现""" - - def check_auth(self): - pass - - def text_to_speech(self, text): - pass - - @staticmethod - def is_cache_model(): - return False - - @staticmethod - def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs): - from models_provider.impl.xf_model_provider.model.tts import XFSparkTextToSpeech - from models_provider.impl.xf_model_provider.model.super_humanoid_tts import XFSparkSuperHumanoidTextToSpeech - - api_version = model_credential.get('api_version', 'online') - - if api_version == 'super_humanoid': - # 超拟人:从 credential 获取 vcn_super,构造统一的 credential 格式 - vcn = model_credential.get('vcn_super', 'x5_lingxiaoxuan_flow') - unified_credential = { - 'spark_app_id': model_credential.get('spark_app_id'), - 'spark_api_key': model_credential.get('spark_api_key'), - 'spark_api_secret': model_credential.get('spark_api_secret'), - 'spark_api_url': model_credential.get('spark_api_url_super'), - } - return XFSparkSuperHumanoidTextToSpeech.new_instance( - model_type, model_name, unified_credential, vcn=vcn, **model_kwargs - ) - else: - # 在线语音:从 credential 获取 vcn_online - vcn = model_credential.get('vcn_online', 'xiaoyan') - unified_credential = { - 'spark_app_id': model_credential.get('spark_app_id'), - 'spark_api_key': model_credential.get('spark_api_key'), - 'spark_api_secret': model_credential.get('spark_api_secret'), - 'spark_api_url': model_credential.get('spark_api_url'), - } - return XFSparkTextToSpeech.new_instance( - model_type, model_name, unified_credential, vcn=vcn, **model_kwargs - ) diff --git a/apps/models_provider/impl/xf_model_provider/model/tts/__init__.py b/apps/models_provider/impl/xf_model_provider/model/tts/__init__.py new file mode 100644 index 00000000000..ee1091982c2 --- /dev/null +++ b/apps/models_provider/impl/xf_model_provider/model/tts/__init__.py @@ -0,0 +1,11 @@ +# coding=utf-8 +""" + @project: MaxKB + @Author:niu + @file: __init__.py.py + @date:2025/12/10 14:14 + @desc: +""" +from .super_humanoid_tts import * +from .tts import * +from .default_tts import * \ No newline at end of file diff --git a/apps/models_provider/impl/xf_model_provider/model/tts/default_tts.py b/apps/models_provider/impl/xf_model_provider/model/tts/default_tts.py new file mode 100644 index 00000000000..3b6439dc9fc --- /dev/null +++ b/apps/models_provider/impl/xf_model_provider/model/tts/default_tts.py @@ -0,0 +1,54 @@ +# coding=utf-8 +""" + @project: MaxKB + @Author: + @file: default_tts.py + @date:2025/12/9 + @desc: 讯飞 TTS 工厂类,根据 api_version 路由到具体实现 +""" +from typing import Dict + +from models_provider.base_model_provider import MaxKBBaseModel +from models_provider.impl.base_tts import BaseTextToSpeech + + +class XFSparkDefaultTextToSpeech(MaxKBBaseModel, BaseTextToSpeech): + """讯飞 TTS 工厂类,根据 api_version 参数路由到具体实现""" + + def check_auth(self): + pass + + def text_to_speech(self, text): + pass + + @staticmethod + def is_cache_model(): + return False + + @staticmethod + def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs): + from models_provider.impl.xf_model_provider.model.tts import XFSparkTextToSpeech + from models_provider.impl.xf_model_provider.model.tts.super_humanoid_tts import XFSparkSuperHumanoidTextToSpeech + + api_version = model_credential.get('api_version', 'online') + + if api_version == 'super_humanoid': + return XFSparkSuperHumanoidTextToSpeech( + spark_app_id=model_credential.get('spark_app_id'), + spark_api_key=model_credential.get('spark_api_key'), + spark_api_secret=model_credential.get('spark_api_secret'), + spark_api_url=model_credential.get('spark_api_url'), + params = model_kwargs, + **model_kwargs + ) + else: + # 在线语音:从 credential 获取 vcn_online + return XFSparkTextToSpeech( + spark_app_id=model_credential.get('spark_app_id'), + spark_api_key=model_credential.get('spark_api_key'), + spark_api_secret=model_credential.get('spark_api_secret'), + spark_api_url=model_credential.get('spark_api_url'), + params={key: v for key, v in model_kwargs.items() if + not ['parameter', 'streaming', 'model_id', 'use_local'].__contains__(key)}, + **model_kwargs + ) \ No newline at end of file diff --git a/apps/models_provider/impl/xf_model_provider/model/super_humanoid_tts.py b/apps/models_provider/impl/xf_model_provider/model/tts/super_humanoid_tts.py similarity index 89% rename from apps/models_provider/impl/xf_model_provider/model/super_humanoid_tts.py rename to apps/models_provider/impl/xf_model_provider/model/tts/super_humanoid_tts.py index 9bdec12b068..a210729d292 100644 --- a/apps/models_provider/impl/xf_model_provider/model/super_humanoid_tts.py +++ b/apps/models_provider/impl/xf_model_provider/model/tts/super_humanoid_tts.py @@ -48,20 +48,20 @@ def is_cache_model(): @staticmethod def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs): - spark_api_url = model_credential.get('spark_api_url') - vcn = model_kwargs.get('vcn', 'x5_lingxiaoxuan_flow') + # vcn = model_kwargs.get('vcn', 'x5_lingxiaoxuan_flow') - params = {'vcn': vcn} + params = {} for k, v in model_kwargs.items(): - if k not in ['model_id', 'use_local', 'streaming', 'vcn']: + if k not in ['model_id', 'use_local', 'streaming']: params[k] = v return XFSparkSuperHumanoidTextToSpeech( spark_app_id=model_credential.get('spark_app_id'), spark_api_key=model_credential.get('spark_api_key'), spark_api_secret=model_credential.get('spark_api_secret'), - spark_api_url=spark_api_url, - params=params + spark_api_url=model_credential.get('spark_api_url'), + params=params, + **model_kwargs ) def create_url(self): @@ -154,11 +154,6 @@ async def handle_message(ws): return audio_bytes async def send(self, ws, text): - vcn_value = self.params.get("vcn", "x5_lingxiaoxuan_flow") - - # 确保 vcn 值符合超拟人格式 - if not vcn_value or not (str(vcn_value).startswith('x5_') or str(vcn_value).startswith('x6_')): - vcn_value = 'x5_lingxiaoxuan_flow' audio_params = { "encoding": self.params.get("encoding", "lame"), @@ -169,7 +164,9 @@ async def send(self, ws, text): } tts_params = { - "vcn": vcn_value, + **{key: v for key, v in self.params.items() if + not ['parameter', 'streaming', 'model_id', 'use_local'].__contains__(key)}, + "vcn": self.params.get("vcn") or "x5_lingxiaoxuan_flow", "audio": audio_params, "volume": self.params.get("volume", 50), "speed": self.params.get("speed", 50), @@ -185,11 +182,14 @@ async def send(self, ws, text): "seq": 0, "text": encoded_text } + s = {"tts": tts_params} + # "parameter": {"oar":"xxxx"} + parameter = self.params.get("parameter") or {} d = { "header": {"app_id": self.spark_app_id, "status": 2}, - "parameter": {"tts": tts_params}, + "parameter": {"tts": tts_params} | parameter, "payload": {"text": payload_text_obj} } - await ws.send(json.dumps(d)) \ No newline at end of file + await ws.send(json.dumps(d)) diff --git a/apps/models_provider/impl/xf_model_provider/model/tts.py b/apps/models_provider/impl/xf_model_provider/model/tts/tts.py similarity index 100% rename from apps/models_provider/impl/xf_model_provider/model/tts.py rename to apps/models_provider/impl/xf_model_provider/model/tts/tts.py diff --git a/apps/models_provider/impl/xf_model_provider/xf_model_provider.py b/apps/models_provider/impl/xf_model_provider/xf_model_provider.py index 54ac9307da8..0876ca3f6a3 100644 --- a/apps/models_provider/impl/xf_model_provider/xf_model_provider.py +++ b/apps/models_provider/impl/xf_model_provider/xf_model_provider.py @@ -17,16 +17,15 @@ from models_provider.impl.xf_model_provider.credential.llm import XunFeiLLMModelCredential from models_provider.impl.xf_model_provider.credential.stt import XunFeiSTTModelCredential from models_provider.impl.xf_model_provider.credential.tts import XunFeiTTSModelCredential -from models_provider.impl.xf_model_provider.credential.super_humanoid_tts import XunFeiSuperHumanoidTTSModelCredential -from models_provider.impl.xf_model_provider.credential.default_tts import XunFeiDefaultTTSModelCredential +from models_provider.impl.xf_model_provider.credential.tts.super_humanoid_tts import XunFeiSuperHumanoidTTSModelCredential +from models_provider.impl.xf_model_provider.credential.tts.default_tts import XunFeiDefaultTTSModelCredential from models_provider.impl.xf_model_provider.credential.zh_en_stt import ZhEnXunFeiSTTModelCredential from models_provider.impl.xf_model_provider.model.embedding import XFEmbedding -from models_provider.impl.xf_model_provider.model.image import XFSparkImage from models_provider.impl.xf_model_provider.model.llm import XFChatSparkLLM from models_provider.impl.xf_model_provider.model.stt import XFSparkSpeechToText from models_provider.impl.xf_model_provider.model.tts import XFSparkTextToSpeech -from models_provider.impl.xf_model_provider.model.super_humanoid_tts import XFSparkSuperHumanoidTextToSpeech -from models_provider.impl.xf_model_provider.model.default_tts import XFSparkDefaultTextToSpeech +from models_provider.impl.xf_model_provider.model.tts.super_humanoid_tts import XFSparkSuperHumanoidTextToSpeech +from models_provider.impl.xf_model_provider.model.tts.default_tts import XFSparkDefaultTextToSpeech from maxkb.conf import PROJECT_DIR from django.utils.translation import gettext as _