diff --git a/apps/setting/models_provider/constants/model_provider_constants.py b/apps/setting/models_provider/constants/model_provider_constants.py
index e6bf698b01a..e4837e2f03a 100644
--- a/apps/setting/models_provider/constants/model_provider_constants.py
+++ b/apps/setting/models_provider/constants/model_provider_constants.py
@@ -23,6 +23,7 @@
     SiliconCloudModelProvider
 from setting.models_provider.impl.tencent_cloud_model_provider.tencent_cloud_model_provider import \
     TencentCloudModelProvider
+from setting.models_provider.impl.coreshub_model_provider.coreshub_model_provider import CoresHubModelProvider
 from setting.models_provider.impl.tencent_model_provider.tencent_model_provider import TencentModelProvider
 from setting.models_provider.impl.vllm_model_provider.vllm_model_provider import VllmModelProvider
 from setting.models_provider.impl.volcanic_engine_model_provider.volcanic_engine_model_provider import \
@@ -55,3 +56,4 @@ class ModelProvideConstants(Enum):
     aliyun_bai_lian_model_provider = AliyunBaiLianModelProvider()
     model_anthropic_provider = AnthropicModelProvider()
     model_siliconCloud_provider = SiliconCloudModelProvider()
+    model_coreshub_provider = CoresHubModelProvider()
diff --git a/apps/setting/models_provider/impl/coreshub_model_provider/__init__.py b/apps/setting/models_provider/impl/coreshub_model_provider/__init__.py
new file mode 100644
index 00000000000..474f083a247
--- /dev/null
+++ b/apps/setting/models_provider/impl/coreshub_model_provider/__init__.py
@@ -0,0 +1,8 @@
+# coding=utf-8
+"""
+    @project: MaxKB
+    @Author:Qingyang
+    @file: __init__.py
+    @date:2025/2/11 11:11
+    @desc: CoresHub model provider package
+"""
\ No newline at end of file
diff --git a/apps/setting/models_provider/impl/coreshub_model_provider/coreshub_model_provider.py b/apps/setting/models_provider/impl/coreshub_model_provider/coreshub_model_provider.py
new file mode 100644
index 00000000000..1e244349d93
--- /dev/null
+++ b/apps/setting/models_provider/impl/coreshub_model_provider/coreshub_model_provider.py
@@ -0,0 +1,91 @@
+# coding=utf-8
+"""
+    @project: maxkb
+    @Author:Qingyang
+    @file: coreshub_model_provider.py
+    @date:2025/2/11 12:11
+    @desc: CoresHub model provider: registers the LLM, embedding, STT and TTS models it offers
+"""
+import os
+
+from common.util.file_util import get_file_content
+from setting.models_provider.base_model_provider import IModelProvider, ModelProvideInfo, ModelInfo, \
+    ModelTypeConst, ModelInfoManage
+from setting.models_provider.impl.coreshub_model_provider.credential.embedding import CoresHubEmbeddingCredential
+from setting.models_provider.impl.coreshub_model_provider.credential.llm import CoresHubLLMModelCredential
+from setting.models_provider.impl.coreshub_model_provider.model.llm import CoresHubChatModel
+from setting.models_provider.impl.coreshub_model_provider.model.embedding import CoresHubEmbeddingModel
+from setting.models_provider.impl.coreshub_model_provider.credential.tts import CoresHubTTSModelCredential
+from setting.models_provider.impl.coreshub_model_provider.credential.stt import CoresHubSTTModelCredential
+from setting.models_provider.impl.coreshub_model_provider.model.stt import CoresHubSpeechToText
+from setting.models_provider.impl.coreshub_model_provider.model.tts import CoresHubTextToSpeech
+from smartdoc.conf import PROJECT_DIR
+from django.utils.translation import gettext as _
+
+# A single credential form instance is shared by every model of the same type.
+coreshub_llm_model_credential = CoresHubLLMModelCredential()
+# Chat models offered through CoresHub (keep the names unique); the first entry is used as the LLM default below.
+model_info_list = [
+    ModelInfo(model_name, '', ModelTypeConst.LLM, coreshub_llm_model_credential, CoresHubChatModel)
+    for model_name in (
+        'DeepSeek-V3',
+        'DeepSeek-R1',
+        'DeepSeek-R1-Distill-Llama-70B',
+        'DeepSeek-R1-Distill-Qwen-32B',
+        'DeepSeek-R1-Distill-Qwen-14B',
+        'DeepSeek-R1-Distill-Llama-8B',
+        'DeepSeek-R1-Distill-Qwen-7B',
+        'DeepSeek-R1-Distill-Qwen-1.5B',
+        'Qwen2-0.5B-Instruct',
+    )
+]
+
+coreshub_embedding_credential = CoresHubEmbeddingCredential()
+model_info_embedding_list = [
+    ModelInfo('bce-embedding-base_v1', '',
+              ModelTypeConst.EMBEDDING, coreshub_embedding_credential,
+              CoresHubEmbeddingModel)
+]
+
+coreshub_stt_model_credential = CoresHubSTTModelCredential()
+model_info_stt_list = [
+    ModelInfo('SenseVoiceSmall', '',
+              ModelTypeConst.STT, coreshub_stt_model_credential,
+              CoresHubSpeechToText)
+]
+
+coreshub_tts_model_credential = CoresHubTTSModelCredential()
+model_info_tts_list = [
+    ModelInfo('CosyVoice-300M', '',
+              ModelTypeConst.TTS, coreshub_tts_model_credential,
+              CoresHubTextToSpeech)
+]
+
+# Register every model list and pass the first entry of each list as that list's default model.
+model_info_manage = (
+    ModelInfoManage.builder()
+    .append_model_info_list(model_info_list)
+    .append_default_model_info(model_info_list[0])
+    .append_model_info_list(model_info_embedding_list)
+    .append_default_model_info(model_info_embedding_list[0])
+    .append_model_info_list(model_info_stt_list)
+    .append_default_model_info(model_info_stt_list[0])
+    .append_model_info_list(model_info_tts_list)
+    .append_default_model_info(model_info_tts_list[0])
+    .build()
+)
+
+
+class CoresHubModelProvider(IModelProvider):
+    """Model provider entry for CoresHub."""
+
+    def get_model_info_manage(self):
+        """Return the module-level registry of CoresHub models."""
+        return model_info_manage
+
+    def get_model_provide_info(self):
+        """Return the provider key, display name and icon file content for CoresHub."""
+        icon_path = os.path.join(PROJECT_DIR, "apps", "setting", 'models_provider', 'impl',
+                                 'coreshub_model_provider', 'icon', 'coreshub_icon_svg')
+        return ModelProvideInfo(provider='model_coreshub_provider', name='CoresHub',
+                                icon=get_file_content(icon_path))
diff --git a/apps/setting/models_provider/impl/coreshub_model_provider/credential/embedding.py b/apps/setting/models_provider/impl/coreshub_model_provider/credential/embedding.py
new file mode 100644
index 00000000000..aadcad77339
--- /dev/null
+++ b/apps/setting/models_provider/impl/coreshub_model_provider/credential/embedding.py
@@ -0,0 +1,68 @@
+# coding=utf-8
+"""
+    @project: MaxKB
+    @Author:虎
+    @file: embedding.py
+    @date:2024/7/12 16:45
+    @desc: Credential form and validation for CoresHub embedding models
+"""
+from typing import Dict
+
+from django.utils.translation import gettext as _
+
+from common import forms
+from common.exception.app_exception import AppApiException
+from common.forms import BaseForm
+from setting.models_provider.base_model_provider import BaseModelCredential, ValidCode
+
+
+class CoresHubEmbeddingCredential(BaseForm, BaseModelCredential):
+    """Credential form (API URL and API key) for CoresHub embedding models."""
+
+    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], model_params, provider,
+                 raise_exception=True):
+        """
+        Check the credential by building the model and embedding a short test query.
+
+        :param model_type: model type value; must appear in ``provider.get_model_type_list()``
+        :param model_name: name of the model to build
+        :param model_credential: credential values; must contain ``api_base`` and ``api_key``
+        :param model_params: not used by this check
+        :param provider: provider used to list the supported types and to build the model
+        :param raise_exception: raise ``AppApiException`` on failure instead of returning ``False``
+        :return: ``True`` if the check passes, ``False`` if it fails and ``raise_exception`` is falsy
+        :raises AppApiException: always for an unsupported model type or when the model raises one itself;
+            for a missing key or any other failure only when ``raise_exception`` is truthy
+        """
+        model_type_list = provider.get_model_type_list()
+        if not any(mt.get('value') == model_type for mt in model_type_list):
+            raise AppApiException(ValidCode.valid_error.value,
+                                  _('{model_type} Model type is not supported').format(model_type=model_type))
+
+        for key in ['api_base', 'api_key']:
+            if key not in model_credential:
+                if raise_exception:
+                    raise AppApiException(ValidCode.valid_error.value, _('{key} is required').format(key=key))
+                else:
+                    return False
+        try:
+            model = provider.get_model(model_type, model_name, model_credential)
+            model.embed_query(_('Hello'))
+        except Exception as e:
+            # Application errors are propagated unchanged regardless of raise_exception.
+            if isinstance(e, AppApiException):
+                raise e
+            if raise_exception:
+                raise AppApiException(ValidCode.valid_error.value,
+                                      _('Verification failed, please check whether the parameters are correct: {error}').format(
+                                          error=str(e)))
+            else:
+                return False
+        return True
+
+    def encryption_dict(self, model: Dict[str, object]):
+        """Return a copy of ``model`` whose ``api_key`` has been passed through ``encryption``."""
+        return {**model, 'api_key': super().encryption(model.get('api_key', ''))}
+
+    api_base = forms.TextInputField('API URL', required=True)
+    api_key = forms.PasswordInputField('API Key', required=True)
diff --git a/apps/setting/models_provider/impl/coreshub_model_provider/credential/llm.py b/apps/setting/models_provider/impl/coreshub_model_provider/credential/llm.py
new file mode 100644
index 00000000000..665bc09f375
--- /dev/null
+++ b/apps/setting/models_provider/impl/coreshub_model_provider/credential/llm.py
@@ -0,0 +1,95 @@
+# coding=utf-8
+"""
+    @project: MaxKB
+    @Author:Qingyang
+    @file: llm.py
+    @date:2025/2/11 11:32
+    @desc: Credential form, parameter form and validation for CoresHub chat models
+"""
+from typing import Dict
+
+from django.utils.translation import gettext_lazy as _, gettext
+from langchain_core.messages import HumanMessage
+
+from common import forms
+from common.exception.app_exception import AppApiException
+from common.forms import BaseForm, TooltipLabel
+from setting.models_provider.base_model_provider import BaseModelCredential, ValidCode
+
+
+class CoresHubLLMModelParams(BaseForm):
+    """Tunable generation parameters offered for CoresHub chat models."""
+
+    temperature = forms.SliderField(TooltipLabel(_('Temperature'),
+                                                 _('Higher values make the output more random, while lower values make it more focused and deterministic')),
+                                    required=True, default_value=0.7,
+                                    _min=0.1,
+                                    _max=1.0,
+                                    _step=0.01,
+                                    precision=2)
+
+    max_tokens = forms.SliderField(
+        TooltipLabel(_('Output the maximum Tokens'),
+                     _('Specify the maximum number of tokens that the model can generate')),
+        required=True, default_value=800,
+        _min=1,
+        _max=100000,
+        _step=1,
+        precision=0)
+
+
+class CoresHubLLMModelCredential(BaseForm, BaseModelCredential):
+    """Credential form (API URL and API key) for CoresHub chat models."""
+
+    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], model_params, provider,
+                 raise_exception=False):
+        """
+        Check the credential by building the chat model and sending it a short test message.
+
+        :param model_type: model type value; must appear in ``provider.get_model_type_list()``
+        :param model_name: name of the model to build
+        :param model_credential: credential values; must contain ``api_base`` and ``api_key``
+        :param model_params: mapping forwarded as keyword arguments to ``provider.get_model``
+        :param provider: provider used to list the supported types and to build the model
+        :param raise_exception: raise ``AppApiException`` on failure instead of returning ``False``
+        :return: ``True`` if the check passes, ``False`` if it fails and ``raise_exception`` is falsy
+        :raises AppApiException: always for an unsupported model type or when the model raises one itself;
+            for a missing key or any other failure only when ``raise_exception`` is truthy
+        """
+        model_type_list = provider.get_model_type_list()
+        if not any(mt.get('value') == model_type for mt in model_type_list):
+            raise AppApiException(ValidCode.valid_error.value,
+                                  gettext('{model_type} Model type is not supported').format(model_type=model_type))
+
+        for key in ['api_base', 'api_key']:
+            if key not in model_credential:
+                if raise_exception:
+                    raise AppApiException(ValidCode.valid_error.value, gettext('{key} is required').format(key=key))
+                else:
+                    return False
+        try:
+            model = provider.get_model(model_type, model_name, model_credential, **model_params)
+            model.invoke([HumanMessage(content=gettext('Hello'))])
+        except Exception as e:
+            # Application errors are propagated unchanged regardless of raise_exception.
+            if isinstance(e, AppApiException):
+                raise e
+            if raise_exception:
+                raise AppApiException(ValidCode.valid_error.value,
+                                      gettext(
+                                          'Verification failed, please check whether the parameters are correct: {error}').format(
+                                          error=str(e)))
+            else:
+                return False
+        return True
+
+    def encryption_dict(self, model: Dict[str, object]):
+        """Return a copy of ``model`` whose ``api_key`` has been passed through ``encryption``."""
+        return {**model, 'api_key': super().encryption(model.get('api_key', ''))}
+
+    api_base = forms.TextInputField('API URL', required=True)
+    api_key = forms.PasswordInputField('API Key', required=True)
+
+    def get_model_params_setting_form(self, model_name):
+        """Return the parameter form for chat models; ``model_name`` is not consulted."""
+        return CoresHubLLMModelParams()
diff --git a/apps/setting/models_provider/impl/coreshub_model_provider/credential/stt.py b/apps/setting/models_provider/impl/coreshub_model_provider/credential/stt.py
new file mode 100644
index 00000000000..eeb81fea7f2
--- /dev/null
+++ b/apps/setting/models_provider/impl/coreshub_model_provider/credential/stt.py
@@ -0,0 +1,65 @@
+# coding=utf-8
+from typing import Dict
+
+from django.utils.translation import gettext as _
+
+from common import forms
+from common.exception.app_exception import AppApiException
+from common.forms import BaseForm
+from setting.models_provider.base_model_provider import BaseModelCredential, ValidCode
+
+
+class CoresHubSTTModelCredential(BaseForm, BaseModelCredential):
+    """Credential form (API URL and API key) for CoresHub speech-to-text models."""
+
+    api_base = forms.TextInputField('API URL', required=True)
+    api_key = forms.PasswordInputField('API Key', required=True)
+
+    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], model_params, provider,
+                 raise_exception=False):
+        """
+        Check the credential by building the model and calling its ``check_auth``.
+
+        :param model_type: model type value; must appear in ``provider.get_model_type_list()``
+        :param model_name: name of the model to build
+        :param model_credential: credential values; must contain ``api_base`` and ``api_key``
+        :param model_params: not used by this check
+        :param provider: provider used to list the supported types and to build the model
+        :param raise_exception: raise ``AppApiException`` on failure instead of returning ``False``
+        :return: ``True`` if the check passes, ``False`` if it fails and ``raise_exception`` is falsy
+        :raises AppApiException: always for an unsupported model type or when the model raises one itself;
+            for a missing key or any other failure only when ``raise_exception`` is truthy
+        """
+        model_type_list = provider.get_model_type_list()
+        if not any(mt.get('value') == model_type for mt in model_type_list):
+            raise AppApiException(ValidCode.valid_error.value,
+                                  _('{model_type} Model type is not supported').format(model_type=model_type))
+
+        for key in ['api_base', 'api_key']:
+            if key not in model_credential:
+                if raise_exception:
+                    raise AppApiException(ValidCode.valid_error.value, _('{key} is required').format(key=key))
+                else:
+                    return False
+        try:
+            model = provider.get_model(model_type, model_name, model_credential)
+            model.check_auth()
+        except Exception as e:
+            # Application errors are propagated unchanged regardless of raise_exception.
+            if isinstance(e, AppApiException):
+                raise e
+            if raise_exception:
+                raise AppApiException(ValidCode.valid_error.value,
+                                      _('Verification failed, please check whether the parameters are correct: {error}').format(
+                                          error=str(e)))
+            else:
+                return False
+        return True
+
+    def encryption_dict(self, model: Dict[str, object]):
+        """Return a copy of ``model`` whose ``api_key`` has been passed through ``encryption``."""
+        return {**model, 'api_key': super().encryption(model.get('api_key', ''))}
+
+    def get_model_params_setting_form(self, model_name):
+        """Return ``None``: no parameter form is defined for speech-to-text models."""
+        return None
diff --git a/apps/setting/models_provider/impl/coreshub_model_provider/credential/tts.py b/apps/setting/models_provider/impl/coreshub_model_provider/credential/tts.py
new file mode 100644
index 00000000000..a1241e1d852
--- /dev/null
+++ b/apps/setting/models_provider/impl/coreshub_model_provider/credential/tts.py
@@ -0,0 +1,86 @@
+# coding=utf-8
+from typing import Dict
+
+from django.utils.translation import gettext_lazy as _, gettext
+
+from common import forms
+from common.exception.app_exception import AppApiException
+from common.forms import BaseForm, TooltipLabel
+from setting.models_provider.base_model_provider import BaseModelCredential, ValidCode
+
+
+class CoresHubTTSModelGeneralParams(BaseForm):
+    """Parameter form for CoresHub text-to-speech models: the voice to synthesize with."""
+
+    # alloy, echo, fable, onyx, nova, shimmer
+    voice = forms.SingleSelect(
+        TooltipLabel('Voice',
+                     _('Try out the different sounds (Alloy, Echo, Fable, Onyx, Nova, and Sparkle) to find one that suits your desired tone and audience. The current voiceover is optimized for English.')),
+        required=True, default_value='alloy',
+        text_field='value',
+        value_field='value',
+        option_list=[
+            {'text': 'alloy', 'value': 'alloy'},
+            {'text': 'echo', 'value': 'echo'},
+            {'text': 'fable', 'value': 'fable'},
+            {'text': 'onyx', 'value': 'onyx'},
+            {'text': 'nova', 'value': 'nova'},
+            {'text': 'shimmer', 'value': 'shimmer'},
+        ])
+
+
+class CoresHubTTSModelCredential(BaseForm, BaseModelCredential):
+    """Credential form (API URL and API key) for CoresHub text-to-speech models."""
+
+    api_base = forms.TextInputField('API URL', required=True)
+    api_key = forms.PasswordInputField('API Key', required=True)
+
+    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], model_params, provider,
+                 raise_exception=False):
+        """
+        Check the credential by building the model and calling its ``check_auth``.
+
+        :param model_type: model type value; must appear in ``provider.get_model_type_list()``
+        :param model_name: name of the model to build
+        :param model_credential: credential values; must contain ``api_base`` and ``api_key``
+        :param model_params: mapping forwarded as keyword arguments to ``provider.get_model``
+        :param provider: provider used to list the supported types and to build the model
+        :param raise_exception: raise ``AppApiException`` on failure instead of returning ``False``
+        :return: ``True`` if the check passes, ``False`` if it fails and ``raise_exception`` is falsy
+        :raises AppApiException: always for an unsupported model type or when the model raises one itself;
+            for a missing key or any other failure only when ``raise_exception`` is truthy
+        """
+        model_type_list = provider.get_model_type_list()
+        if not any(mt.get('value') == model_type for mt in model_type_list):
+            raise AppApiException(ValidCode.valid_error.value,
+                                  gettext('{model_type} Model type is not supported').format(model_type=model_type))
+
+        for key in ['api_base', 'api_key']:
+            if key not in model_credential:
+                if raise_exception:
+                    raise AppApiException(ValidCode.valid_error.value, gettext('{key} is required').format(key=key))
+                else:
+                    return False
+        try:
+            model = provider.get_model(model_type, model_name, model_credential, **model_params)
+            model.check_auth()
+        except Exception as e:
+            # Application errors are propagated unchanged regardless of raise_exception.
+            if isinstance(e, AppApiException):
+                raise e
+            if raise_exception:
+                raise AppApiException(ValidCode.valid_error.value,
+                                      gettext(
+                                          'Verification failed, please check whether the parameters are correct: {error}').format(
+                                          error=str(e)))
+            else:
+                return False
+        return True
+
+    def encryption_dict(self, model: Dict[str, object]):
+        """Return a copy of ``model`` whose ``api_key`` has been passed through ``encryption``."""
+        return {**model, 'api_key': super().encryption(model.get('api_key', ''))}
+
+    def get_model_params_setting_form(self, model_name):
+        """Return the voice parameter form; ``model_name`` is not consulted."""
+        return CoresHubTTSModelGeneralParams()
diff --git a/apps/setting/models_provider/impl/coreshub_model_provider/model/embedding.py b/apps/setting/models_provider/impl/coreshub_model_provider/model/embedding.py
new file mode 100644
index 00000000000..fdc770e1008
--- /dev/null
+++ b/apps/setting/models_provider/impl/coreshub_model_provider/model/embedding.py
@@ -0,0 +1,26 @@
+# coding=utf-8
+"""
+    @project: MaxKB
+    @Author:虎
+    @file: embedding.py
+    @date:2024/7/12 17:44
+    @desc: CoresHub embedding model
+"""
+from typing import Dict
+
+from langchain_community.embeddings import OpenAIEmbeddings
+
+from setting.models_provider.base_model_provider import MaxKBBaseModel
+
+
+class CoresHubEmbeddingModel(MaxKBBaseModel, OpenAIEmbeddings):
+    """Embedding model for CoresHub, built on ``OpenAIEmbeddings``."""
+
+    @staticmethod
+    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
+        """Build the model from ``model_name`` and the ``api_key``/``api_base`` credential values."""
+        return CoresHubEmbeddingModel(
+            api_key=model_credential.get('api_key'),
+            model=model_name,
+            openai_api_base=model_credential.get('api_base'),
+        )
diff --git a/apps/setting/models_provider/impl/coreshub_model_provider/model/llm.py b/apps/setting/models_provider/impl/coreshub_model_provider/model/llm.py
new file mode 100644
index 00000000000..cb4d836b37f
--- /dev/null
+++ b/apps/setting/models_provider/impl/coreshub_model_provider/model/llm.py
@@ -0,0 +1,39 @@
+# coding=utf-8
+"""
+    @project: MaxKB
+    @Author:Qingyang
+    @file: llm.py
+    @date:2025/2/11 11:32
+    @desc: CoresHub chat model
+"""
+from typing import List, Dict
+
+from common.config.tokenizer_manage_config import TokenizerManage
+from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI
+
+
+def custom_get_token_ids(text: str):
+    """Encode ``text`` with the tokenizer returned by ``TokenizerManage.get_tokenizer()``."""
+    tokenizer = TokenizerManage.get_tokenizer()
+    return tokenizer.encode(text)
+
+
+class CoresHubChatModel(MaxKBBaseModel, BaseChatOpenAI):
+    """Chat model for CoresHub, built on ``BaseChatOpenAI``."""
+
+    @staticmethod
+    def is_cache_model():
+        """Return ``False``; presumably opts instances out of model caching (confirm against the caller)."""
+        return False
+
+    @staticmethod
+    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
+        """Build the chat model from the credential plus the parameters kept by ``filter_optional_params``."""
+        optional_params = MaxKBBaseModel.filter_optional_params(model_kwargs)
+        return CoresHubChatModel(
+            model=model_name,
+            openai_api_base=model_credential.get('api_base'),
+            openai_api_key=model_credential.get('api_key'),
+            **optional_params
+        )
diff --git a/apps/setting/models_provider/impl/coreshub_model_provider/model/stt.py b/apps/setting/models_provider/impl/coreshub_model_provider/model/stt.py
new file mode 100644
index 00000000000..51288196aee
--- /dev/null
+++ b/apps/setting/models_provider/impl/coreshub_model_provider/model/stt.py
@@ -0,0 +1,67 @@
+import asyncio
+import io
+from typing import Dict
+
+from openai import OpenAI
+
+from common.config.tokenizer_manage_config import TokenizerManage
+from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_stt import BaseSpeechToText
+
+
+def custom_get_token_ids(text: str):
+    """Encode ``text`` with the tokenizer returned by ``TokenizerManage.get_tokenizer()``."""
+    tokenizer = TokenizerManage.get_tokenizer()
+    return tokenizer.encode(text)
+
+
+class CoresHubSpeechToText(MaxKBBaseModel, BaseSpeechToText):
+    """Speech-to-text model for CoresHub, called through the ``openai`` client."""
+
+    api_base: str
+    api_key: str
+    model: str
+
+    def __init__(self, **kwargs):
+        """Store ``api_key``, ``api_base`` and ``model`` from the keyword arguments."""
+        super().__init__(**kwargs)
+        self.api_key = kwargs.get('api_key')
+        self.api_base = kwargs.get('api_base')
+        # speech_to_text() reads self.model, so make sure it is assigned here.
+        self.model = kwargs.get('model')
+
+    @staticmethod
+    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
+        """Build the model from the credential, forwarding non-``None`` ``max_tokens``/``temperature``."""
+        optional_params = {}
+        if 'max_tokens' in model_kwargs and model_kwargs['max_tokens'] is not None:
+            optional_params['max_tokens'] = model_kwargs['max_tokens']
+        if 'temperature' in model_kwargs and model_kwargs['temperature'] is not None:
+            optional_params['temperature'] = model_kwargs['temperature']
+        return CoresHubSpeechToText(
+            model=model_name,
+            api_base=model_credential.get('api_base'),
+            api_key=model_credential.get('api_key'),
+            **optional_params,
+        )
+
+    def check_auth(self):
+        """Verify the credential by listing the available models; client errors propagate to the caller."""
+        client = OpenAI(
+            base_url=self.api_base,
+            api_key=self.api_key
+        )
+        client.models.with_raw_response.list()
+
+    def speech_to_text(self, audio_file):
+        """Transcribe ``audio_file`` (a readable binary file object) as Chinese (``zh``) and return the text."""
+        client = OpenAI(
+            base_url=self.api_base,
+            api_key=self.api_key
+        )
+        audio_data = audio_file.read()
+        buffer = io.BytesIO(audio_data)
+        # The upload needs a file name; presumably the API infers the audio format from it (confirm).
+        buffer.name = "file.mp3"
+        res = client.audio.transcriptions.create(model=self.model, language="zh", file=buffer)
+        return res.text
diff --git a/apps/setting/models_provider/impl/coreshub_model_provider/model/tts.py b/apps/setting/models_provider/impl/coreshub_model_provider/model/tts.py
new file mode 100644
index 00000000000..85850ea5571
--- /dev/null
+++ b/apps/setting/models_provider/impl/coreshub_model_provider/model/tts.py
@@ -0,0 +1,72 @@
+from typing import Dict
+
+from openai import OpenAI
+
+from common.config.tokenizer_manage_config import TokenizerManage
+from common.util.common import _remove_empty_lines
+from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_tts import BaseTextToSpeech
+
+
+def custom_get_token_ids(text: str):
+    """Encode ``text`` with the tokenizer returned by ``TokenizerManage.get_tokenizer()``."""
+    tokenizer = TokenizerManage.get_tokenizer()
+    return tokenizer.encode(text)
+
+
+class CoresHubTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
+    """Text-to-speech model for CoresHub, called through the ``openai`` client."""
+
+    api_base: str
+    api_key: str
+    model: str
+    params: dict
+
+    def __init__(self, **kwargs):
+        """Store ``api_key``, ``api_base``, ``model`` and ``params`` from the keyword arguments."""
+        super().__init__(**kwargs)
+        self.api_key = kwargs.get('api_key')
+        self.api_base = kwargs.get('api_base')
+        self.model = kwargs.get('model')
+        self.params = kwargs.get('params')
+
+    @staticmethod
+    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
+        """Build the model; extra keyword arguments are stored as ``params`` (default voice ``alloy``)."""
+        optional_params = {'params': {'voice': 'alloy'}}
+        for key, value in model_kwargs.items():
+            # Skip keys that are presumably framework options rather than speech request parameters.
+            if key not in ['model_id', 'use_local', 'streaming']:
+                optional_params['params'][key] = value
+        return CoresHubTextToSpeech(
+            model=model_name,
+            api_base=model_credential.get('api_base'),
+            api_key=model_credential.get('api_key'),
+            **optional_params,
+        )
+
+    def check_auth(self):
+        """Verify the credential by listing the available models; client errors propagate to the caller."""
+        client = OpenAI(
+            base_url=self.api_base,
+            api_key=self.api_key
+        )
+        client.models.with_raw_response.list()
+
+    def text_to_speech(self, text):
+        """Synthesize ``text`` (after ``_remove_empty_lines``) and return the response body read in full."""
+        client = OpenAI(
+            base_url=self.api_base,
+            api_key=self.api_key
+        )
+        text = _remove_empty_lines(text)
+        with client.audio.speech.with_streaming_response.create(
+                model=self.model,
+                input=text,
+                **self.params
+        ) as response:
+            return response.read()
+
+    def is_cache_model(self):
+        """Return ``False``; presumably opts instances out of model caching (confirm against the caller)."""
+        return False