diff --git a/models/siliconflow/models/llm/deepseek-v3.2.yaml b/models/siliconflow/models/llm/deepseek-v3.2.yaml index 7ae9acdfa..2febba25b 100644 --- a/models/siliconflow/models/llm/deepseek-v3.2.yaml +++ b/models/siliconflow/models/llm/deepseek-v3.2.yaml @@ -6,12 +6,25 @@ features: - agent-thought - tool-call - stream-tool-call + - multi-tool-call + - document + - structured-output model_properties: mode: chat context_size: 163840 parameter_rules: - name: temperature - use_template: temperature + type: float + required: true + default: 1 + min: 0 + max: 2 + label: + en_US: Temperature + zh_Hans: 温度 + help: + en_US: For DeepSeek-V3.2, best results at default 1.0. Lower values may impact reasoning. + zh_Hans: 对于 DeepSeek-V3.2,使用默认值 1.0 可获得最佳效果。数值过低可能会影响推理能力。 - name: max_tokens use_template: max_tokens type: int @@ -21,19 +34,6 @@ parameter_rules: help: zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。 en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. - - name: top_p - use_template: top_p - - name: top_k - label: - zh_Hans: 取样数量 - en_US: Top k - type: int - help: - zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 - en_US: Only sample from the top K options for each subsequent token. - required: false - - name: frequency_penalty - use_template: frequency_penalty - name: response_format label: zh_Hans: 回复格式 diff --git a/models/siliconflow/models/llm/llm.py b/models/siliconflow/models/llm/llm.py index 2834da041..7ba8a2f8c 100644 --- a/models/siliconflow/models/llm/llm.py +++ b/models/siliconflow/models/llm/llm.py @@ -10,11 +10,50 @@ ModelType, ParameterRule, ParameterType, + DefaultParameterName, ) from dify_plugin.entities.model.llm import LLMMode, LLMResult from dify_plugin.entities.model.message import PromptMessage, PromptMessageTool +class RecommendModelSchema: + """ + This is not a post-patch override. + Parameters manually set by the user have a higher priority than this.
+ """ + + @staticmethod + def set_deepseek_v32(entity: AIModelEntity): + pending_features = [ + ModelFeature.TOOL_CALL, + ModelFeature.MULTI_TOOL_CALL, + ModelFeature.STREAM_TOOL_CALL, + ModelFeature.AGENT_THOUGHT, + ] + + # Add missing features + for feature in pending_features: + if feature not in entity.features: + entity.features.append(feature) + + # Set JSON schema to default + entity.parameter_rules.append( + ParameterRule( + name=DefaultParameterName.JSON_SCHEMA.value, + use_template=DefaultParameterName.JSON_SCHEMA.value, + ) # type: ignore + ) + + # Set temperature to 1.0 + for rule in entity.parameter_rules: + if rule.name == DefaultParameterName.TEMPERATURE.value: + rule.default = 1.0 + + @staticmethod + def patch_siliconflow_default(entity: AIModelEntity): + pass + + class SiliconflowLargeLanguageModel(OAICompatLargeLanguageModel): def _invoke( self, @@ -41,86 +80,90 @@ def validate_credentials(self, model: str, credentials: dict) -> None: def _add_custom_parameters(cls, credentials: dict) -> None: credentials["mode"] = "chat" credentials["endpoint_url"] = "https://api.siliconflow.cn/v1" - + def _add_function_call(self, model: str, credentials: dict) -> None: model_schema = self.get_model_schema(model, credentials) if model_schema and {ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL}.intersection( model_schema.features or [] ): credentials["function_calling_type"] = "tool_call" - + def get_customizable_model_schema( self, model: str, credentials: dict ) -> Optional[AIModelEntity]: - return AIModelEntity( - model=model, - label=I18nObject(en_US=model, zh_Hans=model), - model_type=ModelType.LLM, - features=( - [ - ModelFeature.TOOL_CALL, - ModelFeature.MULTI_TOOL_CALL, - ModelFeature.STREAM_TOOL_CALL, - ] - if credentials.get("function_calling_type") == "tool_call" - else [] - ), - fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, - model_properties={ - ModelPropertyKey.CONTEXT_SIZE: int( - credentials.get("context_size", 8000) - ), - ModelPropertyKey.MODE: 
LLMMode.CHAT.value, - }, - parameter_rules=[ - ParameterRule( - name="temperature", - use_template="temperature", - label=I18nObject(en_US="Temperature", zh_Hans="温度"), - type=ParameterType.FLOAT, - ), - ParameterRule( - name="max_tokens", - use_template="max_tokens", - default=4096, - min=1, - max=int(credentials.get("max_tokens", 16384)), - label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"), - type=ParameterType.INT, - ), - ParameterRule( - name="top_p", - use_template="top_p", - label=I18nObject(en_US="Top P", zh_Hans="Top P"), - type=ParameterType.FLOAT, - ), - ParameterRule( - name="top_k", - use_template="top_k", - label=I18nObject(en_US="Top K", zh_Hans="Top K"), - type=ParameterType.FLOAT, - ), - ParameterRule( - name="frequency_penalty", - use_template="frequency_penalty", - label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"), - type=ParameterType.FLOAT, - ), - ParameterRule( - name="enable_thinking", - use_template="enable_thinking", - default=True, - label=I18nObject(en_US="Thinking mode", zh_Hans="启用思考模式"), - type=ParameterType.BOOLEAN, - ), - ParameterRule( - name="thinking_budget", - use_template="thinking_budget", - default=512, - min=1, - max=int(credentials.get("thinking_budget", 8192)), - label=I18nObject(en_US="Thinking budget", zh_Hans="思考长度限制"), - type=ParameterType.INT, - ), - ], - ) + entity = super().get_customizable_model_schema(model, credentials) + + # Apply curated per-model overrides on top of the generic base schema. + if model in ["deepseek-ai/DeepSeek-V3.2", "Pro/deepseek-ai/DeepSeek-V3.2"]: + RecommendModelSchema.set_deepseek_v32(entity) + return entity + # return AIModelEntity( + # model=model, + # label=I18nObject(en_US=model, zh_Hans=model), + # model_type=ModelType.LLM, + # features=( + # [ + # ModelFeature.TOOL_CALL, + # ModelFeature.MULTI_TOOL_CALL, + # ModelFeature.STREAM_TOOL_CALL, + # ] + # if credentials.get("function_calling_type") == "tool_call" + # else [] + # ), + # fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + # model_properties={ + # ModelPropertyKey.CONTEXT_SIZE: 
int(credentials.get("context_size", 8000)), + # ModelPropertyKey.MODE: LLMMode.CHAT.value, + # }, + # parameter_rules=[ + # ParameterRule( + # name="temperature", + # use_template="temperature", + # label=I18nObject(en_US="Temperature", zh_Hans="温度"), + # type=ParameterType.FLOAT, + # ), + # ParameterRule( + # name="max_tokens", + # use_template="max_tokens", + # default=4096, + # min=1, + # max=int(credentials.get("max_tokens", 16384)), + # label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"), + # type=ParameterType.INT, + # ), + # ParameterRule( + # name="top_p", + # use_template="top_p", + # label=I18nObject(en_US="Top P", zh_Hans="Top P"), + # type=ParameterType.FLOAT, + # ), + # ParameterRule( + # name="top_k", + # use_template="top_k", + # label=I18nObject(en_US="Top K", zh_Hans="Top K"), + # type=ParameterType.FLOAT, + # ), + # ParameterRule( + # name="frequency_penalty", + # use_template="frequency_penalty", + # label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"), + # type=ParameterType.FLOAT, + # ), + # ParameterRule( + # name="enable_thinking", + # use_template="enable_thinking", + # default=True, + # label=I18nObject(en_US="Thinking mode", zh_Hans="启用思考模式"), + # type=ParameterType.BOOLEAN, + # ), + # ParameterRule( + # name="thinking_budget", + # use_template="thinking_budget", + # default=512, + # min=1, + # max=int(credentials.get("thinking_budget", 8192)), + # label=I18nObject(en_US="Thinking budget", zh_Hans="思考长度限制"), + # type=ParameterType.INT, + # ), + # ], + # )