diff --git a/python/dify_plugin/entities/model/__init__.py b/python/dify_plugin/entities/model/__init__.py
index c07eeece..190d2a05 100644
--- a/python/dify_plugin/entities/model/__init__.py
+++ b/python/dify_plugin/entities/model/__init__.py
@@ -22,6 +22,7 @@ class DefaultParameterName(Enum):
     PRESENCE_PENALTY = "presence_penalty"
     FREQUENCY_PENALTY = "frequency_penalty"
     MAX_TOKENS = "max_tokens"
+    MAX_COMPLETION_TOKENS = "max_completion_tokens"
     RESPONSE_FORMAT = "response_format"
     JSON_SCHEMA = "json_schema"
 
@@ -139,6 +140,23 @@ def value_of(cls, value: Any) -> "DefaultParameterName":
         "max": 2048,
         "precision": 0,
     },
+    DefaultParameterName.MAX_COMPLETION_TOKENS: {
+        "label": {
+            "en_US": "Max Completion Tokens",
+            "zh_Hans": "最大完成标记",
+        },
+        "type": "int",
+        "help": {
+            "en_US": "Specifies the upper limit on the length of generated results. "
+            "If the generated results are truncated, you can increase this parameter.",
+            "zh_Hans": "指定生成结果长度的上限。如果生成结果截断，可以调大该参数。",
+        },
+        "required": False,
+        "default": 64,
+        "min": 1,
+        "max": 2048,
+        "precision": 0,
+    },
     DefaultParameterName.RESPONSE_FORMAT: {
         "label": {
             "en_US": "Response Format",
diff --git a/python/dify_plugin/interfaces/model/openai_compatible/llm.py b/python/dify_plugin/interfaces/model/openai_compatible/llm.py
index e26c7464..cab9cc33 100644
--- a/python/dify_plugin/interfaces/model/openai_compatible/llm.py
+++ b/python/dify_plugin/interfaces/model/openai_compatible/llm.py
@@ -180,7 +180,11 @@ def validate_credentials(self, model: str, credentials: dict) -> None:
             endpoint_url += "/"
 
         # prepare the payload for a simple ping to the model
-        data = {"model": credentials.get("endpoint_model_name", model), "max_tokens": 5}
+        if credentials.get("reasoning_thought_support") == "supported":
+            # models with reasoning thought support expect max_completion_tokens instead of max_tokens
+            data = {"model": credentials.get("endpoint_model_name", model), "max_completion_tokens": 5}
+        else:
+            data = {"model": credentials.get("endpoint_model_name", model), "max_tokens": 5}
 
         completion_type = LLMMode.value_of(credentials["mode"])
 
@@ -256,6 +260,14 @@ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIMode
        """
        features = []
 
+        # models with reasoning thought support expect max_completion_tokens instead of max_tokens
+        if credentials.get("reasoning_thought_support") == "supported":
+            max_token_param_name = DefaultParameterName.MAX_COMPLETION_TOKENS.value
+            max_token_param_label = "Max Completion Tokens"
+        else:
+            max_token_param_name = DefaultParameterName.MAX_TOKENS.value
+            max_token_param_label = "Max Tokens"
+
        function_calling_type = credentials.get("function_calling_type", "no_call")
        if function_calling_type == "function_call":
            features.append(ModelFeature.TOOL_CALL)
 
@@ -338,8 +350,8 @@ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIMode
                     max=2,
                 ),
                 ParameterRule(
-                    name=DefaultParameterName.MAX_TOKENS.value,
-                    label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
+                    name=max_token_param_name,
+                    label=I18nObject(en_US=max_token_param_label, zh_Hans="最大标记"),
                     help=I18nObject(
                         en_US="Maximum length of tokens for the model response.",
                         zh_Hans="模型回答的tokens的最大长度。",
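
Reviewer note: below is a minimal standalone sketch of the ping-payload branch added to validate_credentials above, handy for eyeballing the behavior in isolation. build_ping_payload is a hypothetical helper name used only for illustration (the patch inlines this logic); the credential keys reasoning_thought_support and endpoint_model_name are taken from the diff.

def build_ping_payload(model: str, credentials: dict) -> dict:
    """Sketch of the payload switch introduced in validate_credentials."""
    model_name = credentials.get("endpoint_model_name", model)
    if credentials.get("reasoning_thought_support") == "supported":
        # some reasoning endpoints (e.g. OpenAI o-series) reject "max_tokens"
        # and expect "max_completion_tokens" instead
        return {"model": model_name, "max_completion_tokens": 5}
    return {"model": model_name, "max_tokens": 5}

# usage: default credentials vs. reasoning-enabled credentials
assert build_ping_payload("gpt-4o", {}) == {"model": "gpt-4o", "max_tokens": 5}
assert build_ping_payload("o1", {"reasoning_thought_support": "supported"}) == {
    "model": "o1",
    "max_completion_tokens": 5,
}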
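
And a quick sanity check for the new enum member and its template entry. This assumes the dict extended in the second hunk of entities/model/__init__.py is named PARAMETER_RULE_TEMPLATE (the name Dify's model runtime uses; the diff itself does not show the dict's name) and that the SDK is installed with this patch applied.

from dify_plugin.entities.model import DefaultParameterName, PARAMETER_RULE_TEMPLATE

# value_of is the classmethod visible in the second hunk's header
param = DefaultParameterName.value_of("max_completion_tokens")
assert param is DefaultParameterName.MAX_COMPLETION_TOKENS

rule = PARAMETER_RULE_TEMPLATE[param]  # assumed dict name, see note above
assert rule["default"] == 64
assert rule["min"] == 1 and rule["max"] == 2048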