-
Notifications
You must be signed in to change notification settings - Fork 1
Extended thinking support for bedrock models #56
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: labs-release
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |
| from pydantic import BaseModel | ||
| from langchain_core.messages import BaseMessage, BaseMessageChunk | ||
|
|
||
| from fmcore.aws.constants.aws_constants import EXTENDED_THINKING_MODELS | ||
| from fmcore.aws.factory.bedrock_factory import BedrockFactory | ||
| from fmcore.llm.base_llm import BaseLLM | ||
| from fmcore.llm.types.llm_types import LLMConfig | ||
|
|
@@ -30,6 +31,45 @@ class BedrockLLM(BaseLLM[List[BaseMessage], BaseMessage, BaseMessageChunk], Base | |
| client: ChatBedrockConverse | ||
| rate_limiter: AsyncLimiter | ||
|
|
||
| @classmethod | ||
| def _validate_bedrock_params(cls, llm_config: LLMConfig) -> None: | ||
| """ | ||
| Validates and adjusts thinking-related parameters for Bedrock models. | ||
|
|
||
| Args: | ||
| llm_config: The LLM configuration to validate | ||
|
|
||
| Raises: | ||
| ValueError: If token budget constraints are violated | ||
| """ | ||
| model_params = llm_config.model_params | ||
|
|
||
| if llm_config.model_id in EXTENDED_THINKING_MODELS: | ||
| thinking_params = {} | ||
|
|
||
| if (model_params.additional_model_request_fields is not None and | ||
| "thinking" in model_params.additional_model_request_fields): | ||
| thinking_params = model_params.additional_model_request_fields["thinking"] | ||
|
|
||
| if "type" in thinking_params: | ||
| if thinking_params["type"] == "enabled": | ||
| # Fix temperature and unset top p for thinking mode | ||
| model_params.temperature = 1.0 | ||
| model_params.top_p = None | ||
|
|
||
| # Validate token budgets | ||
| budget_tokens = thinking_params.get("budget_tokens", 0) | ||
| if budget_tokens < 1024: | ||
| raise ValueError("Budget tokens must be at least 1024") | ||
| if budget_tokens > model_params.max_tokens: | ||
| raise ValueError("Max tokens must be greater than budget tokens") | ||
| elif "budget_tokens" in thinking_params: | ||
| # Remove budget tokens if thinking is disabled | ||
| model_params.additional_model_request_fields["thinking"].pop("budget_tokens") | ||
| else: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we avoid this else block? If a new bedrock model is launched that supports extended thinking, then you'd be removing the thinking params passed in the llm-config until the constant is updated. Not having this allows the values to pass through to converse-client creation, and if it's an error on the application side if they send these params when they were not supposed to be, then the client creation will just fail - as expected |
||
| # Clear additional fields for non-thinking models | ||
| model_params.additional_model_request_fields = None | ||
|
|
||
| @classmethod | ||
| def _get_instance(cls, *, llm_config: LLMConfig) -> "BedrockLLM": | ||
| """ | ||
|
|
@@ -41,6 +81,7 @@ def _get_instance(cls, *, llm_config: LLMConfig) -> "BedrockLLM": | |
| Args: | ||
| llm_config (SingleLLMConfig): Contains model_id, model_params, and provider_params. | ||
| """ | ||
| cls._validate_bedrock_params(llm_config) | ||
| converse_client = BedrockFactory.create_converse_client(llm_config=llm_config) | ||
| rate_limiter = RateLimiterUtils.create_async_rate_limiter( | ||
| rate_limit_config=llm_config.provider_params.rate_limit | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This transformation can be skipped - it'd silently update the values for compliance and can lead to unexpected results. If the input config is not supported, the user should be the one to fix it