Changes from all commits (37 commits)
cc3e5ba
Fix cache_control hook to support role+index filtering and negative i…
muneerusman25 Feb 11, 2026
cd710fe
fix(factory): handle list content in map_system_message_pt
voidborne-d Mar 16, 2026
5892143
fix(proxy): instantiate OTEL callback at startup instead of deferring…
Harshit28j Mar 17, 2026
e465014
fix: add role key to satisfy AllMessageValues type contract
voidborne-d Mar 17, 2026
1970157
fix: handle None content in _get_content_as_str
voidborne-d Mar 17, 2026
6eaa48a
fix: req changes from greptile
Harshit28j Mar 21, 2026
6ef2086
[Fix]: streaming_handler.py:197 - Error in _route_streaming_logging_t…
FBIKKIBF Mar 21, 2026
1f573d0
fix: copy dict before mutation + use filter(None, ...) to avoid trail…
voidborne-d Mar 21, 2026
f4904e5
fix: greptile feedback
Harshit28j Mar 21, 2026
5c93fc2
fix: address review comments + black formatting
voidborne-d Mar 21, 2026
52cdf94
fix: req changes on test case
Harshit28j Mar 21, 2026
2f3b395
Update litellm/llms/anthropic/chat/handler.py for exception logging
FBIKKIBF Mar 21, 2026
eabac04
Fix logging import and format
FBIKKIBF Mar 21, 2026
4c65ed9
Add customizable CORS settings
emerzon Dec 19, 2025
e779a85
Apply suggestions from code review
emerzon Dec 19, 2025
881da92
Address greptile CORS review feedback
emerzon Mar 21, 2026
b22dc5c
Address additional CORS review feedback
emerzon Mar 21, 2026
ea5502b
Address latest CORS review feedback
emerzon Mar 21, 2026
13f08c9
Address CORS env precedence and wildcard feedback
emerzon Mar 21, 2026
a62cc78
Address final CORS review feedback
emerzon Mar 21, 2026
803ae26
Address empty CORS methods and headers feedback
emerzon Mar 21, 2026
aab385a
Address CORS credentials validation feedback
emerzon Mar 21, 2026
abdc20c
Merge branch 'main' into cors_settings
emerzon Mar 21, 2026
db4cb3c
Fix CORS config startup ordering
emerzon Mar 21, 2026
48d42b4
Fix CORS include loading edge cases
emerzon Mar 21, 2026
6393b36
Resolve list-based CORS env refs
emerzon Mar 21, 2026
5952f02
Tighten CORS test cleanup and docs
emerzon Mar 21, 2026
11aa499
Merge pull request #24288 from FBIKKIBF/main
krrishdholakia Mar 22, 2026
d559073
Merge pull request #18265 from emerzon/cors_settings
krrishdholakia Mar 22, 2026
cdb06fd
Merge pull request #20955 from muneerusman25/fix_cache_control_problem
krrishdholakia Mar 22, 2026
680b1bb
Merge pull request #23802 from Harshit28j/litellm_otel_callback
krrishdholakia Mar 22, 2026
4c12967
Adding pricing and model data for Bedrock Z.AI GLM 5 model. (#24240)
cmbaatz Mar 22, 2026
13b893d
fix(otel): use completion_tokens_details for Chat Completions API rea…
AtharvaJaiswal005 Mar 22, 2026
abc6536
fix(key_rotation): add distributed lock to prevent concurrent rotatio…
michelligabriele Mar 22, 2026
486a752
docs(minimax): update model descriptions and add new M2.5 models (#23…
bugparty Mar 22, 2026
053e923
fix(ui): guard PriceDataManagementTab TabPanel with admin role check …
xykong Mar 22, 2026
c49ef4d
Merge pull request #23782 from voidborne-d/fix/map-system-message-lis…
krrishdholakia Mar 22, 2026
8 changes: 6 additions & 2 deletions docs/my-website/docs/providers/minimax.md
@@ -11,12 +11,16 @@ Litellm provides anthropic specs compatible support for minmax

## Supported Models

MiniMax offers three models through their Anthropic-compatible API:
MiniMax offers the following models through their Anthropic-compatible API:

| Model | Description | Input Cost | Output Cost | Prompt Caching Read | Prompt Caching Write |
|-------|-------------|------------|-------------|---------------------|----------------------|
| **MiniMax-M2.1** | Powerful Multi-Language Programming with Enhanced Programming Experience (~60 tps) | $0.3/M tokens | $1.2/M tokens | $0.03/M tokens | $0.375/M tokens |
| **MiniMax-M2.1-lightning** | Faster and More Agile (~100 tps) | $0.3/M tokens | $2.4/M tokens | $0.03/M tokens | $0.375/M tokens |
| **MiniMax-M2.1-lightning** | Deprecated model name. Use `MiniMax-M2.1-highspeed` for new integrations. | $0.3/M tokens | $2.4/M tokens | $0.03/M tokens | $0.375/M tokens |
| **MiniMax-M2.1-highspeed** | High-speed variant of MiniMax M2.1 | $0.6/M tokens | $2.4/M tokens | $0.03/M tokens | $0.375/M tokens |
| **MiniMax-M2.5** | MiniMax M2.5 general-purpose model | $0.3/M tokens | $1.2/M tokens | $0.03/M tokens | $0.375/M tokens |
| **MiniMax-M2.5-lightning** | Deprecated model name. Use `MiniMax-M2.5-highspeed` for new integrations. | $0.3/M tokens | $2.4/M tokens | $0.03/M tokens | $0.375/M tokens |
| **MiniMax-M2.5-highspeed** | High-speed variant of MiniMax M2.5 | $0.6/M tokens | $2.4/M tokens | $0.03/M tokens | $0.375/M tokens |
| **MiniMax-M2** | Agentic capabilities, Advanced reasoning | $0.3/M tokens | $1.2/M tokens | $0.03/M tokens | $0.375/M tokens |
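
As a quick sanity check on the table above, a request's cost follows from the per-million-token rates. A minimal sketch (token counts here are made up for illustration):

```python
# Cost of a hypothetical MiniMax-M2.1 request, using the table's rates:
# $0.3 per million input tokens, $1.2 per million output tokens.
INPUT_PER_M = 0.3
OUTPUT_PER_M = 1.2

prompt_tokens = 10_000
completion_tokens = 2_000

cost_usd = (prompt_tokens / 1e6) * INPUT_PER_M + (completion_tokens / 1e6) * OUTPUT_PER_M
```

This yields $0.0054 for the example request; prompt-caching reads would be billed at the separate $0.03/M rate.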


4 changes: 4 additions & 0 deletions docs/my-website/docs/proxy/config_settings.md
@@ -218,6 +218,10 @@ router_settings:
| enforce_user_param | boolean | If true, requires all OpenAI endpoint requests to have a 'user' param. [Doc on call hooks](call_hooks)|
| reject_clientside_metadata_tags | boolean | If true, rejects requests that contain client-side 'metadata.tags' to prevent users from influencing budgets by sending different tags. Tags can only be inherited from the API key metadata. |
| allowed_routes | array of strings | List of allowed proxy API routes a user can access [Doc on controlling allowed routes](enterprise#control-available-public-private-routes)|
| cors_allow_origins | Union[str, List[str]] | CORS allowlist origins for the proxy. Defaults to `["*"]` when unset. Set this to `[]` to disable CORS for all origins, or provide explicit origins to restrict access. Existing `LITELLM_CORS_*` env vars take precedence over config values. Restart the proxy after changing any CORS setting. |
| cors_allow_credentials | boolean | Allow CORS credentials. Defaults to `false` when `cors_allow_origins` is explicitly configured and this setting is unset. Otherwise it preserves the proxy's existing default behavior. Wildcard origins or patterns disable credentials. |
| cors_allow_methods | Union[str, List[str]] | CORS allowlist methods for the proxy. Defaults to `"*"` when unset. |
| cors_allow_headers | Union[str, List[str]] | CORS allowlist headers for the proxy. Defaults to `"*"` when unset. |
| key_management_system | string | Specifies the key management system. [Doc Secret Managers](../secret) |
| master_key | string | The master key for the proxy [Set up Virtual Keys](virtual_keys) |
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
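
The CORS precedence rules documented above can be sketched in Python. This is a hypothetical helper (`resolve_cors_origins` is not LiteLLM's actual implementation); it only illustrates the documented behavior: `LITELLM_CORS_*` env vars win over config, unset falls back to `["*"]`, and `[]` disables CORS:

```python
from typing import List, Optional, Union

def resolve_cors_origins(
    config_value: Optional[Union[str, List[str]]],
    env_value: Optional[List[str]],
) -> List[str]:
    # Existing LITELLM_CORS_* env vars take precedence over config values.
    if env_value is not None:
        return env_value
    # Unset -> allow all origins (the documented default).
    if config_value is None:
        return ["*"]
    # A single string is treated as a one-element allowlist.
    if isinstance(config_value, str):
        return [config_value]
    # [] explicitly disables CORS; any other list is used as-is.
    return config_value
```

Restarting the proxy after changing any of these values is still required, since the middleware is configured at startup.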
29 changes: 26 additions & 3 deletions litellm/integrations/anthropic_cache_control_hook.py
@@ -98,8 +98,31 @@ def _process_message_injection(

targetted_role = point.get("role", None)

# Case 1: Target by specific index
if targetted_index is not None:
# Case 1: Target by role + index (e.g., index=-1 among assistant messages)
if targetted_index is not None and targetted_role is not None:
role_indices = [
i
for i, msg in enumerate(messages)
if msg.get("role") == targetted_role
]
if role_indices:
try:
# Negative indices handled by Python's native list indexing (e.g., -1 = last)
actual_idx = role_indices[targetted_index]
except IndexError:
verbose_logger.warning(
f"AnthropicCacheControlHook: Index {targetted_index} is out of bounds "
f"for {len(role_indices)} messages with role '{targetted_role}'. "
f"Skipping cache control injection for this point."
)
else:
messages[actual_idx] = (
AnthropicCacheControlHook._safe_insert_cache_control_in_message(
messages[actual_idx], control
)
)
# Case 2: Target by index only
elif targetted_index is not None:
original_index = targetted_index
# Handle negative indices (convert to positive)
if targetted_index < 0:
@@ -116,7 +139,7 @@
f"AnthropicCacheControlHook: Provided index {original_index} is out of bounds for message list of length {len(messages)}. "
f"Targeted index was {targetted_index}. Skipping cache control injection for this point."
)
# Case 2: Target by role
# Case 3: Target by role only
elif targetted_role is not None:
for msg in messages:
if msg.get("role") == targetted_role:
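
The role+index targeting this hunk adds can be illustrated standalone. The helper below is a hypothetical mirror of the hook's Case 1 logic, not the hook itself; `index=-1` selects the last message with the given role via Python's native negative list indexing:

```python
def select_by_role_and_index(messages, role, index):
    # Collect positions of messages with the target role, then index
    # into that sub-list; negative indices count from the end.
    role_indices = [i for i, m in enumerate(messages) if m.get("role") == role]
    try:
        return role_indices[index]
    except IndexError:
        return None  # out of bounds -> skip, as the hook logs and does

messages = [
    {"role": "system", "content": "s"},
    {"role": "user", "content": "u1"},
    {"role": "assistant", "content": "a1"},
    {"role": "user", "content": "u2"},
]
```

Here `select_by_role_and_index(messages, "user", -1)` returns 3 (the last user message), while an out-of-range index returns `None` rather than raising.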
29 changes: 22 additions & 7 deletions litellm/integrations/arize/_utils.py
@@ -236,13 +236,28 @@ def _set_usage_outputs(span: "Span", response_obj, span_attrs):
prompt_tokens = usage.get("prompt_tokens") or usage.get("input_tokens")
if prompt_tokens:
safe_set_attribute(span, span_attrs.LLM_TOKEN_COUNT_PROMPT, prompt_tokens)
reasoning_tokens = usage.get("output_tokens_details", {}).get("reasoning_tokens")
if reasoning_tokens:
safe_set_attribute(
span,
span_attrs.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
reasoning_tokens,
)
completion_tokens_details = usage.get("completion_tokens_details") or usage.get(
"output_tokens_details"
)
if completion_tokens_details is not None:
reasoning_tokens = getattr(completion_tokens_details, "reasoning_tokens", None)
Review comment (Contributor) on lines +239 to +243:

P2: `or` short-circuits on falsy objects and may skip a valid completion_tokens_details.

    completion_tokens_details = usage.get("completion_tokens_details") or usage.get(
        "output_tokens_details"
    )

If completion_tokens_details is present but evaluates as falsy (e.g. a CompletionTokensDetailsWrapper() instance with all-zero fields where __bool__ returns False, or an empty dict {}), Python's `or` will silently fall through to output_tokens_details. This is unlikely in practice today, but a safer pattern would be an explicit None check:

Suggested change:

    completion_tokens_details = usage.get("completion_tokens_details")
    if completion_tokens_details is None:
        completion_tokens_details = usage.get("output_tokens_details")

The same applies to prompt_tokens_details / input_tokens_details a few lines below.

if reasoning_tokens:
safe_set_attribute(
span,
span_attrs.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
reasoning_tokens,
)
prompt_tokens_details = usage.get("prompt_tokens_details") or usage.get(
"input_tokens_details"
)
if prompt_tokens_details is not None:
cached_tokens = getattr(prompt_tokens_details, "cached_tokens", None)
if cached_tokens:
safe_set_attribute(
span,
span_attrs.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ,
cached_tokens,
)


def _infer_open_inference_span_kind(call_type: Optional[str]) -> str:
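
The reviewer's falsy-`or` concern can be reproduced with a minimal standalone sketch. `Details` here is a hypothetical stand-in for a usage-details wrapper; whether the real `CompletionTokensDetailsWrapper` defines `__bool__` this way is an assumption, which is exactly why the explicit `None` check is the safer pattern:

```python
class Details:
    # Stand-in for a usage-details wrapper whose truthiness depends on
    # its fields (hypothetical; real wrappers may always be truthy).
    def __init__(self, reasoning_tokens: int = 0):
        self.reasoning_tokens = reasoning_tokens

    def __bool__(self) -> bool:
        return bool(self.reasoning_tokens)


usage = {"completion_tokens_details": Details(0), "output_tokens_details": None}

# `or` silently discards the valid-but-falsy object:
picked_with_or = usage.get("completion_tokens_details") or usage.get("output_tokens_details")

# An explicit None check keeps it:
picked = usage.get("completion_tokens_details")
if picked is None:
    picked = usage.get("output_tokens_details")
```

`picked_with_or` ends up `None` while `picked` is the all-zero `Details` instance — the silent skip the review flags.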
73 changes: 42 additions & 31 deletions litellm/litellm_core_utils/prompt_templates/factory.py
@@ -86,6 +86,17 @@ def prompt_injection_detection_default_pt():
) # similar to autogen. Only used if `litellm.modify_params=True`.


def _get_content_as_str(content: Union[str, list, None]) -> str:
"""Extract text from content that may be a string, a list of content blocks, or None."""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
return convert_content_list_to_str({"role": "user", "content": content})
return ""


def map_system_message_pt(messages: list) -> list:
"""
Convert 'system' message to 'user' message if provider doesn't support 'system' role.
@@ -100,20 +111,24 @@ def map_system_message_pt(messages: list) -> list:
new_messages = []
for i, m in enumerate(messages):
if m["role"] == "system":
system_text = _get_content_as_str(m["content"])
if i < len(messages) - 1: # Not the last message
next_m = messages[i + 1]
next_role = next_m["role"]
if (
next_role == "user" or next_role == "assistant"
): # Next message is a user or assistant message
# Merge system prompt into the next message
next_m["content"] = m["content"] + " " + next_m["content"]
# Copy to avoid mutating the caller's original dict
next_m = messages[i + 1] = {**next_m}
next_text = _get_content_as_str(next_m["content"])
next_m["content"] = " ".join(filter(None, [system_text, next_text]))
elif next_role == "system": # Next message is a system message
# Append a user message instead of the system message
new_message = {"role": "user", "content": m["content"]}
new_message = {"role": "user", "content": system_text}
new_messages.append(new_message)
else: # Last message
new_message = {"role": "user", "content": m["content"]}
new_message = {"role": "user", "content": system_text}
new_messages.append(new_message)
else: # Not a system message
new_messages.append(m)
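
A standalone sketch of the behavior this hunk adds (simplified stand-ins, not the factory's actual helpers): `None` and list content normalize to strings, and `filter(None, ...)` keeps the merged message free of a trailing space when the neighboring text is empty:

```python
def content_as_str(content):
    # Simplified stand-in for _get_content_as_str: None -> "", str passes
    # through, list content blocks are joined by their "text" fields.
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return " ".join(
            block.get("text", "") for block in content if isinstance(block, dict)
        )
    return ""


system_text = content_as_str([{"type": "text", "text": "You are terse."}])
merged = " ".join(filter(None, [system_text, content_as_str(None)]))
```

`merged` is `"You are terse."` with no trailing space, where plain `a + " " + b` concatenation would have left one.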
@@ -1393,10 +1408,10 @@ def convert_to_gemini_tool_call_invoke(
if tool_calls is not None:
for idx, tool in enumerate(tool_calls):
if "function" in tool:
gemini_function_call: Optional[
VertexFunctionCall
] = _gemini_tool_call_invoke_helper(
function_call_params=tool["function"]
gemini_function_call: Optional[VertexFunctionCall] = (
_gemini_tool_call_invoke_helper(
function_call_params=tool["function"]
)
)
if gemini_function_call is not None:
part_dict: VertexPartType = {
@@ -1540,9 +1555,7 @@ def convert_to_gemini_tool_call_result( # noqa: PLR0915
file_data = (
file_content.get("file_data", "")
if isinstance(file_content, dict)
else file_content
if isinstance(file_content, str)
else ""
else file_content if isinstance(file_content, str) else ""
)

if file_data:
@@ -2046,9 +2059,9 @@ def _sanitize_empty_text_content(
if isinstance(content, str):
if not content or not content.strip():
message = cast(AllMessageValues, dict(message)) # Make a copy
message[
"content"
] = "[System: Empty message content sanitised to satisfy protocol]"
message["content"] = (
"[System: Empty message content sanitised to satisfy protocol]"
)
verbose_logger.debug(
f"_sanitize_empty_text_content: Replaced empty text content in {message.get('role')} message"
)
@@ -2388,9 +2401,9 @@ def anthropic_messages_pt( # noqa: PLR0915
# Convert ChatCompletionImageUrlObject to dict if needed
image_url_value = m["image_url"]
if isinstance(image_url_value, str):
image_url_input: Union[
str, dict[str, Any]
] = image_url_value
image_url_input: Union[str, dict[str, Any]] = (
image_url_value
)
else:
# ChatCompletionImageUrlObject or dict case - convert to dict
image_url_input = {
@@ -2417,9 +2430,9 @@ def anthropic_messages_pt( # noqa: PLR0915
)

if "cache_control" in _content_element:
_anthropic_content_element[
"cache_control"
] = _content_element["cache_control"]
_anthropic_content_element["cache_control"] = (
_content_element["cache_control"]
)
user_content.append(_anthropic_content_element)
elif m.get("type", "") == "text":
m = cast(ChatCompletionTextObject, m)
@@ -2479,9 +2492,9 @@ def anthropic_messages_pt( # noqa: PLR0915
)

if "cache_control" in _content_element:
_anthropic_content_text_element[
"cache_control"
] = _content_element["cache_control"]
_anthropic_content_text_element["cache_control"] = (
_content_element["cache_control"]
)

user_content.append(_anthropic_content_text_element)

@@ -2614,9 +2627,9 @@ def anthropic_messages_pt( # noqa: PLR0915
original_content_element=dict(assistant_content_block),
)
if "cache_control" in _content_element:
_anthropic_text_content_element[
"cache_control"
] = _content_element["cache_control"]
_anthropic_text_content_element["cache_control"] = (
_content_element["cache_control"]
)
text_element = _anthropic_text_content_element

# Interleave: each thinking block precedes its server tool group.
@@ -2776,9 +2789,9 @@ def anthropic_messages_pt( # noqa: PLR0915
)

if "cache_control" in _content_element:
_anthropic_text_content_element[
"cache_control"
] = _content_element["cache_control"]
_anthropic_text_content_element["cache_control"] = (
_content_element["cache_control"]
)

assistant_content.append(_anthropic_text_content_element)

@@ -5220,9 +5233,7 @@ def default_response_schema_prompt(response_schema: dict) -> str:
prompt_str = """Use this JSON schema:
```json
{}
```""".format(
response_schema
)
```""".format(response_schema)
return prompt_str


17 changes: 13 additions & 4 deletions litellm/llms/anthropic/chat/handler.py
@@ -23,6 +23,7 @@
import litellm.litellm_core_utils
import litellm.types
import litellm.types.utils
from litellm._logging import verbose_logger
from litellm.anthropic_beta_headers_manager import (
update_request_with_filtered_beta,
)
@@ -1245,7 +1246,15 @@ def convert_str_chunk_to_generic_chunk(self, chunk: str) -> ModelResponseStream:
str_line = str_line[index:]

if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
return self.chunk_parser(chunk=data_json)
else:
return ModelResponseStream(id=self.response_id)
chunk_str = str_line[5:].strip()
# Models like Deepseek might return "data: [DONE]" here which is not a
# valid JSON input. We can just ignore these chunks.
try:
data_json = json.loads(chunk_str)
return self.chunk_parser(chunk=data_json)
except json.JSONDecodeError:
verbose_logger.debug(
f"Non-JSON SSE chunk received, ignoring: {chunk_str!r}"
)

return ModelResponseStream(id=self.response_id)
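
The fix above can be distilled into a standalone sketch. `parse_sse_data_line` is a hypothetical helper, not the handler's actual method; it mirrors the new tolerance for non-JSON SSE payloads such as `data: [DONE]`:

```python
import json
from typing import Any, Optional

def parse_sse_data_line(str_line: str) -> Optional[Any]:
    # Non-JSON payloads (e.g. "data: [DONE]" from Deepseek-style streams)
    # yield None rather than raising json.JSONDecodeError.
    if not str_line.startswith("data:"):
        return None
    chunk_str = str_line[5:].strip()
    try:
        return json.loads(chunk_str)
    except json.JSONDecodeError:
        return None
```

In the handler itself, the `None` path corresponds to returning an empty `ModelResponseStream` and logging the ignored chunk at debug level.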
7 changes: 6 additions & 1 deletion litellm/llms/minimax/chat/transformation.py
@@ -16,9 +16,14 @@ class MinimaxChatConfig(OpenAIGPTConfig):
- International: https://api.minimax.io/v1
- China: https://api.minimaxi.com/v1

Note: MiniMax's Claude-compatible `/anthropic/v1/messages` support is implemented
separately in `litellm/llms/minimax/messages/transformation.py`.

Supported models:
- MiniMax-M2.1
- MiniMax-M2.1-lightning
- MiniMax-M2.1-highspeed
- MiniMax-M2.5
- MiniMax-M2.5-highspeed
- MiniMax-M2
"""

9 changes: 7 additions & 2 deletions litellm/llms/minimax/messages/transformation.py
@@ -1,5 +1,8 @@
"""
MiniMax Anthropic transformation config - extends AnthropicConfig for MiniMax's Anthropic-compatible API
MiniMax Anthropic-compatible Messages API transformation config.

MiniMax exposes Claude-compatible `/anthropic/v1/messages` endpoints separately from
its OpenAI-compatible `/v1/chat/completions` endpoint.
"""
from typing import Optional

@@ -19,7 +22,9 @@ class MinimaxMessagesConfig(AnthropicMessagesConfig):

Supported models:
- MiniMax-M2.1
- MiniMax-M2.1-lightning
- MiniMax-M2.1-highspeed
- MiniMax-M2.5
- MiniMax-M2.5-highspeed
- MiniMax-M2
"""

14 changes: 14 additions & 0 deletions litellm/model_prices_and_context_window_backup.json
@@ -32542,6 +32542,20 @@
"supports_vision": true,
"supports_web_search": true
},
"zai.glm-5": {
"input_cost_per_token": 1e-06,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 200000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 3.2e-06,
"supports_function_calling": true,
"supports_reasoning": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"source": "https://aws.amazon.com/bedrock/pricing/"
},
"zai.glm-4.7": {
"input_cost_per_token": 6e-07,
"litellm_provider": "bedrock_converse",
16 changes: 16 additions & 0 deletions litellm/proxy/_types.py
@@ -2236,6 +2236,22 @@ class ConfigGeneralSettings(LiteLLMPydanticObjectBase):
allowed_routes: Optional[List] = Field(
None, description="Proxy API Endpoints you want users to be able to access"
)
cors_allow_origins: Optional[Union[str, List[str]]] = Field(
None,
description='CORS allowlist origins for the proxy. Defaults to `["*"]` when unset. Set this to `[]` to disable CORS for all origins, or provide explicit origins to restrict access. Existing `LITELLM_CORS_*` env vars take precedence over config values. Restart the proxy after changing any CORS setting.',
)
cors_allow_credentials: Optional[bool] = Field(
None,
description="Allow CORS credentials. Defaults to False when cors_allow_origins is explicitly configured and this setting is unset. Otherwise it preserves the proxy's existing default behavior. Wildcard origins or patterns disable credentials.",
)
cors_allow_methods: Optional[Union[str, List[str]]] = Field(
None,
description='CORS allowlist methods for the proxy. Defaults to `"*"` when unset.',
)
cors_allow_headers: Optional[Union[str, List[str]]] = Field(
None,
description='CORS allowlist headers for the proxy. Defaults to `"*"` when unset.',
)
reject_clientside_metadata_tags: Optional[bool] = Field(
None,
description="When set to True, rejects requests that contain client-side 'metadata.tags' to prevent users from influencing budgets by sending different tags. Tags can only be inherited from the API key metadata.",