
Commit d6be26d

Merge branch 'main' into ISSUE-15105
2 parents 7ef71d4 + 7e56600

8 files changed (+675, −336 lines)


litellm/proxy/common_utils/callback_utils.py

Lines changed: 5 additions & 5 deletions
@@ -289,8 +289,8 @@ def initialize_callbacks_on_proxy(  # noqa: PLR0915
 
 def get_model_group_from_litellm_kwargs(kwargs: dict) -> Optional[str]:
     _litellm_params = kwargs.get("litellm_params", None) or {}
-    _metadata = _litellm_params.get(get_metadata_variable_name_from_kwargs(kwargs)) or {}
-    _model_group = _metadata.get("model_group", None)
+    _metadata = _litellm_params.get(get_metadata_variable_name_from_litellm_params(_litellm_params)) or {}
+    _model_group = _metadata.get("model_group", None) or kwargs.get("model", None)
     if _model_group is not None:
         return _model_group
 
@@ -367,8 +367,8 @@ def add_guardrail_to_applied_guardrails_header(
     _metadata["applied_guardrails"] = [guardrail_name]
 
 
-def get_metadata_variable_name_from_kwargs(
-    kwargs: dict
+def get_metadata_variable_name_from_litellm_params(
+    litellm_params: dict
 ) -> Literal["metadata", "litellm_metadata"]:
     """
     Helper to return what the "metadata" field should be called in the request data
@@ -381,4 +381,4 @@ def get_metadata_variable_name_from_kwargs(
     - OpenAI then started using this field for their metadata
     - LiteLLM is now moving to using `litellm_metadata` for our metadata
     """
-    return "litellm_metadata" if "litellm_metadata" in kwargs else "metadata"
+    return "litellm_metadata" if "litellm_metadata" in litellm_params else "metadata"

litellm/proxy/hooks/parallel_request_limiter_v3.py

Lines changed: 3 additions & 2 deletions
@@ -25,6 +25,7 @@
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.llms.openai import BaseLiteLLMOpenAIResponseObject
+from fastapi import HTTPException
 
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
@@ -843,7 +844,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
             _get_parent_otel_span_from_kwargs,
         )
         from litellm.proxy.common_utils.callback_utils import (
-            get_metadata_variable_name_from_kwargs,
+            get_metadata_variable_name_from_litellm_params,
             get_model_group_from_litellm_kwargs,
         )
         from litellm.types.caching import RedisPipelineIncrementOperation
@@ -861,7 +862,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
 
         # Get metadata from kwargs
         litellm_metadata = kwargs["litellm_params"].get(
-            get_metadata_variable_name_from_kwargs(kwargs), {}
+            get_metadata_variable_name_from_litellm_params(kwargs["litellm_params"]), {}
         )
         if litellm_metadata is None:
             return

litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py

Lines changed: 29 additions & 89 deletions
@@ -57,9 +57,7 @@ def create_request_copy(request: Request):
     }
 
 
-def is_passthrough_request_using_router_model(
-    request_body: dict, llm_router: Optional[litellm.Router]
-) -> bool:
+def is_passthrough_request_using_router_model(request_body: dict, llm_router: Optional[litellm.Router]) -> bool:
     """
     Returns True if the model is in the llm_router model names
     """
@@ -95,16 +93,12 @@ async def llm_passthrough_factory_proxy_route(
         model=None,
     )
     if provider_config is None:
-        raise HTTPException(
-            status_code=404, detail=f"Provider {custom_llm_provider} not found"
-        )
+        raise HTTPException(status_code=404, detail=f"Provider {custom_llm_provider} not found")
 
     base_target_url = provider_config.get_api_base()
 
     if base_target_url is None:
-        raise HTTPException(
-            status_code=404, detail=f"Provider {custom_llm_provider} api base not found"
-        )
+        raise HTTPException(status_code=404, detail=f"Provider {custom_llm_provider} api base not found")
 
     encoded_endpoint = httpx.URL(endpoint).path
 
@@ -183,17 +177,11 @@ async def gemini_proxy_route(
     [Docs](https://docs.litellm.ai/docs/pass_through/google_ai_studio)
     """
     ## CHECK FOR LITELLM API KEY IN THE QUERY PARAMS - ?..key=LITELLM_API_KEY
-    google_ai_studio_api_key = request.query_params.get("key") or request.headers.get(
-        "x-goog-api-key"
-    )
+    google_ai_studio_api_key = request.query_params.get("key") or request.headers.get("x-goog-api-key")
 
-    user_api_key_dict = await user_api_key_auth(
-        request=request, api_key=f"Bearer {google_ai_studio_api_key}"
-    )
+    user_api_key_dict = await user_api_key_auth(request=request, api_key=f"Bearer {google_ai_studio_api_key}")
 
-    base_target_url = (
-        os.getenv("GEMINI_API_BASE") or "https://generativelanguage.googleapis.com"
-    )
+    base_target_url = os.getenv("GEMINI_API_BASE") or "https://generativelanguage.googleapis.com"
     encoded_endpoint = httpx.URL(endpoint).path
 
     # Ensure endpoint starts with '/' for proper URL construction
@@ -226,6 +214,7 @@ async def gemini_proxy_route(
     endpoint_func = create_pass_through_route(
         endpoint=endpoint,
         target=str(updated_url),
+        custom_llm_provider="gemini",
     )  # dynamically construct pass-through endpoint based on incoming path
     received_value = await endpoint_func(
         request,
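
As a usage sketch: the docs linked above mount this route under /gemini on the proxy, so a call against a locally running proxy would look roughly like the following (base URL, key, and model are placeholders, and the mount path is an assumption taken from the docs):

import httpx

# The route accepts the LiteLLM key either as a `?key=...` query param or an
# `x-goog-api-key` header, mirroring Google AI Studio's own auth style.
resp = httpx.post(
    "http://localhost:4000/gemini/v1beta/models/gemini-1.5-flash:generateContent",
    headers={"x-goog-api-key": "sk-1234"},  # placeholder proxy key
    json={"contents": [{"parts": [{"text": "Say hello"}]}]},
)
print(resp.status_code, resp.json())

The added custom_llm_provider="gemini" argument explicitly tags the dynamically constructed route with its provider.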
@@ -310,9 +299,7 @@ async def vllm_proxy_route(
     from litellm.proxy.proxy_server import llm_router
 
     request_body = await get_request_body(request)
-    is_router_model = is_passthrough_request_using_router_model(
-        request_body, llm_router
-    )
+    is_router_model = is_passthrough_request_using_router_model(request_body, llm_router)
     is_streaming_request = is_passthrough_request_streaming(request_body)
     if is_router_model and llm_router:
         result = cast(
@@ -327,11 +314,7 @@ async def vllm_proxy_route(
             content=None,
             data=None,
             files=None,
-            json=(
-                request_body
-                if request.headers.get("content-type") == "application/json"
-                else None
-            ),
+            json=(request_body if request.headers.get("content-type") == "application/json" else None),
             params=None,
             headers=None,
             cookies=None,
@@ -509,9 +492,7 @@ async def handle_bedrock_count_tokens(
     # Extract model from request body
     model = request_body.get("model")
     if not model:
-        raise HTTPException(
-            status_code=400, detail={"error": "Model is required in request body"}
-        )
+        raise HTTPException(status_code=400, detail={"error": "Model is required in request body"})
 
     # Get model parameters from router
     litellm_params = {"user_api_key_dict": user_api_key_dict}
@@ -550,9 +531,7 @@ async def handle_bedrock_count_tokens(
         raise
     except Exception as e:
         verbose_proxy_logger.error(f"Error in handle_bedrock_count_tokens: {str(e)}")
-        raise HTTPException(
-            status_code=500, detail={"error": f"CountTokens processing error: {str(e)}"}
-        )
+        raise HTTPException(status_code=500, detail={"error": f"CountTokens processing error: {str(e)}"})
 
 
 async def bedrock_llm_proxy_route(
@@ -604,8 +583,7 @@
         raise HTTPException(
             status_code=400,
             detail={
-                "error": "Model missing from endpoint. Expected format: /model/<Model>/<endpoint>. Got: "
-                + endpoint,
+                "error": "Model missing from endpoint. Expected format: /model/<Model>/<endpoint>. Got: " + endpoint,
             },
         )
 
@@ -669,9 +647,7 @@ async def bedrock_proxy_route(
 
     aws_region_name = litellm.utils.get_secret(secret_name="AWS_REGION_NAME")
     if _is_bedrock_agent_runtime_route(endpoint=endpoint):  # handle bedrock agents
-        base_target_url = (
-            f"https://bedrock-agent-runtime.{aws_region_name}.amazonaws.com"
-        )
+        base_target_url = f"https://bedrock-agent-runtime.{aws_region_name}.amazonaws.com"
     else:
         return await bedrock_llm_proxy_route(
             endpoint=endpoint,
@@ -701,9 +677,7 @@
         data = await request.json()
     except Exception as e:
         raise HTTPException(status_code=400, detail={"error": e})
-    _request = AWSRequest(
-        method="POST", url=str(updated_url), data=json.dumps(data), headers=headers
-    )
+    _request = AWSRequest(method="POST", url=str(updated_url), data=json.dumps(data), headers=headers)
    sigv4.add_auth(_request)
     prepped = _request.prepare()
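
The AWSRequest construction collapsed above is standard botocore SigV4 signing. A self-contained sketch of the same flow, assuming credentials resolve from the environment (region, model, and request body are placeholders):

import json

from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest
from botocore.session import Session

session = Session()
sigv4 = SigV4Auth(session.get_credentials(), "bedrock", "us-east-1")
_request = AWSRequest(
    method="POST",
    url="https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke",
    data=json.dumps({"prompt": "Hello"}),
    headers={"Content-Type": "application/json"},
)
sigv4.add_auth(_request)      # mutates _request, adding Authorization / X-Amz-Date headers
prepped = _request.prepare()  # frozen request with the signed headers, ready to send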

@@ -764,14 +738,8 @@ async def assemblyai_proxy_route(
     [Docs](https://api.assemblyai.com)
     """
     # Set base URL based on the route
-    assembly_region = AssemblyAIPassthroughLoggingHandler._get_assembly_region_from_url(
-        url=str(request.url)
-    )
-    base_target_url = (
-        AssemblyAIPassthroughLoggingHandler._get_assembly_base_url_from_region(
-            region=assembly_region
-        )
-    )
+    assembly_region = AssemblyAIPassthroughLoggingHandler._get_assembly_region_from_url(url=str(request.url))
+    base_target_url = AssemblyAIPassthroughLoggingHandler._get_assembly_base_url_from_region(region=assembly_region)
     encoded_endpoint = httpx.URL(endpoint).path
     # Ensure endpoint starts with '/' for proper URL construction
     if not encoded_endpoint.startswith("/"):
@@ -829,18 +797,14 @@ async def azure_proxy_route(
     """
     base_target_url = get_secret_str(secret_name="AZURE_API_BASE")
     if base_target_url is None:
-        raise Exception(
-            "Required 'AZURE_API_BASE' in environment to make pass-through calls to Azure."
-        )
+        raise Exception("Required 'AZURE_API_BASE' in environment to make pass-through calls to Azure.")
     # Add or update query parameters
     azure_api_key = passthrough_endpoint_router.get_credentials(
         custom_llm_provider=litellm.LlmProviders.AZURE.value,
         region_name=None,
     )
     if azure_api_key is None:
-        raise Exception(
-            "Required 'AZURE_API_KEY' in environment to make pass-through calls to Azure."
-        )
+        raise Exception("Required 'AZURE_API_KEY' in environment to make pass-through calls to Azure.")
 
     return await BaseOpenAIPassThroughHandler._base_openai_pass_through_handler(
         endpoint=endpoint,
@@ -864,9 +828,7 @@ def get_default_base_target_url(vertex_location: Optional[str]) -> str:
 
     @staticmethod
     @abstractmethod
-    def update_base_target_url_with_credential_location(
-        base_target_url: str, vertex_location: Optional[str]
-    ) -> str:
+    def update_base_target_url_with_credential_location(base_target_url: str, vertex_location: Optional[str]) -> str:
         pass
 
 
@@ -876,9 +838,7 @@ def get_default_base_target_url(vertex_location: Optional[str]) -> str:
         return "https://discoveryengine.googleapis.com/"
 
     @staticmethod
-    def update_base_target_url_with_credential_location(
-        base_target_url: str, vertex_location: Optional[str]
-    ) -> str:
+    def update_base_target_url_with_credential_location(base_target_url: str, vertex_location: Optional[str]) -> str:
         return base_target_url
 
 
@@ -888,9 +848,7 @@ def get_default_base_target_url(vertex_location: Optional[str]) -> str:
         return get_vertex_base_url(vertex_location)
 
     @staticmethod
-    def update_base_target_url_with_credential_location(
-        base_target_url: str, vertex_location: Optional[str]
-    ) -> str:
+    def update_base_target_url_with_credential_location(base_target_url: str, vertex_location: Optional[str]) -> str:
         return get_vertex_base_url(vertex_location)
 
 
@@ -956,18 +914,14 @@ async def _base_vertex_proxy_route(
         location=vertex_location,
     )
 
-    base_target_url = get_vertex_pass_through_handler.get_default_base_target_url(
-        vertex_location
-    )
+    base_target_url = get_vertex_pass_through_handler.get_default_base_target_url(vertex_location)
 
     headers_passed_through = False
     # Use headers from the incoming request if no vertex credentials are found
     if vertex_credentials is None or vertex_credentials.vertex_project is None:
         headers = dict(request.headers) or {}
         headers_passed_through = True
-        verbose_proxy_logger.debug(
-            "default_vertex_config not set, incoming request headers %s", headers
-        )
+        verbose_proxy_logger.debug("default_vertex_config not set, incoming request headers %s", headers)
         headers.pop("content-length", None)
         headers.pop("host", None)
     else:
@@ -1133,9 +1087,7 @@ async def openai_proxy_route(
         region_name=None,
     )
     if openai_api_key is None:
-        raise Exception(
-            "Required 'OPENAI_API_KEY' in environment to make pass-through calls to OpenAI."
-        )
+        raise Exception("Required 'OPENAI_API_KEY' in environment to make pass-through calls to OpenAI.")
 
     return await BaseOpenAIPassThroughHandler._base_openai_pass_through_handler(
         endpoint=endpoint,
@@ -1181,9 +1133,7 @@ async def _base_openai_pass_through_handler(
         endpoint_func = create_pass_through_route(
             endpoint=endpoint,
             target=str(updated_url),
-            custom_headers=BaseOpenAIPassThroughHandler._assemble_headers(
-                api_key=api_key, request=request
-            ),
+            custom_headers=BaseOpenAIPassThroughHandler._assemble_headers(api_key=api_key, request=request),
         )  # dynamically construct pass-through endpoint based on incoming path
         received_value = await endpoint_func(
             request,
@@ -1200,10 +1150,7 @@ def _append_openai_beta_header(headers: dict, request: Request) -> dict:
         """
         Appends the OpenAI-Beta header to the headers if the request is an OpenAI Assistants API request
        """
-        if (
-            RouteChecks._is_assistants_api_request(request) is True
-            and "OpenAI-Beta" not in headers
-        ):
+        if RouteChecks._is_assistants_api_request(request) is True and "OpenAI-Beta" not in headers:
             headers["OpenAI-Beta"] = "assistants=v2"
         return headers
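
The collapsed condition now reads as a single guard: add the header only for Assistants API requests that don't already carry one. A minimal reproduction, with a hypothetical path check standing in for RouteChecks._is_assistants_api_request:

def append_openai_beta_header(headers: dict, path: str) -> dict:
    # Hypothetical stand-in for RouteChecks._is_assistants_api_request(request).
    is_assistants_request = "/assistants" in path or "/threads" in path
    if is_assistants_request and "OpenAI-Beta" not in headers:
        headers["OpenAI-Beta"] = "assistants=v2"
    return headers


print(append_openai_beta_header({}, "/v1/threads/thread_123/runs"))
# -> {'OpenAI-Beta': 'assistants=v2'}
print(append_openai_beta_header({"OpenAI-Beta": "assistants=v1"}, "/v1/threads"))
# -> {'OpenAI-Beta': 'assistants=v1'}  (a caller-provided header is left alone)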

@@ -1219,9 +1166,7 @@ def _assemble_headers(api_key: str, request: Request) -> dict:
         )
 
     @staticmethod
-    def _join_url_paths(
-        base_url: httpx.URL, path: str, custom_llm_provider: litellm.LlmProviders
-    ) -> str:
+    def _join_url_paths(base_url: httpx.URL, path: str, custom_llm_provider: litellm.LlmProviders) -> str:
         """
         Properly joins a base URL with a path, preserving any existing path in the base URL.
         """
@@ -1237,14 +1182,9 @@ def _join_url_paths(
         joined_path_str = str(base_url.copy_with(path=full_path))
 
         # Apply OpenAI-specific path handling for both branches
-        if (
-            custom_llm_provider == litellm.LlmProviders.OPENAI
-            and "/v1/" not in joined_path_str
-        ):
+        if custom_llm_provider == litellm.LlmProviders.OPENAI and "/v1/" not in joined_path_str:
             # Insert v1 after api.openai.com for OpenAI requests
-            joined_path_str = joined_path_str.replace(
-                "api.openai.com/", "api.openai.com/v1/"
-            )
+            joined_path_str = joined_path_str.replace("api.openai.com/", "api.openai.com/v1/")
 
         return joined_path_str
 
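The v1-insertion rule in the last hunk can be exercised in isolation with httpx (the provider comparison is simplified to a plain string here; the real code compares against litellm.LlmProviders.OPENAI):

import httpx

custom_llm_provider = "openai"  # stand-in for litellm.LlmProviders.OPENAI

base_url = httpx.URL("https://api.openai.com")
joined_path_str = str(base_url.copy_with(path="/chat/completions"))

if custom_llm_provider == "openai" and "/v1/" not in joined_path_str:
    # Insert v1 after api.openai.com, as the diff does for OpenAI requests.
    joined_path_str = joined_path_str.replace("api.openai.com/", "api.openai.com/v1/")

print(joined_path_str)  # https://api.openai.com/v1/chat/completions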