Skip to content

Commit 793a7fd

Browse files
authored
Merge pull request BerriAI#20333 from BerriAI/litellm_tuesday_cicd_release_final
Litellm tuesday cicd release final
2 parents 80acd4c + 21e95c7 commit 793a7fd

File tree

15 files changed

+43
-463
lines changed

15 files changed

+43
-463
lines changed

ci_cd/security_scans.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ run_grype_scans() {
154154
"CVE-2025-15367" # No fix available yet
155155
"CVE-2025-12781" # No fix available yet
156156
"CVE-2025-11468" # No fix available yet
157+
"CVE-2026-1299" # Python 3.13 email module header injection - not applicable, LiteLLM doesn't use BytesGenerator for email serialization
157158
)
158159

159160
# Build JSON array of allowlisted CVE IDs for jq

docs/my-website/docs/proxy/config_settings.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,9 @@ router_settings:
545545
| DEFAULT_MAX_TOKENS | Default maximum tokens for LLM calls. Default is 4096
546546
| DEFAULT_MAX_TOKENS_FOR_TRITON | Default maximum tokens for Triton models. Default is 2000
547547
| DEFAULT_MAX_REDIS_BATCH_CACHE_SIZE | Default maximum size for redis batch cache. Default is 1000
548+
| DEFAULT_MCP_SEMANTIC_FILTER_EMBEDDING_MODEL | Default embedding model for MCP semantic tool filtering. Default is "text-embedding-3-small"
549+
| DEFAULT_MCP_SEMANTIC_FILTER_SIMILARITY_THRESHOLD | Default similarity threshold for MCP semantic tool filtering. Default is 0.3
550+
| DEFAULT_MCP_SEMANTIC_FILTER_TOP_K | Default number of top results to return for MCP semantic tool filtering. Default is 10
548551
| DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT | Default token count for mock response completions. Default is 20
549552
| DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT | Default token count for mock response prompts. Default is 10
550553
| DEFAULT_MODEL_CREATED_AT_TIME | Default creation timestamp for models. Default is 1677610602
@@ -802,6 +805,7 @@ router_settings:
802805
| MAXIMUM_TRACEBACK_LINES_TO_LOG | Maximum number of lines to log in traceback in LiteLLM Logs UI. Default is 100
803806
| MAX_RETRY_DELAY | Maximum delay in seconds for retrying requests. Default is 8.0
804807
| MAX_LANGFUSE_INITIALIZED_CLIENTS | Maximum number of Langfuse clients to initialize on proxy. Default is 50. This is set since Langfuse initializes 1 thread every time a client is initialized. We've had an incident in the past where we reached 100% CPU utilization because Langfuse was initialized several times.
808+
| MAX_MCP_SEMANTIC_FILTER_TOOLS_HEADER_LENGTH | Maximum header length for MCP semantic filter tools. Default is 150
805809
| MIN_NON_ZERO_TEMPERATURE | Minimum non-zero temperature value. Default is 0.0001
806810
| MINIMUM_PROMPT_CACHE_TOKEN_COUNT | Minimum token count for caching a prompt. Default is 1024
807811
| MISTRAL_API_BASE | Base URL for Mistral API. Default is https://api.mistral.ai

litellm-proxy-extras/litellm_proxy_extras/migrations/20260129103648_add_verificationtoken_indexes/migration.sql

Lines changed: 0 additions & 8 deletions
This file was deleted.

litellm-proxy-extras/litellm_proxy_extras/schema.prisma

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -305,16 +305,6 @@ model LiteLLM_VerificationToken {
305305
litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
306306
litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
307307
object_permission LiteLLM_ObjectPermissionTable? @relation(fields: [object_permission_id], references: [object_permission_id])
308-
309-
// SELECT COUNT(*) FROM (SELECT "public"."LiteLLM_VerificationToken"."token" FROM "public"."LiteLLM_VerificationToken" WHERE ("public"."LiteLLM_VerificationToken"."user_id" = $1 AND ("public"."LiteLLM_VerificationToken"."team_id" IS NULL OR "public"."LiteLLM_VerificationToken"."team_id" <> $2)) OFFSET $3 ) AS "sub"
310-
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."user_id" = $1 OFFSET $2
311-
@@index([user_id, team_id])
312-
313-
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."team_id" = $1 OFFSET $2
314-
@@index([team_id])
315-
316-
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE (("public"."LiteLLM_VerificationToken"."expires" IS NULL OR "public"."LiteLLM_VerificationToken"."expires" > $1) AND "public"."LiteLLM_VerificationToken"."budget_reset_at" < $2) OFFSET $3
317-
@@index([budget_reset_at, expires])
318308
}
319309

320310
// Audit table for deleted keys - preserves spend and key information for historical tracking

litellm/proxy/auth/model_checks.py

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -64,27 +64,6 @@ def _get_models_from_access_groups(
6464
return all_models
6565

6666

67-
def get_access_groups_from_models(
68-
model_access_groups: Dict[str, List[str]],
69-
models: List[str],
70-
) -> List[str]:
71-
"""
72-
Extract access group names from a models list.
73-
74-
Given a models list like ["gpt-4", "beta-models", "claude-v1"]
75-
and access groups like {"beta-models": ["gpt-5", "gpt-6"]},
76-
returns ["beta-models"].
77-
78-
This is used to pass allowed access groups to the router for filtering
79-
deployments during load balancing (GitHub issue #18333).
80-
"""
81-
access_groups = []
82-
for model in models:
83-
if model in model_access_groups:
84-
access_groups.append(model)
85-
return access_groups
86-
87-
8867
async def get_mcp_server_ids(
8968
user_api_key_dict: UserAPIKeyAuth,
9069
) -> List[str]:
@@ -101,6 +80,7 @@ async def get_mcp_server_ids(
10180

10281
# Make a direct SQL query to get just the mcp_servers
10382
try:
83+
10484
result = await prisma_client.db.litellm_objectpermissiontable.find_unique(
10585
where={"object_permission_id": user_api_key_dict.object_permission_id},
10686
)
@@ -196,7 +176,6 @@ def get_complete_model_list(
196176
"""
197177

198178
unique_models = []
199-
200179
def append_unique(models):
201180
for model in models:
202181
if model not in unique_models:
@@ -209,7 +188,7 @@ def append_unique(models):
209188
else:
210189
append_unique(proxy_model_list)
211190
if include_model_access_groups:
212-
append_unique(list(model_access_groups.keys())) # TODO: keys order
191+
append_unique(list(model_access_groups.keys())) # TODO: keys order
213192

214193
if user_model:
215194
append_unique([user_model])

litellm/proxy/litellm_pre_call_utils.py

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,37 +1021,6 @@ async def add_litellm_data_to_request( # noqa: PLR0915
10211021
"user_api_key_user_max_budget"
10221022
] = user_api_key_dict.user_max_budget
10231023

1024-
# Extract allowed access groups for router filtering (GitHub issue #18333)
1025-
# This allows the router to filter deployments based on key's and team's access groups
1026-
# NOTE: We keep key and team access groups SEPARATE because a key doesn't always
1027-
# inherit all team access groups (per maintainer feedback).
1028-
if llm_router is not None:
1029-
from litellm.proxy.auth.model_checks import get_access_groups_from_models
1030-
1031-
model_access_groups = llm_router.get_model_access_groups()
1032-
1033-
# Key-level access groups (from user_api_key_dict.models)
1034-
key_models = list(user_api_key_dict.models) if user_api_key_dict.models else []
1035-
key_allowed_access_groups = get_access_groups_from_models(
1036-
model_access_groups=model_access_groups, models=key_models
1037-
)
1038-
if key_allowed_access_groups:
1039-
data[_metadata_variable_name][
1040-
"user_api_key_allowed_access_groups"
1041-
] = key_allowed_access_groups
1042-
1043-
# Team-level access groups (from user_api_key_dict.team_models)
1044-
team_models = (
1045-
list(user_api_key_dict.team_models) if user_api_key_dict.team_models else []
1046-
)
1047-
team_allowed_access_groups = get_access_groups_from_models(
1048-
model_access_groups=model_access_groups, models=team_models
1049-
)
1050-
if team_allowed_access_groups:
1051-
data[_metadata_variable_name][
1052-
"user_api_key_team_allowed_access_groups"
1053-
] = team_allowed_access_groups
1054-
10551024
data[_metadata_variable_name]["user_api_key_metadata"] = user_api_key_dict.metadata
10561025
_headers = dict(request.headers)
10571026
_headers.pop(

litellm/proxy/schema.prisma

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -305,16 +305,6 @@ model LiteLLM_VerificationToken {
305305
litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
306306
litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
307307
object_permission LiteLLM_ObjectPermissionTable? @relation(fields: [object_permission_id], references: [object_permission_id])
308-
309-
// SELECT COUNT(*) FROM (SELECT "public"."LiteLLM_VerificationToken"."token" FROM "public"."LiteLLM_VerificationToken" WHERE ("public"."LiteLLM_VerificationToken"."user_id" = $1 AND ("public"."LiteLLM_VerificationToken"."team_id" IS NULL OR "public"."LiteLLM_VerificationToken"."team_id" <> $2)) OFFSET $3 ) AS "sub"
310-
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."user_id" = $1 OFFSET $2
311-
@@index([user_id, team_id])
312-
313-
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE "public"."LiteLLM_VerificationToken"."team_id" = $1 OFFSET $2
314-
@@index([team_id])
315-
316-
// SELECT ... FROM "public"."LiteLLM_VerificationToken" WHERE (("public"."LiteLLM_VerificationToken"."expires" IS NULL OR "public"."LiteLLM_VerificationToken"."expires" > $1) AND "public"."LiteLLM_VerificationToken"."budget_reset_at" < $2) OFFSET $3
317-
@@index([budget_reset_at, expires])
318308
}
319309

320310
// Audit table for deleted keys - preserves spend and key information for historical tracking

litellm/router.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@
8888
is_clientside_credential,
8989
)
9090
from litellm.router_utils.common_utils import (
91-
filter_deployments_by_access_groups,
9291
filter_team_based_models,
9392
filter_web_search_deployments,
9493
)
@@ -8088,17 +8087,10 @@ async def async_get_healthy_deployments(
80888087
request_kwargs=request_kwargs,
80898088
)
80908089

8091-
verbose_router_logger.debug(f"healthy_deployments after web search filter: {healthy_deployments}")
8092-
8093-
# Filter by allowed access groups (GitHub issue #18333)
8094-
# This prevents cross-team load balancing when teams have models with same name in different access groups
8095-
healthy_deployments = filter_deployments_by_access_groups(
8096-
healthy_deployments=healthy_deployments,
8097-
request_kwargs=request_kwargs,
8090+
verbose_router_logger.debug(
8091+
f"healthy_deployments after web search filter: {healthy_deployments}"
80988092
)
80998093

8100-
verbose_router_logger.debug(f"healthy_deployments after access group filter: {healthy_deployments}")
8101-
81028094
if isinstance(healthy_deployments, dict):
81038095
return healthy_deployments
81048096

litellm/router_utils/common_utils.py

Lines changed: 2 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ def filter_team_based_models(
7575
if deployment.get("model_info", {}).get("id") not in ids_to_remove
7676
]
7777

78-
7978
def _deployment_supports_web_search(deployment: Dict) -> bool:
8079
"""
8180
Check if a deployment supports web search.
@@ -113,7 +112,7 @@ def filter_web_search_deployments(
113112
is_web_search_request = False
114113
tools = request_kwargs.get("tools") or []
115114
for tool in tools:
116-
# These are the two websearch tools for OpenAI / Azure.
115+
# These are the two websearch tools for OpenAI / Azure.
117116
if tool.get("type") == "web_search" or tool.get("type") == "web_search_preview":
118117
is_web_search_request = True
119118
break
@@ -122,82 +121,8 @@ def filter_web_search_deployments(
122121
return healthy_deployments
123122

124123
# Filter out deployments that don't support web search
125-
final_deployments = [
126-
d for d in healthy_deployments if _deployment_supports_web_search(d)
127-
]
124+
final_deployments = [d for d in healthy_deployments if _deployment_supports_web_search(d)]
128125
if len(healthy_deployments) > 0 and len(final_deployments) == 0:
129126
verbose_logger.warning("No deployments support web search for request")
130127
return final_deployments
131128

132-
133-
def filter_deployments_by_access_groups(
134-
healthy_deployments: Union[List[Dict], Dict],
135-
request_kwargs: Optional[Dict] = None,
136-
) -> Union[List[Dict], Dict]:
137-
"""
138-
Filter deployments to only include those matching the user's allowed access groups.
139-
140-
Reads from TWO separate metadata fields (per maintainer feedback):
141-
- `user_api_key_allowed_access_groups`: Access groups from the API Key's models.
142-
- `user_api_key_team_allowed_access_groups`: Access groups from the Team's models.
143-
144-
A deployment is included if its access_groups overlap with EITHER the key's
145-
or the team's allowed access groups. Deployments with no access_groups are
146-
always included (not restricted).
147-
148-
This prevents cross-team load balancing when multiple teams have models with
149-
the same name but in different access groups (GitHub issue #18333).
150-
"""
151-
if request_kwargs is None:
152-
return healthy_deployments
153-
154-
if isinstance(healthy_deployments, dict):
155-
return healthy_deployments
156-
157-
metadata = request_kwargs.get("metadata") or {}
158-
litellm_metadata = request_kwargs.get("litellm_metadata") or {}
159-
160-
# Gather key-level allowed access groups
161-
key_allowed_access_groups = (
162-
metadata.get("user_api_key_allowed_access_groups")
163-
or litellm_metadata.get("user_api_key_allowed_access_groups")
164-
or []
165-
)
166-
167-
# Gather team-level allowed access groups
168-
team_allowed_access_groups = (
169-
metadata.get("user_api_key_team_allowed_access_groups")
170-
or litellm_metadata.get("user_api_key_team_allowed_access_groups")
171-
or []
172-
)
173-
174-
# Combine both for the final allowed set
175-
combined_allowed_access_groups = list(key_allowed_access_groups) + list(
176-
team_allowed_access_groups
177-
)
178-
179-
# If no access groups specified from either source, return all deployments (backwards compatible)
180-
if not combined_allowed_access_groups:
181-
return healthy_deployments
182-
183-
allowed_set = set(combined_allowed_access_groups)
184-
filtered = []
185-
for deployment in healthy_deployments:
186-
model_info = deployment.get("model_info") or {}
187-
deployment_access_groups = model_info.get("access_groups") or []
188-
189-
# If deployment has no access groups, include it (not restricted)
190-
if not deployment_access_groups:
191-
filtered.append(deployment)
192-
continue
193-
194-
# Include if any of deployment's groups overlap with allowed groups
195-
if set(deployment_access_groups) & allowed_set:
196-
filtered.append(deployment)
197-
198-
if len(healthy_deployments) > 0 and len(filtered) == 0:
199-
verbose_logger.warning(
200-
f"No deployments match allowed access groups {combined_allowed_access_groups}"
201-
)
202-
203-
return filtered

litellm/router_utils/fallback_event_handlers.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -113,16 +113,8 @@ async def run_async_fallback(
113113
The most recent exception if all fallback model groups fail.
114114
"""
115115

116-
### BASE CASE ### MAX FALLBACK DEPTH REACHED
117-
if fallback_depth >= max_fallbacks:
118-
raise original_exception
119-
120-
### CHECK IF MODEL GROUP LIST EXHAUSTED
121-
if original_model_group in fallback_model_group:
122-
fallback_group_length = len(fallback_model_group) - 1
123-
else:
124-
fallback_group_length = len(fallback_model_group)
125-
if fallback_depth >= fallback_group_length:
116+
### BASE CASE ### MAX FALLBACK DEPTH REACHED
117+
if fallback_depth >= max_fallbacks:
126118
raise original_exception
127119

128120
error_from_fallbacks = original_exception

0 commit comments

Comments
 (0)