Commit b2ffeb8
Merge pull request #15094 from BerriAI/litellm_dev_09_30_2025_p1
Guardrails - run all guardrails before calling other post_call_success_hook + Prometheus - support custom metadata labels on key/team
2 parents e1ee428 + bed6c79 commit b2ffeb8

8 files changed: +135, -68 lines

enterprise/litellm_enterprise/integrations/prometheus.py

Lines changed: 30 additions & 8 deletions
@@ -21,6 +21,7 @@
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
 from litellm.types.integrations.prometheus import *
+from litellm.types.integrations.prometheus import _sanitize_prometheus_label_name
 from litellm.types.utils import StandardLoggingPayload
 from litellm.utils import get_end_user_id_for_cost_tracking

@@ -794,9 +795,16 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
         output_tokens = standard_logging_payload["completion_tokens"]
         tokens_used = standard_logging_payload["total_tokens"]
         response_cost = standard_logging_payload["response_cost"]
-        _requester_metadata = standard_logging_payload["metadata"].get(
+        _requester_metadata: Optional[dict] = standard_logging_payload["metadata"].get(
             "requester_metadata"
         )
+        user_api_key_auth_metadata: Optional[dict] = standard_logging_payload[
+            "metadata"
+        ].get("user_api_key_auth_metadata")
+        combined_metadata: Dict[str, Any] = {
+            **(_requester_metadata if _requester_metadata else {}),
+            **(user_api_key_auth_metadata if user_api_key_auth_metadata else {}),
+        }
         if standard_logging_payload is not None and isinstance(
             standard_logging_payload, dict
         ):
@@ -828,8 +836,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
                 exception_status=None,
                 exception_class=None,
                 custom_metadata_labels=get_custom_labels_from_metadata(
-                    metadata=standard_logging_payload["metadata"].get("requester_metadata")
-                    or {}
+                    metadata=combined_metadata
                 ),
                 route=standard_logging_payload["metadata"].get(
                     "user_api_key_request_route"
@@ -1649,9 +1656,22 @@ def set_litellm_deployment_state(
         api_base: Optional[str],
         api_provider: str,
     ):
-        self.litellm_deployment_state.labels(
-            litellm_model_name, model_id, api_base, api_provider
-        ).set(state)
+        """
+        Set the deployment state.
+        """
+        ### get labels
+        _labels = prometheus_label_factory(
+            supported_enum_labels=self.get_labels_for_metric(
+                metric_name="litellm_deployment_state"
+            ),
+            enum_values=UserAPIKeyLabelValues(
+                litellm_model_name=litellm_model_name,
+                model_id=model_id,
+                api_base=api_base,
+                api_provider=api_provider,
+            ),
+        )
+        self.litellm_deployment_state.labels(**_labels).set(state)

     def set_deployment_healthy(
         self,
@@ -2228,8 +2248,10 @@ def prometheus_label_factory(

     if enum_values.custom_metadata_labels is not None:
         for key, value in enum_values.custom_metadata_labels.items():
-            if key in supported_enum_labels:
-                filtered_labels[key] = value
+            # check sanitized key
+            sanitized_key = _sanitize_prometheus_label_name(key)
+            if sanitized_key in supported_enum_labels:
+                filtered_labels[sanitized_key] = value

     # Add custom tags if configured
     if enum_values.tags is not None:
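The Prometheus change above does two things: custom metadata labels are now resolved from the union of request-level `requester_metadata` and the new key/team-level `user_api_key_auth_metadata`, and label keys are matched by their sanitized form. The following is a minimal sketch of that flow, compressing `get_custom_labels_from_metadata` and `prometheus_label_factory` into one helper for illustration and assuming a sanitizer that maps invalid characters to underscores (the real `_sanitize_prometheus_label_name` is not shown in this diff):

```python
import re
from typing import Any, Dict, Optional, Set


def sanitize_label_name(name: str) -> str:
    # Stand-in for _sanitize_prometheus_label_name (assumption): Prometheus label
    # names must match [a-zA-Z_][a-zA-Z0-9_]*, so map everything else to "_",
    # e.g. "business-unit" -> "business_unit".
    return re.sub(r"[^a-zA-Z0-9_]", "_", name)


def resolve_custom_labels(
    requester_metadata: Optional[Dict[str, Any]],
    user_api_key_auth_metadata: Optional[Dict[str, Any]],
    supported_enum_labels: Set[str],
) -> Dict[str, Any]:
    # Mirrors combined_metadata above: key/team auth metadata wins on key conflicts.
    combined = {**(requester_metadata or {}), **(user_api_key_auth_metadata or {})}
    # Mirrors the prometheus_label_factory change: filter on the sanitized key.
    return {
        sanitize_label_name(k): v
        for k, v in combined.items()
        if sanitize_label_name(k) in supported_enum_labels
    }


print(
    resolve_custom_labels(
        requester_metadata={"initiative": "q4-launch"},
        user_api_key_auth_metadata={"business-unit": "platform"},
        supported_enum_labels={"initiative", "business_unit"},
    )
)  # {'initiative': 'q4-launch', 'business_unit': 'platform'}
```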

litellm/litellm_core_utils/litellm_logging.py

Lines changed: 2 additions & 0 deletions
@@ -4040,6 +4040,7 @@ def get_standard_logging_metadata(
             usage_object=usage_object,
             requester_custom_headers=None,
             cold_storage_object_key=None,
+            user_api_key_auth_metadata=None,
         )
         if isinstance(metadata, dict):
             # Filter the metadata dictionary to include only the specified keys
@@ -4755,6 +4756,7 @@ def get_standard_logging_metadata(
             requester_custom_headers=None,
             user_api_key_request_route=None,
             cold_storage_object_key=None,
+            user_api_key_auth_metadata=None,
         )
         if isinstance(metadata, dict):
             # Update the clean_metadata with values from input metadata that match StandardLoggingMetadata fields
Lines changed: 6 additions & 27 deletions
@@ -1,30 +1,9 @@
 model_list:
-  - model_name: byok-fixed-gpt-4o-mini
+  - model_name: openai/gpt-4o
     litellm_params:
-      model: openai/gpt-4o-mini
-      api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
-      api_key: dummy
-  - model_name: "byok-wildcard/*"
-    litellm_params:
-      model: openai/*
-  - model_name: xai-grok-3
-    litellm_params:
-      model: xai/grok-3
-  - model_name: hosted_vllm/whisper-v3
-    litellm_params:
-      model: hosted_vllm/whisper-v3
-      api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
-      api_key: dummy
-
-mcp_servers:
-  github_mcp:
-    url: "https://api.githubcopilot.com/mcp"
-    auth_type: oauth2
-    authorization_url: https://github.com/login/oauth/authorize
-    token_url: https://github.com/login/oauth/access_token
-    client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
-    client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
-    scopes: ["public_repo", "user:email"]
-    allowed_tools: ["list_tools"]
-    # disallowed_tools: ["repo_delete"]
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY

+litellm_settings:
+  callbacks: ["prometheus"]
+  custom_prometheus_metadata_labels: ["metadata.initiative", "metadata.business-unit"]
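With `callbacks: ["prometheus"]` and `custom_prometheus_metadata_labels` set as above, metadata stored on a key or team is what ultimately populates those labels through the new `user_api_key_auth_metadata` plumbing. A hedged example of attaching matching metadata to a virtual key through the proxy's `/key/generate` endpoint; the base URL, master key, and metadata values are placeholders:

```python
# Sketch: attach the metadata referenced by custom_prometheus_metadata_labels
# ("metadata.initiative", "metadata.business-unit") to a virtual key.
# Assumes a proxy at http://localhost:4000 with master key "sk-1234";
# adjust both for your deployment.
import requests

resp = requests.post(
    "http://localhost:4000/key/generate",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "metadata": {
            "initiative": "q4-launch",
            "business-unit": "platform",
        }
    },
)
print(resp.json()["key"])  # requests made with this key carry the metadata
```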

litellm/proxy/_types.py

Lines changed: 1 addition & 0 deletions
@@ -3068,6 +3068,7 @@ class PassThroughEndpointLoggingTypedDict(TypedDict):
     "tags",
     "team_member_key_duration",
     "prompts",
+    "logging",
 ]

litellm/proxy/litellm_pre_call_utils.py

Lines changed: 53 additions & 1 deletion
@@ -579,7 +579,12 @@ def get_sanitized_user_information_from_key(
             user_api_key_end_user_id=user_api_key_dict.end_user_id,
             user_api_key_user_email=user_api_key_dict.user_email,
             user_api_key_request_route=user_api_key_dict.request_route,
-            user_api_key_budget_reset_at=user_api_key_dict.budget_reset_at.isoformat() if user_api_key_dict.budget_reset_at else None,
+            user_api_key_budget_reset_at=(
+                user_api_key_dict.budget_reset_at.isoformat()
+                if user_api_key_dict.budget_reset_at
+                else None
+            ),
+            user_api_key_auth_metadata=None,
         )
         return user_api_key_logged_metadata

@@ -607,6 +612,37 @@ def add_user_api_key_auth_to_request_metadata(
         )
         return data

+    @staticmethod
+    def add_management_endpoint_metadata_to_request_metadata(
+        data: dict,
+        management_endpoint_metadata: dict,
+        _metadata_variable_name: str,
+    ) -> dict:
+        """
+        Adds the `UserAPIKeyAuth` metadata to the request metadata.
+
+        ignore any sensitive fields like logging, api_key, etc.
+        """
+        from litellm.proxy._types import (
+            LiteLLM_ManagementEndpoint_MetadataFields,
+            LiteLLM_ManagementEndpoint_MetadataFields_Premium,
+        )
+
+        # ignore any special fields
+        added_metadata = {}
+        for k, v in management_endpoint_metadata.items():
+            if k not in (
+                LiteLLM_ManagementEndpoint_MetadataFields_Premium
+                + LiteLLM_ManagementEndpoint_MetadataFields
+            ):
+                added_metadata[k] = v
+        if data[_metadata_variable_name].get("user_api_key_auth_metadata") is None:
+            data[_metadata_variable_name]["user_api_key_auth_metadata"] = {}
+        data[_metadata_variable_name]["user_api_key_auth_metadata"].update(
+            added_metadata
+        )
+        return data
+
     @staticmethod
     def add_key_level_controls(
         key_metadata: Optional[dict], data: dict, _metadata_variable_name: str
@@ -651,6 +687,13 @@ def add_key_level_controls(
                     key_metadata["disable_fallbacks"], bool
                 ):
                     data["disable_fallbacks"] = key_metadata["disable_fallbacks"]
+
+        ## KEY-LEVEL METADATA
+        data = LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
+            data=data,
+            management_endpoint_metadata=key_metadata,
+            _metadata_variable_name=_metadata_variable_name,
+        )
         return data

     @staticmethod
@@ -889,6 +932,15 @@ async def add_litellm_data_to_request( # noqa: PLR0915
                 "spend_logs_metadata"
             ]

+        ## TEAM-LEVEL METADATA
+        data = (
+            LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
+                data=data,
+                management_endpoint_metadata=team_metadata,
+                _metadata_variable_name=_metadata_variable_name,
+            )
+        )
+
         # Team spend, budget - used by prometheus.py
         data[_metadata_variable_name][
             "user_api_key_team_max_budget"

litellm/proxy/utils.py

Lines changed: 31 additions & 27 deletions
@@ -1395,9 +1395,12 @@ async def post_call_success_hook(
         3. /image/generation
         4. /files
         """
+        from litellm.types.guardrails import GuardrailEventHooks

-        for callback in litellm.callbacks:
-            try:
+        guardrail_callbacks: List[CustomGuardrail] = []
+        other_callbacks: List[CustomLogger] = []
+        try:
+            for callback in litellm.callbacks:
                 _callback: Optional[CustomLogger] = None
                 if isinstance(callback, str):
                     _callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
@@ -1407,36 +1410,37 @@ async def post_call_success_hook(
                     _callback = callback  # type: ignore

                 if _callback is not None:
+                    if isinstance(_callback, CustomGuardrail):
+                        guardrail_callbacks.append(_callback)
+                    else:
+                        other_callbacks.append(_callback)
             ############## Handle Guardrails ########################################
             #############################################################################
-                if isinstance(callback, CustomGuardrail):
-                    # Main - V2 Guardrails implementation
-                    from litellm.types.guardrails import GuardrailEventHooks

-                    if (
-                        callback.should_run_guardrail(
-                            data=data, event_type=GuardrailEventHooks.post_call
-                        )
-                        is not True
-                    ):
-                        continue
+            for callback in guardrail_callbacks:
+                # Main - V2 Guardrails implementation
+                if (
+                    callback.should_run_guardrail(
+                        data=data, event_type=GuardrailEventHooks.post_call
+                    )
+                    is not True
+                ):
+                    continue

-                    await callback.async_post_call_success_hook(
-                        user_api_key_dict=user_api_key_dict,
-                        data=data,
-                        response=response,
-                    )
+                await callback.async_post_call_success_hook(
+                    user_api_key_dict=user_api_key_dict,
+                    data=data,
+                    response=response,
+                )

-            ############ Handle CustomLogger ###############################
-            #################################################################
-                elif isinstance(_callback, CustomLogger):
-                    await _callback.async_post_call_success_hook(
-                        user_api_key_dict=user_api_key_dict,
-                        data=data,
-                        response=response,
-                    )
-            except Exception as e:
-                raise e
+            ############ Handle CustomLogger ###############################
+            #################################################################
+            for callback in other_callbacks:
+                await callback.async_post_call_success_hook(
+                    user_api_key_dict=user_api_key_dict, data=data, response=response
+                )
+        except Exception as e:
+            raise e
         return response

     async def async_post_call_streaming_hook(
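The `post_call_success_hook` refactor collects callbacks into two buckets and runs every guardrail's post-call hook before any other logger's, instead of processing them in registration order inside one loop. A minimal sketch of the ordering with stand-in classes (the real code uses `CustomGuardrail`, `CustomLogger`, and `should_run_guardrail`):

```python
import asyncio


class Logger:
    async def post_call(self, response):
        print(f"{type(self).__name__} ran")


class Guardrail(Logger):
    def should_run(self, event_type: str) -> bool:
        # Stand-in for CustomGuardrail.should_run_guardrail(...)
        return event_type == "post_call"


async def post_call_success_hook(callbacks, response):
    guardrails = [c for c in callbacks if isinstance(c, Guardrail)]
    others = [c for c in callbacks if not isinstance(c, Guardrail)]
    # Pass 1: every guardrail runs first and can inspect/reject the response.
    for g in guardrails:
        if g.should_run("post_call"):
            await g.post_call(response)
    # Pass 2: remaining loggers only see the post-guardrail response.
    for c in others:
        await c.post_call(response)
    return response


asyncio.run(post_call_success_hook([Logger(), Guardrail(), Logger()], {"id": "resp-1"}))
# Prints "Guardrail ran" before either "Logger ran", regardless of registration order.
```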

litellm/types/integrations/prometheus.py

Lines changed: 4 additions & 4 deletions
(Whitespace-only hunk: trailing spaces are removed from the blank separator lines.)

@@ -426,13 +426,13 @@ class PrometheusMetricLabels:

     # Buffer monitoring metrics - these typically don't need additional labels
     litellm_pod_lock_manager_size: List[str] = []
-
+
     litellm_in_memory_daily_spend_update_queue_size: List[str] = []
-
+
     litellm_redis_daily_spend_update_queue_size: List[str] = []
-
+
     litellm_in_memory_spend_update_queue_size: List[str] = []
-
+
     litellm_redis_spend_update_queue_size: List[str] = []

     @staticmethod

litellm/types/utils.py

Lines changed: 8 additions & 1 deletion
@@ -1867,6 +1867,7 @@ class StandardLoggingUserAPIKeyMetadata(TypedDict):
     user_api_key_team_alias: Optional[str]
     user_api_key_end_user_id: Optional[str]
     user_api_key_request_route: Optional[str]
+    user_api_key_auth_metadata: Optional[Dict[str, str]]


 class StandardLoggingMCPToolCall(TypedDict, total=False):
@@ -2077,10 +2078,12 @@ class StandardLoggingGuardrailInformation(TypedDict, total=False):

 StandardLoggingPayloadStatus = Literal["success", "failure"]

+
 class CachingDetails(TypedDict):
     """
     Track all caching related metrics, fields for a given request
     """
+
     cache_hit: Optional[bool]
     """
     Whether the request hit the cache
@@ -2090,12 +2093,16 @@ class CachingDetails(TypedDict):
     Duration for reading from cache
     """

+
 class CostBreakdown(TypedDict):
     """
     Detailed cost breakdown for a request
     """
+
     input_cost: float  # Cost of input/prompt tokens
-    output_cost: float  # Cost of output/completion tokens (includes reasoning if applicable)
+    output_cost: (
+        float  # Cost of output/completion tokens (includes reasoning if applicable)
+    )
     total_cost: float  # Total cost (input + output + tool usage)
     tool_usage_cost: float  # Cost of usage of built-in tools
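Because `StandardLoggingUserAPIKeyMetadata` is a total TypedDict, adding `user_api_key_auth_metadata` means every constructor of this shape must now supply the field, which is why the `litellm_logging.py` and `litellm_pre_call_utils.py` changes above default it to `None`. A trimmed stand-in showing only the fields visible in this hunk:

```python
from typing import Dict, Optional
from typing_extensions import TypedDict


class StandardLoggingUserAPIKeyMetadata(TypedDict):
    # Trimmed to the fields visible in this hunk; the real class has more.
    user_api_key_team_alias: Optional[str]
    user_api_key_end_user_id: Optional[str]
    user_api_key_request_route: Optional[str]
    user_api_key_auth_metadata: Optional[Dict[str, str]]  # new in this commit


entry: StandardLoggingUserAPIKeyMetadata = {
    "user_api_key_team_alias": "platform-team",
    "user_api_key_end_user_id": None,
    "user_api_key_request_route": "/chat/completions",
    "user_api_key_auth_metadata": {"initiative": "q4-launch"},
}
print(entry["user_api_key_auth_metadata"])
```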
