
Commit d6800ee (parent: 6ca7752)

feat(prometheus.py): initial working commit of passing team/key metadata as prometheus metrics

Closes LIT-1006

File tree

6 files changed: +93 −38 lines


enterprise/litellm_enterprise/integrations/prometheus.py

Lines changed: 9 additions & 3 deletions
@@ -794,9 +794,16 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         output_tokens = standard_logging_payload["completion_tokens"]
         tokens_used = standard_logging_payload["total_tokens"]
         response_cost = standard_logging_payload["response_cost"]
-        _requester_metadata = standard_logging_payload["metadata"].get(
+        _requester_metadata: Optional[dict] = standard_logging_payload["metadata"].get(
             "requester_metadata"
         )
+        user_api_key_auth_metadata: Optional[dict] = standard_logging_payload[
+            "metadata"
+        ].get("user_api_key_auth_metadata")
+        combined_metadata: Dict[str, Any] = {
+            **(_requester_metadata if _requester_metadata else {}),
+            **(user_api_key_auth_metadata if user_api_key_auth_metadata else {}),
+        }
         if standard_logging_payload is not None and isinstance(
             standard_logging_payload, dict
         ):

@@ -828,8 +835,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
             exception_status=None,
             exception_class=None,
             custom_metadata_labels=get_custom_labels_from_metadata(
-                metadata=standard_logging_payload["metadata"].get("requester_metadata")
-                or {}
+                metadata=combined_metadata
             ),
             route=standard_logging_payload["metadata"].get(
                 "user_api_key_request_route"

litellm/litellm_core_utils/litellm_logging.py

Lines changed: 2 additions & 0 deletions
@@ -4019,6 +4019,7 @@ def get_standard_logging_metadata(
             usage_object=usage_object,
             requester_custom_headers=None,
             cold_storage_object_key=None,
+            user_api_key_auth_metadata=None,
         )
     if isinstance(metadata, dict):
         # Filter the metadata dictionary to include only the specified keys

@@ -4685,6 +4686,7 @@ def get_standard_logging_metadata(
             requester_custom_headers=None,
             user_api_key_request_route=None,
             cold_storage_object_key=None,
+            user_api_key_auth_metadata=None,
         )
     if isinstance(metadata, dict):
         # Update the clean_metadata with values from input metadata that match StandardLoggingMetadata fields
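
Both hunks are the same mechanical change: `StandardLoggingMetadata` is a TypedDict constructed with every key written out, so a new field has to be threaded through each construction site (defaulting to `None` here) or static type checkers will reject the call. A toy reproduction, with an invented stand-in type:

    from typing import Optional, TypedDict

    class ExampleMetadata(TypedDict):  # hypothetical stand-in for StandardLoggingMetadata
        user_api_key_alias: Optional[str]
        user_api_key_auth_metadata: Optional[dict]  # the newly added field

    # Omitting the new key is flagged by mypy/pyright (all keys are required):
    #   bad: ExampleMetadata = {"user_api_key_alias": None}
    ok: ExampleMetadata = {"user_api_key_alias": None, "user_api_key_auth_metadata": None}
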
Lines changed: 6 additions & 27 deletions
@@ -1,30 +1,9 @@
 model_list:
-  - model_name: byok-fixed-gpt-4o-mini
+  - model_name: openai/gpt-4o
     litellm_params:
-      model: openai/gpt-4o-mini
-      api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
-      api_key: dummy
-  - model_name: "byok-wildcard/*"
-    litellm_params:
-      model: openai/*
-  - model_name: xai-grok-3
-    litellm_params:
-      model: xai/grok-3
-  - model_name: hosted_vllm/whisper-v3
-    litellm_params:
-      model: hosted_vllm/whisper-v3
-      api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
-      api_key: dummy
-
-mcp_servers:
-  github_mcp:
-    url: "https://api.githubcopilot.com/mcp"
-    auth_type: oauth2
-    authorization_url: https://github.com/login/oauth/authorize
-    token_url: https://github.com/login/oauth/access_token
-    client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
-    client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
-    scopes: ["public_repo", "user:email"]
-    allowed_tools: ["list_tools"]
-    # disallowed_tools: ["repo_delete"]
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
 
+litellm_settings:
+  callbacks: ["prometheus"]
+  custom_prometheus_metadata_labels: ["metadata.initiative"]
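
With this config, the proxy emits Prometheus metrics and promotes the `initiative` metadata key to a metric label; after this commit the value can come from key/team metadata as well as the request body. A hedged usage sketch (the label name assumes the documented convention that `metadata.initiative` is emitted as `metadata_initiative`; the metric shown is illustrative):

    # Hypothetical client call against a proxy running the config above.
    import openai

    client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")
    client.chat.completions.create(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": "hi"}],
        extra_body={"metadata": {"initiative": "q3-growth"}},  # or set on the key/team
    )
    # Expected shape of the emitted metric (illustrative):
    #   litellm_requests_metric{..., metadata_initiative="q3-growth"} 1.0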

litellm/proxy/_types.py

Lines changed: 1 addition & 0 deletions
@@ -3066,6 +3066,7 @@ class PassThroughEndpointLoggingTypedDict(TypedDict):
     "tags",
     "team_member_key_duration",
     "prompts",
+    "logging",
 ]

litellm/proxy/litellm_pre_call_utils.py

Lines changed: 51 additions & 1 deletion
@@ -579,7 +579,12 @@ def get_sanitized_user_information_from_key(
             user_api_key_end_user_id=user_api_key_dict.end_user_id,
             user_api_key_user_email=user_api_key_dict.user_email,
             user_api_key_request_route=user_api_key_dict.request_route,
-            user_api_key_budget_reset_at=user_api_key_dict.budget_reset_at.isoformat() if user_api_key_dict.budget_reset_at else None,
+            user_api_key_budget_reset_at=(
+                user_api_key_dict.budget_reset_at.isoformat()
+                if user_api_key_dict.budget_reset_at
+                else None
+            ),
+            user_api_key_auth_metadata=None,
         )
         return user_api_key_logged_metadata
 
@@ -607,6 +612,35 @@ def add_user_api_key_auth_to_request_metadata(
         )
         return data
 
+    @staticmethod
+    def add_management_endpoint_metadata_to_request_metadata(
+        data: dict,
+        management_endpoint_metadata: dict,
+        _metadata_variable_name: str,
+    ) -> dict:
+        """
+        Adds the `UserAPIKeyAuth` metadata to the request metadata.
+
+        ignore any sensitive fields like logging, api_key, etc.
+        """
+        from litellm.proxy._types import (
+            LiteLLM_ManagementEndpoint_MetadataFields,
+            LiteLLM_ManagementEndpoint_MetadataFields_Premium,
+        )
+
+        # ignore any special fields
+        added_metadata = {}
+        for k, v in management_endpoint_metadata.items():
+            if k not in (
+                LiteLLM_ManagementEndpoint_MetadataFields_Premium
+                + LiteLLM_ManagementEndpoint_MetadataFields
+            ):
+                added_metadata[k] = v
+        data[_metadata_variable_name].setdefault(
+            "user_api_key_auth_metadata", {}
+        ).update(added_metadata)
+        return data
+
     @staticmethod
     def add_key_level_controls(
         key_metadata: Optional[dict], data: dict, _metadata_variable_name: str
 
@@ -651,6 +685,13 @@ def add_key_level_controls(
                 key_metadata["disable_fallbacks"], bool
             ):
                 data["disable_fallbacks"] = key_metadata["disable_fallbacks"]
+
+        ## KEY-LEVEL METADATA
+        data = LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
+            data=data,
+            management_endpoint_metadata=key_metadata,
+            _metadata_variable_name=_metadata_variable_name,
+        )
         return data
 
     @staticmethod

@@ -889,6 +930,15 @@ async def add_litellm_data_to_request( # noqa: PLR0915
                 "spend_logs_metadata"
             ]
 
+        ## TEAM-LEVEL METADATA
+        data = (
+            LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
+                data=data,
+                management_endpoint_metadata=team_metadata,
+                _metadata_variable_name=_metadata_variable_name,
+            )
+        )
+
         # Team spend, budget - used by prometheus.py
         data[_metadata_variable_name][
             "user_api_key_team_max_budget"

litellm/types/utils.py

Lines changed: 24 additions & 7 deletions
@@ -123,12 +123,18 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
     max_output_tokens: Required[Optional[int]]
     input_cost_per_token: Required[float]
     input_cost_per_token_flex: Optional[float]  # OpenAI flex service tier pricing
-    input_cost_per_token_priority: Optional[float]  # OpenAI priority service tier pricing
+    input_cost_per_token_priority: Optional[
+        float
+    ]  # OpenAI priority service tier pricing
     cache_creation_input_token_cost: Optional[float]
     cache_creation_input_token_cost_above_1hr: Optional[float]
     cache_read_input_token_cost: Optional[float]
-    cache_read_input_token_cost_flex: Optional[float]  # OpenAI flex service tier pricing
-    cache_read_input_token_cost_priority: Optional[float]  # OpenAI priority service tier pricing
+    cache_read_input_token_cost_flex: Optional[
+        float
+    ]  # OpenAI flex service tier pricing
+    cache_read_input_token_cost_priority: Optional[
+        float
+    ]  # OpenAI priority service tier pricing
     input_cost_per_character: Optional[float]  # only for vertex ai models
     input_cost_per_audio_token: Optional[float]
     input_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models

@@ -147,7 +153,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
     output_cost_per_token_batches: Optional[float]
     output_cost_per_token: Required[float]
     output_cost_per_token_flex: Optional[float]  # OpenAI flex service tier pricing
-    output_cost_per_token_priority: Optional[float]  # OpenAI priority service tier pricing
+    output_cost_per_token_priority: Optional[
+        float
+    ]  # OpenAI priority service tier pricing
     output_cost_per_character: Optional[float]  # only for vertex ai models
     output_cost_per_audio_token: Optional[float]
     output_cost_per_token_above_128k_tokens: Optional[

@@ -1856,6 +1864,7 @@ class StandardLoggingUserAPIKeyMetadata(TypedDict):
     user_api_key_team_alias: Optional[str]
     user_api_key_end_user_id: Optional[str]
     user_api_key_request_route: Optional[str]
+    user_api_key_auth_metadata: Optional[Dict[str, str]]
 
 
 class StandardLoggingMCPToolCall(TypedDict, total=False):

@@ -2059,10 +2068,12 @@ class StandardLoggingGuardrailInformation(TypedDict, total=False):
 
 StandardLoggingPayloadStatus = Literal["success", "failure"]
 
+
 class CachingDetails(TypedDict):
     """
     Track all caching related metrics, fields for a given request
     """
+
     cache_hit: Optional[bool]
     """
     Whether the request hit the cache

@@ -2072,12 +2083,16 @@ class CachingDetails(TypedDict):
     Duration for reading from cache
     """
 
+
 class CostBreakdown(TypedDict):
     """
     Detailed cost breakdown for a request
     """
+
     input_cost: float  # Cost of input/prompt tokens
-    output_cost: float  # Cost of output/completion tokens (includes reasoning if applicable)
+    output_cost: (
+        float  # Cost of output/completion tokens (includes reasoning if applicable)
+    )
     total_cost: float  # Total cost (input + output + tool usage)
     tool_usage_cost: float  # Cost of usage of built-in tools

@@ -2616,6 +2631,7 @@ class SpecialEnums(Enum):
 
 class ServiceTier(Enum):
     """Enum for service tier types used in cost calculations."""
+
     FLEX = "flex"
     PRIORITY = "priority"

@@ -2662,13 +2678,14 @@ class CallbacksByType(TypedDict):
 class PriorityReservationSettings(BaseModel):
     """
     Settings for priority-based rate limiting reservation.
-
+
     Defines what priority to assign to keys without explicit priority metadata.
     The priority_reservation mapping is configured separately via litellm.priority_reservation.
     """
+
     default_priority: float = Field(
         default=0.5,
-        description="Priority level to assign to API keys without explicit priority metadata. Should match a key in litellm.priority_reservation."
+        description="Priority level to assign to API keys without explicit priority metadata. Should match a key in litellm.priority_reservation.",
     )
 
     model_config = ConfigDict(protected_namespaces=())
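
Among the mostly formatter-style rewraps in this file, the substantive change is the `user_api_key_auth_metadata: Optional[Dict[str, str]]` field on `StandardLoggingUserAPIKeyMetadata`. The `Dict[str, str]` shape matches Prometheus label semantics, where label values are strings. A sketch of the record as the pre-call hook would populate it (field list trimmed, values invented):

    from typing import Dict, Optional, TypedDict

    class StandardLoggingUserAPIKeyMetadataSketch(TypedDict):
        # Hypothetical stand-in, trimmed to the fields relevant here.
        user_api_key_team_alias: Optional[str]
        user_api_key_request_route: Optional[str]
        user_api_key_auth_metadata: Optional[Dict[str, str]]

    record: StandardLoggingUserAPIKeyMetadataSketch = {
        "user_api_key_team_alias": "growth-team",
        "user_api_key_request_route": "/chat/completions",
        # Populated from filtered key/team metadata in litellm_pre_call_utils.py:
        "user_api_key_auth_metadata": {"initiative": "q3-growth"},
    }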
