Skip to content

Commit 338bfcd

Browse files
fix(proxy_server.py): handle decrypting model list from DB when litellm_params is a Pydantic BaseModel (not a dict)
1 parent 54095a2 commit 338bfcd

File tree

3 files changed

+63
-38
lines changed

3 files changed

+63
-38
lines changed

litellm/model_prices_and_context_window_backup.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4801,6 +4801,10 @@
48014801
"cache_creation_input_token_cost": 3.75e-06,
48024802
"cache_read_input_token_cost": 3e-07,
48034803
"input_cost_per_token": 3e-06,
4804+
"input_cost_per_token_above_200k_tokens": 6e-06,
4805+
"output_cost_per_token_above_200k_tokens": 2.25e-05,
4806+
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
4807+
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
48044808
"litellm_provider": "anthropic",
48054809
"max_input_tokens": 200000,
48064810
"max_output_tokens": 64000,
@@ -4827,6 +4831,10 @@
48274831
"cache_creation_input_token_cost": 3.75e-06,
48284832
"cache_read_input_token_cost": 3e-07,
48294833
"input_cost_per_token": 3e-06,
4834+
"input_cost_per_token_above_200k_tokens": 6e-06,
4835+
"output_cost_per_token_above_200k_tokens": 2.25e-05,
4836+
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
4837+
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
48304838
"litellm_provider": "anthropic",
48314839
"max_input_tokens": 200000,
48324840
"max_output_tokens": 64000,
@@ -19720,6 +19728,10 @@
1972019728
"cache_creation_input_token_cost": 3.75e-06,
1972119729
"cache_read_input_token_cost": 3e-07,
1972219730
"input_cost_per_token": 3e-06,
19731+
"input_cost_per_token_above_200k_tokens": 6e-06,
19732+
"output_cost_per_token_above_200k_tokens": 2.25e-05,
19733+
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
19734+
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
1972319735
"litellm_provider": "bedrock_converse",
1972419736
"max_input_tokens": 200000,
1972519737
"max_output_tokens": 64000,
@@ -21086,6 +21098,10 @@
2108621098
"cache_creation_input_token_cost": 3.75e-06,
2108721099
"cache_read_input_token_cost": 3e-07,
2108821100
"input_cost_per_token": 3e-06,
21101+
"input_cost_per_token_above_200k_tokens": 6e-06,
21102+
"output_cost_per_token_above_200k_tokens": 2.25e-05,
21103+
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
21104+
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
2108921105
"input_cost_per_token_batches": 1.5e-06,
2109021106
"litellm_provider": "vertex_ai-anthropic_models",
2109121107
"max_input_tokens": 200000,
@@ -21108,6 +21124,10 @@
2110821124
"cache_creation_input_token_cost": 3.75e-06,
2110921125
"cache_read_input_token_cost": 3e-07,
2111021126
"input_cost_per_token": 3e-06,
21127+
"input_cost_per_token_above_200k_tokens": 6e-06,
21128+
"output_cost_per_token_above_200k_tokens": 2.25e-05,
21129+
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
21130+
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
2111121131
"input_cost_per_token_batches": 1.5e-06,
2111221132
"litellm_provider": "vertex_ai-anthropic_models",
2111321133
"max_input_tokens": 200000,

litellm/proxy/common_utils/encrypt_decrypt_utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def decrypt_value_helper(
3939
value: str,
4040
key: str, # this is just for debug purposes, showing the k,v pair that's invalid. not a signing key.
4141
exception_type: Literal["debug", "error"] = "error",
42+
return_original_value: bool = False,
4243
):
4344
signing_key = _get_salt_key()
4445

@@ -55,14 +56,14 @@ def decrypt_value_helper(
5556
error_message = f"Error decrypting value for key: {key}, Did your master_key/salt key change recently? \nError: {str(e)}\nSet permanent salt key - https://docs.litellm.ai/docs/proxy/prod#5-set-litellm-salt-key"
5657
if exception_type == "debug":
5758
verbose_proxy_logger.debug(error_message)
58-
return None
59+
return value if return_original_value else None
5960

6061
verbose_proxy_logger.debug(
6162
f"Unable to decrypt value={value} for key: {key}, returning None"
6263
)
6364
verbose_proxy_logger.exception(error_message)
6465
# [Non-Blocking Exception. - this should not block decrypting other values]
65-
return None
66+
return value if return_original_value else None
6667

6768

6869
def encrypt_value(value: str, signing_key: str):

litellm/proxy/proxy_server.py

Lines changed: 40 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -253,9 +253,7 @@ def generate_feedback_box():
253253
from litellm.proxy.management_endpoints.internal_user_endpoints import (
254254
router as internal_user_router,
255255
)
256-
from litellm.proxy.management_endpoints.internal_user_endpoints import (
257-
user_update,
258-
)
256+
from litellm.proxy.management_endpoints.internal_user_endpoints import user_update
259257
from litellm.proxy.management_endpoints.key_management_endpoints import (
260258
delete_verification_tokens,
261259
duration_in_seconds,
@@ -302,9 +300,7 @@ def generate_feedback_box():
302300
from litellm.proxy.openai_files_endpoints.files_endpoints import (
303301
router as openai_files_router,
304302
)
305-
from litellm.proxy.openai_files_endpoints.files_endpoints import (
306-
set_files_config,
307-
)
303+
from litellm.proxy.openai_files_endpoints.files_endpoints import set_files_config
308304
from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
309305
passthrough_endpoint_router,
310306
)
@@ -467,9 +463,9 @@ def generate_feedback_box():
467463
server_root_path = os.getenv("SERVER_ROOT_PATH", "")
468464
_license_check = LicenseCheck()
469465
premium_user: bool = _license_check.is_premium()
470-
premium_user_data: Optional[
471-
"EnterpriseLicenseData"
472-
] = _license_check.airgapped_license_data
466+
premium_user_data: Optional["EnterpriseLicenseData"] = (
467+
_license_check.airgapped_license_data
468+
)
473469
global_max_parallel_request_retries_env: Optional[str] = os.getenv(
474470
"LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES"
475471
)
@@ -966,9 +962,9 @@ def swagger_monkey_patch(*args, **kwargs):
966962
dual_cache=user_api_key_cache
967963
)
968964
litellm.logging_callback_manager.add_litellm_callback(model_max_budget_limiter)
969-
redis_usage_cache: Optional[
970-
RedisCache
971-
] = None # redis cache used for tracking spend, tpm/rpm limits
965+
redis_usage_cache: Optional[RedisCache] = (
966+
None # redis cache used for tracking spend, tpm/rpm limits
967+
)
972968
user_custom_auth = None
973969
user_custom_key_generate = None
974970
user_custom_sso = None
@@ -1299,9 +1295,9 @@ async def _update_team_cache():
12991295
_id = "team_id:{}".format(team_id)
13001296
try:
13011297
# Fetch the existing cost for the given user
1302-
existing_spend_obj: Optional[
1303-
LiteLLM_TeamTable
1304-
] = await user_api_key_cache.async_get_cache(key=_id)
1298+
existing_spend_obj: Optional[LiteLLM_TeamTable] = (
1299+
await user_api_key_cache.async_get_cache(key=_id)
1300+
)
13051301
if existing_spend_obj is None:
13061302
# do nothing if team not in api key cache
13071303
return
@@ -1878,9 +1874,7 @@ async def load_config( # noqa: PLR0915
18781874
f"{blue_color_code}Set Global BitBucket Config on LiteLLM Proxy{reset_color_code}"
18791875
)
18801876
elif key == "global_gitlab_config":
1881-
from litellm.integrations.gitlab import (
1882-
set_global_gitlab_config,
1883-
)
1877+
from litellm.integrations.gitlab import set_global_gitlab_config
18841878

18851879
set_global_gitlab_config(value)
18861880
verbose_proxy_logger.info(
@@ -2541,10 +2535,14 @@ def decrypt_model_list_from_db(self, new_models: list) -> list:
25412535
_model_list: list = []
25422536
for m in new_models:
25432537
_litellm_params = m.litellm_params
2538+
if isinstance(_litellm_params, BaseModel):
2539+
_litellm_params = _litellm_params.model_dump()
25442540
if isinstance(_litellm_params, dict):
25452541
# decrypt values
25462542
for k, v in _litellm_params.items():
2547-
decrypted_value = decrypt_value_helper(value=v, key=k)
2543+
decrypted_value = decrypt_value_helper(
2544+
value=v, key=k, return_original_value=True
2545+
)
25482546
_litellm_params[k] = decrypted_value
25492547
_litellm_params = LiteLLM_Params(**_litellm_params)
25502548
else:
@@ -2628,7 +2626,7 @@ def _add_callback_from_db_to_in_memory_litellm_callbacks(
26282626
) -> None:
26292627
"""
26302628
Helper method to add a single callback to litellm for specified event types.
2631-
2629+
26322630
Args:
26332631
callback: The callback name to add
26342632
event_types: List of event types (e.g., ["success"], ["failure"], or ["success", "failure"])
@@ -3153,10 +3151,10 @@ async def _init_guardrails_in_db(self, prisma_client: PrismaClient):
31533151
)
31543152

31553153
try:
3156-
guardrails_in_db: List[
3157-
Guardrail
3158-
] = await GuardrailRegistry.get_all_guardrails_from_db(
3159-
prisma_client=prisma_client
3154+
guardrails_in_db: List[Guardrail] = (
3155+
await GuardrailRegistry.get_all_guardrails_from_db(
3156+
prisma_client=prisma_client
3157+
)
31603158
)
31613159
verbose_proxy_logger.debug(
31623160
"guardrails from the DB %s", str(guardrails_in_db)
@@ -3386,9 +3384,9 @@ async def initialize( # noqa: PLR0915
33863384
user_api_base = api_base
33873385
dynamic_config[user_model]["api_base"] = api_base
33883386
if api_version:
3389-
os.environ[
3390-
"AZURE_API_VERSION"
3391-
] = api_version # set this for azure - litellm can read this from the env
3387+
os.environ["AZURE_API_VERSION"] = (
3388+
api_version # set this for azure - litellm can read this from the env
3389+
)
33923390
if max_tokens: # model-specific param
33933391
dynamic_config[user_model]["max_tokens"] = max_tokens
33943392
if temperature: # model-specific param
@@ -3888,10 +3886,10 @@ async def _initialize_spend_tracking_background_jobs(
38883886
LITELLM_KEY_ROTATION_CHECK_INTERVAL_SECONDS,
38893887
LITELLM_KEY_ROTATION_ENABLED,
38903888
)
3891-
3889+
38923890
key_rotation_enabled: Optional[bool] = str_to_bool(LITELLM_KEY_ROTATION_ENABLED)
38933891
verbose_proxy_logger.debug(f"key_rotation_enabled: {key_rotation_enabled}")
3894-
3892+
38953893
if key_rotation_enabled is True:
38963894
try:
38973895
from litellm.proxy.common_utils.key_rotation_manager import (
@@ -3902,19 +3900,25 @@ async def _initialize_spend_tracking_background_jobs(
39023900
global prisma_client
39033901
if prisma_client is not None:
39043902
key_rotation_manager = KeyRotationManager(prisma_client)
3905-
verbose_proxy_logger.debug(f"Key rotation background job scheduled every {LITELLM_KEY_ROTATION_CHECK_INTERVAL_SECONDS} seconds (LITELLM_KEY_ROTATION_ENABLED=true)")
3903+
verbose_proxy_logger.debug(
3904+
f"Key rotation background job scheduled every {LITELLM_KEY_ROTATION_CHECK_INTERVAL_SECONDS} seconds (LITELLM_KEY_ROTATION_ENABLED=true)"
3905+
)
39063906
scheduler.add_job(
39073907
key_rotation_manager.process_rotations,
39083908
"interval",
39093909
seconds=LITELLM_KEY_ROTATION_CHECK_INTERVAL_SECONDS,
3910-
id="key_rotation_job"
3910+
id="key_rotation_job",
39113911
)
39123912
else:
3913-
verbose_proxy_logger.warning("Key rotation enabled but prisma_client not available")
3913+
verbose_proxy_logger.warning(
3914+
"Key rotation enabled but prisma_client not available"
3915+
)
39143916
except Exception as e:
39153917
verbose_proxy_logger.warning(f"Failed to setup key rotation job: {e}")
39163918
else:
3917-
verbose_proxy_logger.debug("Key rotation disabled (set LITELLM_KEY_ROTATION_ENABLED=true to enable)")
3919+
verbose_proxy_logger.debug(
3920+
"Key rotation disabled (set LITELLM_KEY_ROTATION_ENABLED=true to enable)"
3921+
)
39183922

39193923
@classmethod
39203924
async def _setup_prisma_client(
@@ -8745,9 +8749,9 @@ async def get_config_list(
87458749
hasattr(sub_field_info, "description")
87468750
and sub_field_info.description is not None
87478751
):
8748-
nested_fields[
8749-
idx
8750-
].field_description = sub_field_info.description
8752+
nested_fields[idx].field_description = (
8753+
sub_field_info.description
8754+
)
87518755
idx += 1
87528756

87538757
_stored_in_db = None

0 commit comments

Comments
 (0)