Skip to content

Commit 6f2dc19

Browse files
authored
Merge pull request BerriAI#20214 from cscguochang/feat/bedrock-1hr-tiered-caching-cost
feat(bedrock): add 1hr tiered caching costs for long-context models (BerriAI#18988)
2 parents 1a7fcfb + 76407bc commit 6f2dc19

File tree

2 files changed

+48
-21
lines changed

2 files changed

+48
-21
lines changed

litellm/litellm_core_utils/llm_cost_calc/utils.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,9 @@ def _get_token_base_cost(
215215
cache_creation_tiered_key = (
216216
f"cache_creation_input_token_cost_above_{threshold_str}_tokens"
217217
)
218+
cache_creation_1hr_tiered_key = (
219+
f"cache_creation_input_token_cost_above_1hr_above_{threshold_str}_tokens"
220+
)
218221
cache_read_tiered_key = (
219222
f"cache_read_input_token_cost_above_{threshold_str}_tokens"
220223
)
@@ -229,6 +232,16 @@ def _get_token_base_cost(
229232
),
230233
)
231234

235+
if cache_creation_1hr_tiered_key in model_info:
236+
cache_creation_cost_above_1hr = cast(
237+
float,
238+
_get_cost_per_unit(
239+
model_info,
240+
cache_creation_1hr_tiered_key,
241+
cache_creation_cost_above_1hr,
242+
),
243+
)
244+
232245
if cache_read_tiered_key in model_info:
233246
cache_read_cost = cast(
234247
float,

model_prices_and_context_window.json

Lines changed: 35 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -749,7 +749,7 @@
749749
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
750750
"input_cost_per_token": 3e-06,
751751
"litellm_provider": "bedrock",
752-
"max_input_tokens": 200000,
752+
"max_input_tokens": 1000000,
753753
"max_output_tokens": 4096,
754754
"max_tokens": 4096,
755755
"mode": "chat",
@@ -758,14 +758,22 @@
758758
"supports_pdf_input": true,
759759
"supports_response_schema": true,
760760
"supports_tool_choice": true,
761-
"supports_vision": true
761+
"supports_vision": true,
762+
"input_cost_per_token_above_200k_tokens": 6e-06,
763+
"output_cost_per_token_above_200k_tokens": 3e-05,
764+
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
765+
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
766+
"cache_creation_input_token_cost_above_1hr": 7.5e-06,
767+
"cache_creation_input_token_cost_above_1hr_above_200k_tokens": 1.5e-05,
768+
"cache_creation_input_token_cost": 3.75e-06,
769+
"cache_read_input_token_cost": 3e-07
762770
},
763771
"anthropic.claude-3-5-sonnet-20241022-v2:0": {
764772
"cache_creation_input_token_cost": 3.75e-06,
765773
"cache_read_input_token_cost": 3e-07,
766774
"input_cost_per_token": 3e-06,
767775
"litellm_provider": "bedrock",
768-
"max_input_tokens": 200000,
776+
"max_input_tokens": 1000000,
769777
"max_output_tokens": 8192,
770778
"max_tokens": 8192,
771779
"mode": "chat",
@@ -777,7 +785,13 @@
777785
"supports_prompt_caching": true,
778786
"supports_response_schema": true,
779787
"supports_tool_choice": true,
780-
"supports_vision": true
788+
"supports_vision": true,
789+
"input_cost_per_token_above_200k_tokens": 6e-06,
790+
"output_cost_per_token_above_200k_tokens": 3e-05,
791+
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
792+
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
793+
"cache_creation_input_token_cost_above_1hr": 7.5e-06,
794+
"cache_creation_input_token_cost_above_1hr_above_200k_tokens": 1.5e-05
781795
},
782796
"anthropic.claude-3-7-sonnet-20240620-v1:0": {
783797
"cache_creation_input_token_cost": 4.5e-06,
@@ -24390,21 +24404,21 @@
2439024404
"supports_tool_choice": true
2439124405
},
2439224406
"openrouter/xiaomi/mimo-v2-flash": {
24393-
"input_cost_per_token": 9e-08,
24394-
"output_cost_per_token": 2.9e-07,
24395-
"cache_creation_input_token_cost": 0.0,
24396-
"cache_read_input_token_cost": 0.0,
24397-
"litellm_provider": "openrouter",
24398-
"max_input_tokens": 262144,
24399-
"max_output_tokens": 16384,
24400-
"max_tokens": 16384,
24401-
"mode": "chat",
24402-
"supports_function_calling": true,
24403-
"supports_tool_choice": true,
24404-
"supports_reasoning": true,
24405-
"supports_vision": false,
24406-
"supports_prompt_caching": false
24407-
},
24407+
"input_cost_per_token": 9e-08,
24408+
"output_cost_per_token": 2.9e-07,
24409+
"cache_creation_input_token_cost": 0.0,
24410+
"cache_read_input_token_cost": 0.0,
24411+
"litellm_provider": "openrouter",
24412+
"max_input_tokens": 262144,
24413+
"max_output_tokens": 16384,
24414+
"max_tokens": 16384,
24415+
"mode": "chat",
24416+
"supports_function_calling": true,
24417+
"supports_tool_choice": true,
24418+
"supports_reasoning": true,
24419+
"supports_vision": false,
24420+
"supports_prompt_caching": false
24421+
},
2440824422
"openrouter/z-ai/glm-4.7": {
2440924423
"input_cost_per_token": 4e-07,
2441024424
"output_cost_per_token": 1.5e-06,
@@ -26319,13 +26333,13 @@
2631926333
"litellm_provider": "bedrock",
2632026334
"max_input_tokens": 77,
2632126335
"mode": "image_edit",
26322-
"output_cost_per_image": 0.40
26336+
"output_cost_per_image": 0.4
2632326337
},
2632426338
"stability.stable-creative-upscale-v1:0": {
2632526339
"litellm_provider": "bedrock",
2632626340
"max_input_tokens": 77,
2632726341
"mode": "image_edit",
26328-
"output_cost_per_image": 0.60
26342+
"output_cost_per_image": 0.6
2632926343
},
2633026344
"stability.stable-fast-upscale-v1:0": {
2633126345
"litellm_provider": "bedrock",

0 commit comments

Comments
 (0)