Commit 30c3e7b

Fix: Bedrock cross-region inference profile cost calculation (#14566)
* Add tests for Bedrock cross-region inference profile mapping
  - Test model mapping lookup works correctly
  - Test proxy cost calculation scenario reproduces original issue
  - Verify cost calculation returns expected values
  - Ensure compatibility with existing test patterns

* Fix Bedrock cross-region inference profile cost calculation
  - Add mapping for bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0
  - Sync backup file for local testing consistency
  - Resolve proxy spend tracking failures for cross-region profiles
  - Maintain identical configuration with standalone profile

Fixes #14458
1 parent 110ce54 commit 30c3e7b
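
For context, a minimal sketch of what this commit enables, assuming litellm is installed with the updated pricing map; the token counts are illustrative, and litellm.get_model_info is the public lookup wrapper around the helper exercised in the tests below:

# Minimal sketch (assumption: litellm installed with this commit's pricing map).
# Before the fix, the cross-region profile name below had no pricing entry,
# so cost lookups for it failed and proxy spend tracking broke.
import litellm

model = "bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0"

info = litellm.get_model_info(model=model, custom_llm_provider="bedrock")
print(info["input_cost_per_token"])   # 8e-07 USD per input token
print(info["output_cost_per_token"])  # 4e-06 USD per output token

# Illustrative call with 100 prompt tokens and 50 completion tokens:
cost = 100 * info["input_cost_per_token"] + 50 * info["output_cost_per_token"]
print(f"{cost:.5f}")  # 0.00028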

File tree

3 files changed: 77 additions & 0 deletions


litellm/model_prices_and_context_window_backup.json

Lines changed: 17 additions & 0 deletions
@@ -13477,6 +13477,23 @@
     "supports_response_schema": true,
     "supports_tool_choice": true
   },
+  "bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": {
+    "max_tokens": 8192,
+    "max_input_tokens": 200000,
+    "max_output_tokens": 8192,
+    "input_cost_per_token": 8e-07,
+    "output_cost_per_token": 4e-06,
+    "cache_creation_input_token_cost": 1e-06,
+    "cache_read_input_token_cost": 8e-08,
+    "litellm_provider": "bedrock",
+    "mode": "chat",
+    "supports_assistant_prefill": true,
+    "supports_pdf_input": true,
+    "supports_function_calling": true,
+    "supports_prompt_caching": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true
+  },
   "us.anthropic.claude-3-opus-20240229-v1:0": {
     "max_tokens": 4096,
     "max_input_tokens": 200000,

model_prices_and_context_window.json

Lines changed: 17 additions & 0 deletions
@@ -13477,6 +13477,23 @@
     "supports_response_schema": true,
     "supports_tool_choice": true
   },
+  "bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": {
+    "max_tokens": 8192,
+    "max_input_tokens": 200000,
+    "max_output_tokens": 8192,
+    "input_cost_per_token": 8e-07,
+    "output_cost_per_token": 4e-06,
+    "cache_creation_input_token_cost": 1e-06,
+    "cache_read_input_token_cost": 8e-08,
+    "litellm_provider": "bedrock",
+    "mode": "chat",
+    "supports_assistant_prefill": true,
+    "supports_pdf_input": true,
+    "supports_function_calling": true,
+    "supports_prompt_caching": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true
+  },
   "us.anthropic.claude-3-opus-20240229-v1:0": {
     "max_tokens": 4096,
     "max_input_tokens": 200000,
Lines changed: 43 additions & 0 deletions (new test file)
@@ -0,0 +1,43 @@
+"""Test Bedrock cross-region inference profile model mapping"""
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath("../../../.."))
+
+from litellm.utils import _get_model_info_helper
+from litellm.cost_calculator import completion_cost
+from litellm.types.utils import ModelResponse, Usage, Choices, Message
+
+
+def test_bedrock_cross_region_inference_profile_mapping():
+    """Test that bedrock cross-region inference profile model is mapped"""
+    model = "bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0"
+
+    model_info = _get_model_info_helper(model=model, custom_llm_provider="bedrock")
+
+    assert model_info is not None
+    assert model_info["litellm_provider"] == "bedrock"
+    assert model_info["input_cost_per_token"] == 8e-07
+
+
+def test_proxy_cost_calculation_scenario():
+    """Test exact GitHub issue scenario: proxy cost calculation"""
+    model = "litellm_proxy/bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0"
+
+    # Test model info lookup works
+    model_info = _get_model_info_helper(model=model, custom_llm_provider="litellm_proxy")
+    assert model_info is not None
+
+    # Test cost calculation works
+    response = ModelResponse(
+        id="test",
+        created=1234567890,
+        model=model,
+        object="chat.completion",
+        choices=[Choices(finish_reason="stop", index=0, message=Message(content="Test", role="assistant"))],
+        usage=Usage(total_tokens=150, prompt_tokens=100, completion_tokens=50),
+    )
+
+    cost = completion_cost(completion_response=response, model=model, custom_llm_provider="litellm_proxy")
+    expected_cost = (100 * 8e-07) + (50 * 4e-06)
+    assert cost == expected_cost
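
For reference, the expected_cost assertion works out as follows (illustrative arithmetic only, not part of the commit):

# Worked arithmetic behind the expected_cost assertion above.
prompt_cost = 100 * 8e-07   # 100 prompt tokens at 8e-07 USD/token  -> 0.00008 USD
output_cost = 50 * 4e-06    # 50 completion tokens at 4e-06 USD/token -> 0.00020 USD
print(f"{prompt_cost + output_cost:.5f}")  # 0.00028 USD total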
