     ImageResponse,
     ModelInfo,
     PassthroughCallTypes,
-    Usage,
     ServiceTier,
+    Usage,
 )
 from litellm.utils import get_model_info
@@ -118,21 +118,21 @@ def _generic_cost_per_character(
 def _get_service_tier_cost_key(base_key: str, service_tier: Optional[str]) -> str:
     """
     Get the appropriate cost key based on service tier.
-
+
     Args:
         base_key: The base cost key (e.g., "input_cost_per_token")
         service_tier: The service tier ("flex", "priority", or None for standard)
-
+
     Returns:
         str: The cost key to use (e.g., "input_cost_per_token_flex" or "input_cost_per_token")
     """
     if service_tier is None:
         return base_key
-
+
     # Only use service tier specific keys for "flex" and "priority"
     if service_tier.lower() in [ServiceTier.FLEX.value, ServiceTier.PRIORITY.value]:
         return f"{base_key}_{service_tier.lower()}"
-
+
     # For any other service tier, use standard pricing
     return base_key
 
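A quick usage sketch of the key mapping above; the module path is taken from the log strings later in this file, and the calls are illustrative rather than part of the diff:

# Illustrative sketch: expected key selection per service tier.
from litellm.litellm_core_utils.llm_cost_calc.utils import _get_service_tier_cost_key

assert _get_service_tier_cost_key("input_cost_per_token", None) == "input_cost_per_token"
assert _get_service_tier_cost_key("input_cost_per_token", "flex") == "input_cost_per_token_flex"
assert _get_service_tier_cost_key("input_cost_per_token", "priority") == "input_cost_per_token_priority"
# Unrecognized tiers fall back to the standard key.
assert _get_service_tier_cost_key("input_cost_per_token", "unknown") == "input_cost_per_token"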
@@ -152,25 +152,23 @@ def _get_token_base_cost(
     # Get service tier aware cost keys
     input_cost_key = _get_service_tier_cost_key("input_cost_per_token", service_tier)
     output_cost_key = _get_service_tier_cost_key("output_cost_per_token", service_tier)
-    cache_creation_cost_key = _get_service_tier_cost_key("cache_creation_input_token_cost", service_tier)
-    cache_read_cost_key = _get_service_tier_cost_key("cache_read_input_token_cost", service_tier)
-
-    prompt_base_cost = cast(
-        float, _get_cost_per_unit(model_info, input_cost_key)
+    cache_creation_cost_key = _get_service_tier_cost_key(
+        "cache_creation_input_token_cost", service_tier
     )
-    completion_base_cost = cast(
-        float, _get_cost_per_unit(model_info, output_cost_key)
+    cache_read_cost_key = _get_service_tier_cost_key(
+        "cache_read_input_token_cost", service_tier
     )
+
+    prompt_base_cost = cast(float, _get_cost_per_unit(model_info, input_cost_key))
+    completion_base_cost = cast(float, _get_cost_per_unit(model_info, output_cost_key))
     cache_creation_cost = cast(
         float, _get_cost_per_unit(model_info, cache_creation_cost_key)
     )
     cache_creation_cost_above_1hr = cast(
         float,
         _get_cost_per_unit(model_info, "cache_creation_input_token_cost_above_1hr"),
     )
-    cache_read_cost = cast(
-        float, _get_cost_per_unit(model_info, cache_read_cost_key)
-    )
+    cache_read_cost = cast(float, _get_cost_per_unit(model_info, cache_read_cost_key))
 
     ## CHECK IF ABOVE THRESHOLD
     threshold: Optional[float] = None
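For context, a hypothetical model-cost entry with the tier-specific keys these lookups expect; the key names come from the code above, the values are invented for illustration:

# Hypothetical pricing entry (illustrative values only).
model_info = {
    "input_cost_per_token": 3e-06,
    "output_cost_per_token": 1.2e-05,
    "input_cost_per_token_flex": 1.5e-06,   # picked when service_tier == "flex"
    "output_cost_per_token_flex": 6e-06,
    "cache_creation_input_token_cost": 3.75e-06,
    "cache_read_input_token_cost": 3e-07,
}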
@@ -183,7 +181,6 @@ def _get_token_base_cost(
                     1000 if "k" in threshold_str else 1
                 )
                 if usage.prompt_tokens > threshold:
-
                     prompt_base_cost = cast(
                         float, _get_cost_per_unit(model_info, key, prompt_base_cost)
                     )
@@ -278,15 +275,15 @@ def _get_cost_per_unit(
         verbose_logger.exception(
             f"litellm.litellm_core_utils.llm_cost_calc.utils.py::calculate_cost_per_component(): Exception occured - {cost_per_unit}\nDefaulting to 0.0"
         )
-
+
     # If the service tier key doesn't exist or is None, try to fall back to the standard key
     if cost_per_unit is None:
         # Check if any service tier suffix exists in the cost key using ServiceTier enum
         for service_tier in ServiceTier:
             suffix = f"_{service_tier.value}"
             if suffix in cost_key:
                 # Extract the base key by removing the matched suffix
-                base_key = cost_key.replace(suffix, '')
+                base_key = cost_key.replace(suffix, "")
                 fallback_cost = model_info.get(base_key)
                 if isinstance(fallback_cost, float):
                     return fallback_cost
@@ -300,7 +297,7 @@ def _get_cost_per_unit(
                     f"litellm.litellm_core_utils.llm_cost_calc.utils.py::_get_cost_per_unit(): Exception occured - {fallback_cost}\nDefaulting to 0.0"
                 )
                 break  # Only try the first matching suffix
-
+
     return default_value
 
 
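A minimal standalone sketch of the suffix-fallback idea shown in this hunk, using a plain dict instead of the real ModelInfo type; this is illustrative, not the actual litellm implementation:

# Illustrative re-implementation: strip the tier suffix and retry the base key.
def lookup_cost(model_info: dict, cost_key: str, default: float = 0.0) -> float:
    value = model_info.get(cost_key)
    if isinstance(value, float):
        return value
    for suffix in ("_flex", "_priority"):
        if suffix in cost_key:
            fallback = model_info.get(cost_key.replace(suffix, ""))
            if isinstance(fallback, float):
                return fallback
            break  # only try the first matching suffix
    return default

print(lookup_cost({"input_cost_per_token": 3e-06}, "input_cost_per_token_flex"))  # 3e-06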
@@ -495,7 +492,10 @@ def _calculate_input_cost(
 
 
 def generic_cost_per_token(
-    model: str, usage: Usage, custom_llm_provider: str, service_tier: Optional[str] = None
+    model: str,
+    usage: Usage,
+    custom_llm_provider: str,
+    service_tier: Optional[str] = None,
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@@ -547,7 +547,9 @@ def generic_cost_per_token(
         cache_creation_cost,
         cache_creation_cost_above_1hr,
         cache_read_cost,
-    ) = _get_token_base_cost(model_info=model_info, usage=usage, service_tier=service_tier)
+    ) = _get_token_base_cost(
+        model_info=model_info, usage=usage, service_tier=service_tier
+    )
 
     prompt_cost = _calculate_input_cost(
         prompt_tokens_details=prompt_tokens_details,
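A hedged caller sketch for the new service_tier parameter; the model/provider pair is an assumption, and actual costs depend on litellm's pricing map:

# Illustrative call, not taken from the diff; falls back to standard pricing
# when the model has no "_flex" cost keys.
from litellm.types.utils import Usage
from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token

usage = Usage(prompt_tokens=1_000, completion_tokens=200)
prompt_cost, completion_cost = generic_cost_per_token(
    model="gpt-4o-mini",
    usage=usage,
    custom_llm_provider="openai",
    service_tier="flex",
)
print(prompt_cost, completion_cost)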