@@ -482,11 +482,10 @@ def test_gemini_25_implicit_caching_cost():
482
482
print (f"✓ Gemini 2.5 implicit caching cost calculation is correct: ${ result :.8f} " )
483
483
484
484
485
-
486
485
def test_log_context_cost_calculation ():
487
486
"""
488
487
Test that log context cost calculation works correctly with tiered pricing.
489
-
488
+
490
489
This test verifies that when using extended context (above 200k tokens),
491
490
the log context costs are calculated using the appropriate tiered rates.
492
491
"""
@@ -520,7 +519,7 @@ def test_log_context_cost_calculation():
520
519
],
521
520
usage = Usage (
522
521
total_tokens = 350000 , # Above 200k threshold
523
- prompt_tokens = 300000 , # Above 200k threshold
522
+ prompt_tokens = 301000 , # Above 200k threshold
524
523
completion_tokens = 50000 ,
525
524
prompt_tokens_details = PromptTokensDetailsWrapper (
526
525
text_tokens = 300000 ,
@@ -529,6 +528,7 @@ def test_log_context_cost_calculation():
529
528
image_tokens = None ,
530
529
character_count = None ,
531
530
video_length_seconds = None ,
531
+ cache_creation_tokens = 1000 ,
532
532
),
533
533
completion_tokens_details = None ,
534
534
_cache_creation_input_tokens = 1000 , # Some tokens added to cache
@@ -544,63 +544,84 @@ def test_log_context_cost_calculation():
544
544
545
545
# Debug: Print the actual result
546
546
print (f"DEBUG: Actual cost result: ${ result :.6f} " )
547
-
547
+
548
548
# Get model info to understand the pricing
549
549
from litellm import get_model_info
550
- model_info = get_model_info (model = "claude-4-sonnet-20250514" , custom_llm_provider = "anthropic" )
551
-
550
+
551
+ model_info = get_model_info (
552
+ model = "claude-4-sonnet-20250514" , custom_llm_provider = "anthropic"
553
+ )
554
+
552
555
# Calculate expected cost based on actual model pricing
553
556
input_cost_per_token = model_info .get ("input_cost_per_token" , 0 )
554
557
output_cost_per_token = model_info .get ("output_cost_per_token" , 0 )
555
558
cache_creation_cost_per_token = model_info .get ("cache_creation_input_token_cost" , 0 )
556
-
559
+
557
560
# Check if tiered pricing is applied
558
- input_cost_above_200k = model_info .get ("input_cost_per_token_above_200k_tokens" , input_cost_per_token )
559
- output_cost_above_200k = model_info .get ("output_cost_per_token_above_200k_tokens" , output_cost_per_token )
560
- cache_creation_above_200k = model_info .get ("cache_creation_input_token_cost_above_200k_tokens" , cache_creation_cost_per_token )
561
-
561
+ input_cost_above_200k = model_info .get (
562
+ "input_cost_per_token_above_200k_tokens" , input_cost_per_token
563
+ )
564
+ output_cost_above_200k = model_info .get (
565
+ "output_cost_per_token_above_200k_tokens" , output_cost_per_token
566
+ )
567
+ cache_creation_above_200k = model_info .get (
568
+ "cache_creation_input_token_cost_above_200k_tokens" ,
569
+ cache_creation_cost_per_token ,
570
+ )
571
+
562
572
print (f"DEBUG: Base input cost per token: ${ input_cost_per_token :.2e} " )
563
573
print (f"DEBUG: Base output cost per token: ${ output_cost_per_token :.2e} " )
564
- print (f"DEBUG: Base cache creation cost per token: ${ cache_creation_cost_per_token :.2e} " )
565
-
574
+ print (
575
+ f"DEBUG: Base cache creation cost per token: ${ cache_creation_cost_per_token :.2e} "
576
+ )
577
+
566
578
# Handle tiered pricing - if not available, use base pricing
567
579
if input_cost_above_200k is not None :
568
- print (f"DEBUG: Tiered input cost per token (>200k): ${ input_cost_above_200k :.2e} " )
580
+ print (
581
+ f"DEBUG: Tiered input cost per token (>200k): ${ input_cost_above_200k :.2e} "
582
+ )
569
583
else :
570
584
print (f"DEBUG: No tiered input pricing available, using base pricing" )
571
585
input_cost_above_200k = input_cost_per_token
572
-
586
+
573
587
if output_cost_above_200k is not None :
574
- print (f"DEBUG: Tiered output cost per token (>200k): ${ output_cost_above_200k :.2e} " )
588
+ print (
589
+ f"DEBUG: Tiered output cost per token (>200k): ${ output_cost_above_200k :.2e} "
590
+ )
575
591
else :
576
592
print (f"DEBUG: No tiered output pricing available, using base pricing" )
577
593
output_cost_above_200k = output_cost_per_token
578
-
594
+
579
595
if cache_creation_above_200k is not None :
580
- print (f"DEBUG: Tiered cache creation cost per token (>200k): ${ cache_creation_above_200k :.2e} " )
596
+ print (
597
+ f"DEBUG: Tiered cache creation cost per token (>200k): ${ cache_creation_above_200k :.2e} "
598
+ )
581
599
else :
582
600
print (f"DEBUG: No tiered cache creation pricing available, using base pricing" )
583
601
cache_creation_above_200k = cache_creation_cost_per_token
584
-
602
+
585
603
# Since we're above 200k tokens, we should use tiered pricing if available
586
604
expected_input_cost = 300000 * input_cost_above_200k
587
605
expected_output_cost = 50000 * output_cost_above_200k
588
606
expected_cache_cost = 1000 * cache_creation_above_200k
589
607
expected_total = expected_input_cost + expected_output_cost + expected_cache_cost
590
-
608
+
591
609
print (f"DEBUG: Expected total: ${ expected_total :.6f} " )
592
-
610
+
593
611
# Allow for small floating point differences
594
612
assert (
595
613
abs (result - expected_total ) < 1e-6
596
614
), f"Expected cost ${ expected_total :.6f} , but got ${ result :.6f} "
597
615
598
- print (f"✓ Log context cost calculation with tiered pricing is correct: ${ result :.6f} " )
616
+ print (
617
+ f"✓ Log context cost calculation with tiered pricing is correct: ${ result :.6f} "
618
+ )
599
619
print (f" - Input tokens (300k): ${ expected_input_cost :.6f} " )
600
620
print (f" - Output tokens (50k): ${ expected_output_cost :.6f} " )
601
621
print (f" - Cache creation (1k): ${ expected_cache_cost :.6f} " )
602
622
print (f" - Total: ${ result :.6f} " )
603
623
624
+
604
625
def test_gemini_25_explicit_caching_cost_direct_usage ():
605
626
"""
606
627
Test that Gemini 2.5 models correctly calculate costs with explicit caching.
0 commit comments