@@ -131,6 +131,18 @@ class NodeCostSummary:
131131 percent_of_total_cost : float
132132
133133
@dataclass
class CacheCostComparison:
    """Side-by-side view of spend with caching versus without it.

    Attributes:
        cost_with_cache: Actual cost using cache.
        cost_without_cache: Hypothetical cost if no cache had been used.
        total_savings: Dollar savings from using cache.
        savings_percent: Percentage savings, on a 0-100 scale.
        traces_analyzed: Total number of traces analyzed.
        traces_with_cache: Number of traces that used cache.
    """

    cost_with_cache: float
    cost_without_cache: float
    total_savings: float
    savings_percent: float
    traces_analyzed: int
    traces_with_cache: int
144+
145+
134146@dataclass
135147class CostAnalysisResults :
136148 """Complete cost analysis results."""
@@ -185,17 +197,58 @@ def extract_token_usage(trace: Trace) -> Optional[TokenUsage]:
185197 output_tokens = getattr (trace , "completion_tokens" , 0 ) or 0
186198 total_tokens = trace .total_tokens
187199
200+ # Extract cache tokens from LangChain message format before returning
201+ cached_tokens = None
202+ if trace .outputs is not None and isinstance (trace .outputs , dict ):
203+ if "generations" in trace .outputs :
204+ generations = trace .outputs .get ("generations" , [[]])
205+ if generations and len (generations ) > 0 and len (generations [0 ]) > 0 :
206+ message = generations [0 ][0 ]
207+ if isinstance (message , dict ):
208+ message_obj = message .get ("message" , {})
209+ if isinstance (message_obj , dict ):
210+ kwargs = message_obj .get ("kwargs" , {})
211+ if isinstance (kwargs , dict ):
212+ usage_metadata = kwargs .get ("usage_metadata" , {})
213+ if isinstance (usage_metadata , dict ):
214+ input_token_details = usage_metadata .get (
215+ "input_token_details" , {}
216+ )
217+ if isinstance (input_token_details , dict ):
218+ cached_tokens = input_token_details .get (
219+ "cache_read"
220+ )
221+
188222 return TokenUsage (
189223 input_tokens = input_tokens ,
190224 output_tokens = output_tokens ,
191225 total_tokens = total_tokens ,
192- cached_tokens = None , # Not available at top level
226+ cached_tokens = cached_tokens ,
193227 )
194228
195229 # Try outputs (handle None outputs)
196230 usage_data = None
231+ cached_tokens = None
232+
197233 if trace .outputs is not None :
198- usage_data = trace .outputs .get ("usage_metadata" )
234+ # First try LangChain message format (where cache data lives)
235+ # Path: outputs.generations[0][0].message.kwargs.usage_metadata
236+ if "generations" in trace .outputs :
237+ generations = trace .outputs .get ("generations" , [[]])
238+ if generations and len (generations ) > 0 and len (generations [0 ]) > 0 :
239+ message = generations [0 ][0 ]
240+ if isinstance (message , dict ):
241+ message_obj = message .get ("message" , {})
242+ if isinstance (message_obj , dict ):
243+ kwargs = message_obj .get ("kwargs" , {})
244+ if isinstance (kwargs , dict ):
245+ usage_metadata = kwargs .get ("usage_metadata" , {})
246+ if isinstance (usage_metadata , dict ) and usage_metadata :
247+ usage_data = usage_metadata
248+
249+ # Fallback to direct usage_metadata
250+ if not usage_data :
251+ usage_data = trace .outputs .get ("usage_metadata" )
199252
200253 # Fallback to inputs (handle None inputs)
201254 if not usage_data and trace .inputs is not None :
@@ -210,7 +263,6 @@ def extract_token_usage(trace: Trace) -> Optional[TokenUsage]:
210263 total_tokens = usage_data .get ("total_tokens" , input_tokens + output_tokens )
211264
212265 # Extract cache tokens if available
213- cached_tokens = None
214266 if "input_token_details" in usage_data :
215267 token_details = usage_data ["input_token_details" ]
216268 if isinstance (token_details , dict ):
@@ -430,6 +482,167 @@ def aggregate_node_costs(
430482 return summaries
431483
432484
485+ # ============================================================================
486+ # Cache Effectiveness Functions
487+ # ============================================================================
488+
489+
def calculate_cache_hit_rate(workflow_analyses: List[WorkflowCostAnalysis]) -> float:
    """
    Report what share of analyzed traces carry cache data.

    Every entry in each workflow's ``node_costs`` counts as one trace; a trace
    is a cache hit when its ``token_usage.has_cache_data()`` is truthy.

    Args:
        workflow_analyses: List of WorkflowCostAnalysis objects

    Returns:
        Percentage of traces with cache data (0-100); 0.0 when there are no
        workflows or no traces at all.
    """
    if not workflow_analyses:
        return 0.0

    # One flag per trace, flattened across all workflows.
    cache_flags = [
        breakdown.token_usage.has_cache_data()
        for analysis in workflow_analyses
        for breakdown in analysis.node_costs
    ]
    if not cache_flags:
        return 0.0

    hit_count = sum(1 for flag in cache_flags if flag)
    return (hit_count / len(cache_flags)) * 100.0
516+
517+
def calculate_cache_savings(
    workflow_analyses: List[WorkflowCostAnalysis],
    pricing_config: PricingConfig,
) -> float:
    """
    Sum the dollars saved by reading tokens from cache.

    For each trace with cache data, the cached tokens are priced twice: once
    at the regular input-token rate and once at the cache-read rate. The
    difference between the two is that trace's saving.

    Args:
        workflow_analyses: List of WorkflowCostAnalysis objects
        pricing_config: Pricing configuration

    Returns:
        Total savings in dollars from using cache; 0.0 when there are no
        workflows or when no cache-read price is configured.
    """
    # Without a cache-read price there is nothing to compare against.
    if not workflow_analyses or pricing_config.cache_read_per_1k is None:
        return 0.0

    saved_dollars = 0.0

    for analysis in workflow_analyses:
        for breakdown in analysis.node_costs:
            usage = breakdown.token_usage
            if not usage.has_cache_data():
                continue

            tokens_from_cache = usage.cached_tokens
            if tokens_from_cache is None:
                continue

            thousands = tokens_from_cache / 1000.0
            priced_as_input = thousands * pricing_config.input_tokens_per_1k
            priced_as_cache = thousands * pricing_config.cache_read_per_1k
            saved_dollars += priced_as_input - priced_as_cache

    return saved_dollars
565+
566+
def compare_cached_vs_fresh_costs(
    workflow_analyses: List[WorkflowCostAnalysis],
    pricing_config: PricingConfig,
) -> CacheCostComparison:
    """
    Contrast actual spend against a hypothetical run with no cache.

    The actual figure is the sum of every trace's ``total_cost``. The
    hypothetical figure replaces each cached trace's ``cache_cost`` with the
    price of the same cached tokens at the input-token rate. Traces that
    cannot be re-priced (no cache data, no cache-read price configured, or an
    unknown cached-token count) contribute their ``total_cost`` unchanged.

    Args:
        workflow_analyses: List of WorkflowCostAnalysis objects
        pricing_config: Pricing configuration

    Returns:
        CacheCostComparison with both totals, the dollar and percentage
        savings, and the trace counts
    """
    actual_total = 0.0
    uncached_total = 0.0
    trace_count = 0
    cached_trace_count = 0

    for analysis in workflow_analyses:
        for breakdown in analysis.node_costs:
            trace_count += 1
            actual_total += breakdown.total_cost

            usage = breakdown.token_usage
            if not usage.has_cache_data():
                # No cache involvement: identical cost in both scenarios.
                uncached_total += breakdown.total_cost
                continue

            cached_trace_count += 1

            # Re-pricing needs both a cache-read price and a token count.
            tokens_from_cache = None
            if pricing_config.cache_read_per_1k is not None:
                tokens_from_cache = usage.cached_tokens

            if tokens_from_cache is None:
                uncached_total += breakdown.total_cost
                continue

            # Swap the cache charge for the same tokens at the input rate.
            cost_minus_cache = breakdown.total_cost - breakdown.cache_cost
            repriced_tokens = (
                tokens_from_cache / 1000.0
            ) * pricing_config.input_tokens_per_1k
            uncached_total += cost_minus_cache + repriced_tokens

    saved = uncached_total - actual_total
    # Guard against dividing by zero when nothing was spent.
    saved_pct = (saved / uncached_total) * 100.0 if uncached_total > 0 else 0.0

    return CacheCostComparison(
        cost_with_cache=actual_total,
        cost_without_cache=uncached_total,
        total_savings=saved,
        savings_percent=saved_pct,
        traces_analyzed=trace_count,
        traces_with_cache=cached_trace_count,
    )
644+
645+
433646def analyze_costs (
434647 workflows : List [Workflow ],
435648 pricing_config : PricingConfig ,
@@ -486,9 +699,9 @@ def analyze_costs(
486699 monthly_workflow_estimate = monthly_workflow_estimate ,
487700 )
488701
489- # Cache effectiveness (not yet implemented - Phase 3B extension)
490- cache_effectiveness_percent = None
491- cache_savings_dollars = None
702+ # Cache effectiveness analysis
703+ cache_effectiveness_percent = calculate_cache_hit_rate ( workflow_analyses )
704+ cache_savings_dollars = calculate_cache_savings ( workflow_analyses , pricing_config )
492705
493706 return CostAnalysisResults (
494707 avg_cost_per_workflow = avg_cost ,
0 commit comments