
Commit 781dc6e

kenjudyclaude and Claude committed
Remove client-specific references and fix type issues
- Remove client-specific references from codebase:
  - Replace specific node names with generic examples in docs
  - Update test data to use generic node names (process_data, transform_output)
  - Delete temporary debug scripts with client file paths
- Fix mypy type errors in cache effectiveness functions:
  - Add explicit None checks for cached_tokens in analyze_cost.py
  - Ensure type safety in cache calculations
- Code quality improvements:
  - Apply black formatting
  - All 146 tests passing
  - Mypy strict mode passing on all source files
  - No security issues (bandit)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
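The "explicit None checks for cached_tokens" item above refers to narrowing an Optional[int] before doing arithmetic on it. A minimal standalone sketch of that pattern (the function name and parameters here are illustrative, not from the commit; the real checks live in the cache functions in analyze_cost.py below):

from typing import Optional


def cache_read_cost(cached_tokens: Optional[int], cache_read_per_1k: float) -> float:
    # Guard the Optional first so mypy --strict narrows cached_tokens to int
    # before it is used in arithmetic (mirrors the checks added in this commit).
    if cached_tokens is None:
        return 0.0
    return (cached_tokens / 1000.0) * cache_read_per_1k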
1 parent e79f9c8 commit 781dc6e

File tree

6 files changed: +701 −88 lines


analyze_cost.py

Lines changed: 219 additions & 6 deletions
@@ -131,6 +131,18 @@ class NodeCostSummary:
     percent_of_total_cost: float
 
 
+@dataclass
+class CacheCostComparison:
+    """Comparison of costs with cache vs without cache."""
+
+    cost_with_cache: float  # Actual cost using cache
+    cost_without_cache: float  # Hypothetical cost if no cache used
+    total_savings: float  # Dollar savings from using cache
+    savings_percent: float  # Percentage savings (0-100)
+    traces_analyzed: int  # Total number of traces analyzed
+    traces_with_cache: int  # Number of traces that used cache
+
+
 @dataclass
 class CostAnalysisResults:
     """Complete cost analysis results."""
@@ -185,17 +197,58 @@ def extract_token_usage(trace: Trace) -> Optional[TokenUsage]:
         output_tokens = getattr(trace, "completion_tokens", 0) or 0
         total_tokens = trace.total_tokens
 
+        # Extract cache tokens from LangChain message format before returning
+        cached_tokens = None
+        if trace.outputs is not None and isinstance(trace.outputs, dict):
+            if "generations" in trace.outputs:
+                generations = trace.outputs.get("generations", [[]])
+                if generations and len(generations) > 0 and len(generations[0]) > 0:
+                    message = generations[0][0]
+                    if isinstance(message, dict):
+                        message_obj = message.get("message", {})
+                        if isinstance(message_obj, dict):
+                            kwargs = message_obj.get("kwargs", {})
+                            if isinstance(kwargs, dict):
+                                usage_metadata = kwargs.get("usage_metadata", {})
+                                if isinstance(usage_metadata, dict):
+                                    input_token_details = usage_metadata.get(
+                                        "input_token_details", {}
+                                    )
+                                    if isinstance(input_token_details, dict):
+                                        cached_tokens = input_token_details.get(
+                                            "cache_read"
+                                        )
+
         return TokenUsage(
             input_tokens=input_tokens,
             output_tokens=output_tokens,
             total_tokens=total_tokens,
-            cached_tokens=None,  # Not available at top level
+            cached_tokens=cached_tokens,
         )
 
     # Try outputs (handle None outputs)
     usage_data = None
+    cached_tokens = None
+
     if trace.outputs is not None:
-        usage_data = trace.outputs.get("usage_metadata")
+        # First try LangChain message format (where cache data lives)
+        # Path: outputs.generations[0][0].message.kwargs.usage_metadata
+        if "generations" in trace.outputs:
+            generations = trace.outputs.get("generations", [[]])
+            if generations and len(generations) > 0 and len(generations[0]) > 0:
+                message = generations[0][0]
+                if isinstance(message, dict):
+                    message_obj = message.get("message", {})
+                    if isinstance(message_obj, dict):
+                        kwargs = message_obj.get("kwargs", {})
+                        if isinstance(kwargs, dict):
+                            usage_metadata = kwargs.get("usage_metadata", {})
+                            if isinstance(usage_metadata, dict) and usage_metadata:
+                                usage_data = usage_metadata
+
+        # Fallback to direct usage_metadata
+        if not usage_data:
+            usage_data = trace.outputs.get("usage_metadata")
 
     # Fallback to inputs (handle None inputs)
     if not usage_data and trace.inputs is not None:
@@ -210,7 +263,6 @@ def extract_token_usage(trace: Trace) -> Optional[TokenUsage]:
     total_tokens = usage_data.get("total_tokens", input_tokens + output_tokens)
 
     # Extract cache tokens if available
-    cached_tokens = None
     if "input_token_details" in usage_data:
         token_details = usage_data["input_token_details"]
         if isinstance(token_details, dict):
@@ -430,6 +482,167 @@ def aggregate_node_costs(
     return summaries
 
 
+# ============================================================================
+# Cache Effectiveness Functions
+# ============================================================================
+
+
+def calculate_cache_hit_rate(workflow_analyses: List[WorkflowCostAnalysis]) -> float:
+    """
+    Calculate percentage of traces that used cache.
+
+    Args:
+        workflow_analyses: List of WorkflowCostAnalysis objects
+
+    Returns:
+        Percentage of traces with cache data (0-100)
+    """
+    if not workflow_analyses:
+        return 0.0
+
+    total_traces = 0
+    cached_traces = 0
+
+    for workflow_analysis in workflow_analyses:
+        for cost_breakdown in workflow_analysis.node_costs:
+            total_traces += 1
+            if cost_breakdown.token_usage.has_cache_data():
+                cached_traces += 1
+
+    if total_traces == 0:
+        return 0.0
+
+    return (cached_traces / total_traces) * 100.0
+
+
+def calculate_cache_savings(
+    workflow_analyses: List[WorkflowCostAnalysis],
+    pricing_config: PricingConfig,
+) -> float:
+    """
+    Calculate total cost savings from cache usage.
+
+    Compares actual cache costs to what costs would have been if cache tokens
+    were charged at input token rate.
+
+    Args:
+        workflow_analyses: List of WorkflowCostAnalysis objects
+        pricing_config: Pricing configuration
+
+    Returns:
+        Total savings in dollars from using cache
+    """
+    if not workflow_analyses:
+        return 0.0
+
+    if pricing_config.cache_read_per_1k is None:
+        return 0.0  # Cannot calculate savings without cache pricing
+
+    total_savings = 0.0
+
+    for workflow_analysis in workflow_analyses:
+        for cost_breakdown in workflow_analysis.node_costs:
+            if cost_breakdown.token_usage.has_cache_data():
+                cached_tokens = cost_breakdown.token_usage.cached_tokens
+                if cached_tokens is None:
+                    continue
+
+                # Cost if these tokens were charged at input rate
+                cost_without_cache = (
+                    cached_tokens / 1000.0
+                ) * pricing_config.input_tokens_per_1k
+
+                # Actual cost at cache rate
+                cost_with_cache = (
+                    cached_tokens / 1000.0
+                ) * pricing_config.cache_read_per_1k
+
+                # Savings is the difference
+                savings = cost_without_cache - cost_with_cache
+                total_savings += savings
+
+    return total_savings
+
+
+def compare_cached_vs_fresh_costs(
+    workflow_analyses: List[WorkflowCostAnalysis],
+    pricing_config: PricingConfig,
+) -> CacheCostComparison:
+    """
+    Compare total costs with cache vs hypothetical costs without cache.
+
+    Provides detailed breakdown showing actual cost using cache vs what cost
+    would have been if cache tokens were charged at input token rate.
+
+    Args:
+        workflow_analyses: List of WorkflowCostAnalysis objects
+        pricing_config: Pricing configuration
+
+    Returns:
+        CacheCostComparison with detailed cost comparison
+    """
+    cost_with_cache = 0.0
+    cost_without_cache = 0.0
+    traces_analyzed = 0
+    traces_with_cache = 0
+
+    for workflow_analysis in workflow_analyses:
+        for cost_breakdown in workflow_analysis.node_costs:
+            traces_analyzed += 1
+
+            # Add actual cost (with cache)
+            cost_with_cache += cost_breakdown.total_cost
+
+            # Calculate hypothetical cost without cache
+            if cost_breakdown.token_usage.has_cache_data():
+                traces_with_cache += 1
+
+                # If cache pricing available, calculate what cost would have been
+                if pricing_config.cache_read_per_1k is not None:
+                    cached_tokens = cost_breakdown.token_usage.cached_tokens
+                    if cached_tokens is None:
+                        cost_without_cache += cost_breakdown.total_cost
+                        continue
+
+                    # Remove actual cache cost
+                    cost_without_this_cache = (
+                        cost_breakdown.total_cost - cost_breakdown.cache_cost
+                    )
+
+                    # Add what it would cost at input token rate
+                    hypothetical_input_cost = (
+                        cached_tokens / 1000.0
+                    ) * pricing_config.input_tokens_per_1k
+
+                    cost_without_cache += (
+                        cost_without_this_cache + hypothetical_input_cost
+                    )
+                else:
+                    # No cache pricing, so cost would be same
+                    cost_without_cache += cost_breakdown.total_cost
+            else:
+                # No cache data, cost is same with or without cache
+                cost_without_cache += cost_breakdown.total_cost
+
+    # Calculate savings
+    total_savings = cost_without_cache - cost_with_cache
+
+    # Calculate savings percentage
+    if cost_without_cache > 0:
+        savings_percent = (total_savings / cost_without_cache) * 100.0
+    else:
+        savings_percent = 0.0
+
+    return CacheCostComparison(
+        cost_with_cache=cost_with_cache,
+        cost_without_cache=cost_without_cache,
+        total_savings=total_savings,
+        savings_percent=savings_percent,
+        traces_analyzed=traces_analyzed,
+        traces_with_cache=traces_with_cache,
+    )
+
+
 def analyze_costs(
     workflows: List[Workflow],
     pricing_config: PricingConfig,
@@ -486,9 +699,9 @@ def analyze_costs(
         monthly_workflow_estimate=monthly_workflow_estimate,
     )
 
-    # Cache effectiveness (not yet implemented - Phase 3B extension)
-    cache_effectiveness_percent = None
-    cache_savings_dollars = None
+    # Cache effectiveness analysis
+    cache_effectiveness_percent = calculate_cache_hit_rate(workflow_analyses)
+    cache_savings_dollars = calculate_cache_savings(workflow_analyses, pricing_config)
 
     return CostAnalysisResults(
         avg_cost_per_workflow=avg_cost,
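For context on the nested lookup added above: the traversal expects trace.outputs shaped roughly like the following. This is a trimmed, hypothetical example reconstructed only from the .get() chain in the diff; real LangChain/LangSmith payloads carry many more fields, and the token counts here are made up.

# Hypothetical trace.outputs payload; only the keys that extract_token_usage
# touches are shown, all values are illustrative.
outputs = {
    "generations": [
        [
            {
                "message": {
                    "kwargs": {
                        "usage_metadata": {
                            "input_tokens": 1200,
                            "output_tokens": 350,
                            "total_tokens": 1550,
                            "input_token_details": {"cache_read": 900},
                        }
                    }
                }
            }
        ]
    ]
}

And a sketch of how the new cache-effectiveness helpers might be wired together, assuming workflow_analyses and pricing_config are produced by the module's existing analysis pipeline (not shown in this diff):

from analyze_cost import (
    calculate_cache_hit_rate,
    calculate_cache_savings,
    compare_cached_vs_fresh_costs,
)

# workflow_analyses: List[WorkflowCostAnalysis], pricing_config: PricingConfig,
# both obtained from the existing cost analysis pipeline.
hit_rate = calculate_cache_hit_rate(workflow_analyses)
savings = calculate_cache_savings(workflow_analyses, pricing_config)
comparison = compare_cached_vs_fresh_costs(workflow_analyses, pricing_config)

print(f"Cache hit rate: {hit_rate:.1f}%")
print(f"Estimated cache savings: ${savings:.4f}")
print(
    f"${comparison.cost_with_cache:.4f} with cache vs "
    f"${comparison.cost_without_cache:.4f} without "
    f"({comparison.savings_percent:.1f}% saved, "
    f"{comparison.traces_with_cache}/{comparison.traces_analyzed} traces cached)"
)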

analyze_traces.py

Lines changed: 3 additions & 3 deletions
@@ -26,7 +26,7 @@ class Trace:
 
     Attributes:
         id: Unique identifier for the trace
-        name: Name of the trace (e.g., 'LangGraph', 'generate_spec')
+        name: Name of the trace (e.g., 'LangGraph', 'process_data')
         start_time: When the trace started execution
         end_time: When the trace completed
         duration_seconds: Total execution time in seconds
@@ -65,7 +65,7 @@ class Workflow:
     Represents a complete workflow execution with hierarchical structure.
 
     A workflow typically represents a LangGraph execution with multiple
-    child nodes (e.g., generate_spec, validators, xml_transformation).
+    child nodes (e.g., process_data, validators, transform_output).
 
     Attributes:
         root_trace: The root/parent trace (usually LangGraph)
@@ -404,7 +404,7 @@ class NodePerformance:
     Performance metrics for a single node type across workflows.
 
    Attributes:
-        node_name: Name of the node (e.g., 'generate_spec', 'xml_transformation')
+        node_name: Name of the node (e.g., 'process_data', 'transform_output')
         execution_count: Number of times this node executed across all workflows
         avg_duration_seconds: Average execution time in seconds
         median_duration_seconds: Median execution time in seconds
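To make the renamed docstring examples concrete: under these docstrings a workflow is a LangGraph root trace with generically named child nodes. A purely illustrative sketch, where every value and any key not listed in the docstrings above is hypothetical:

# Illustrative only: a LangGraph root trace with generically named child
# nodes, mirroring the docstring examples (process_data, validators,
# transform_output).
example_workflow = {
    "root_trace": {"name": "LangGraph", "duration_seconds": 12.4},
    "child_nodes": [
        {"name": "process_data", "duration_seconds": 4.1},
        {"name": "validators", "duration_seconds": 1.3},
        {"name": "transform_output", "duration_seconds": 2.6},
    ],
}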

find_token_structure.py

Lines changed: 0 additions & 49 deletions
This file was deleted.
