Skip to content

Commit 3fc61a4

Browse files
tekacs and claude committed
Fix cache pricing logic and update documentation
- Fix cache pricing to only apply when the -c flag is used (not just when the model supports caching)
- Change the cached parameter to Option<u64> to distinguish between no flag and -c 0
- Update README with correct pricing examples and a clear explanation of cache vs no-cache behavior
- Document that the presence of the -c flag enables caching pricing rules regardless of its value

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 589d308 commit 3fc61a4

File tree

2 files changed

+47
-15
lines changed

2 files changed

+47
-15
lines changed

README.md

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ Cost calculation: 10000 input + 200 output
6767
6868
Model | Input | Output | Total
6969
------------------------+-----------+-----------+----------
70-
anthropic/claude-opus-4 | $0.000000 | $0.015000 | $0.202500
70+
anthropic/claude-opus-4 | $0.150000 | $0.015000 | $0.165000
7171
```
7272

7373
With cached tokens (uses 5-minute TTL by default):
@@ -90,6 +90,36 @@ With 1-hour cache TTL (higher write costs):
9090
llm-pricing calc 10000 200 -c 9500 --ttl 60 opus-4
9191
```
9292

93+
#### Understanding Cache vs No-Cache Pricing
94+
95+
The `-c` flag indicates you're using caching rules, which affects pricing even when no tokens are cached:
96+
97+
**Without `-c` flag (no caching):**
98+
```bash
99+
llm-pricing calc 10000 200 opus-4
100+
```
101+
```
102+
Cost calculation: 10000 input + 200 output
103+
104+
Model | Input | Output | Total
105+
------------------------+-----------+-----------+----------
106+
anthropic/claude-opus-4 | $0.150000 | $0.015000 | $0.165000
107+
```
108+
109+
**With `-c 0` flag (using caching, 0 cached tokens):**
110+
```bash
111+
llm-pricing calc 10000 200 -c 0 opus-4
112+
```
113+
```
114+
Cost calculation: 10000 input + 200 output
115+
116+
Model | Input | Output | Cache Read | Cache Write | Total
117+
------------------------+-----------+-----------+------------+-------------+----------
118+
anthropic/claude-opus-4 | $0.000000 | $0.015000 | $0.000000 | $0.187500 | $0.202500
119+
```
120+
121+
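When using caching (`-c` flag) with a model that has cache write pricing, all new (non-cached) input tokens are billed at the cache write price (1.25x the base input price for the 5-minute TTL) instead of the regular input price. Models without cache write pricing are still billed at the regular input price.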
122+
93123
### List Models
94124

95125
#### Basic Usage
@@ -222,7 +252,7 @@ Arguments:
222252
[FILTERS...] Filter models by name (e.g., 'anthropic/', 'sonnet')
223253

224254
Options:
225-
-c, --cached <CACHED> Number of cached input tokens read from cache [default: 0]
255+
-c, --cached <CACHED> Number of cached input tokens read from cache. Using this flag enables caching pricing rules.
226256
-t, --ttl <TTL> Cache TTL in minutes (affects pricing) [default: 5]
227257
-h, --help Print help
228258
```

src/main.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ enum Commands {
6161
output: u64,
6262
/// Filter models by name (e.g., 'anthropic/', 'sonnet')
6363
filters: Vec<String>,
64-
/// Number of cached input tokens read from cache (default: 0)
65-
#[arg(short, long, default_value = "0")]
66-
cached: u64,
64+
/// Number of cached input tokens read from cache
65+
#[arg(short, long)]
66+
cached: Option<u64>,
6767
/// Cache TTL in minutes (affects pricing for some models, default: 5)
6868
#[arg(short, long, default_value = "5")]
6969
ttl: u64,
@@ -536,35 +536,37 @@ async fn main() -> anyhow::Result<()> {
536536
total_cost: f64,
537537
}
538538

539+
let use_caching = cached.is_some();
540+
let cached_tokens = cached.unwrap_or(0);
539541
let mut calc_rows = Vec::new();
540542

541543
for (_, models_in_provider) in filtered {
542544
for model in models_in_provider {
543545
let input_price = parse_price(&model.pricing.prompt)?;
544546
let output_price = parse_price(&model.pricing.completion)?;
545547

546-
// cached = tokens read from cache
548+
// cached_tokens = tokens read from cache
547549
// new_tokens = tokens not in cache that need to be written to cache
548-
let new_tokens = input.saturating_sub(cached);
550+
let new_tokens = input.saturating_sub(cached_tokens);
549551

550552
let output_cost = (output as f64) * output_price;
551553

552554
let mut cache_read_cost = 0.0;
553555
let mut cache_write_cost = 0.0;
554556
let mut input_cost = 0.0;
555557

556-
if cached > 0 {
558+
if cached_tokens > 0 {
557559
// Cost for reading cached tokens
558560
if let Some(cache_read_price_str) = &model.pricing.input_cache_read {
559561
let cache_read_price = parse_price(cache_read_price_str)?;
560-
cache_read_cost = (cached as f64) * cache_read_price;
562+
cache_read_cost = (cached_tokens as f64) * cache_read_price;
561563
} else {
562-
cache_read_cost = (cached as f64) * input_price;
564+
cache_read_cost = (cached_tokens as f64) * input_price;
563565
}
564566
}
565567

566568
if new_tokens > 0 {
567-
if model.pricing.input_cache_write.is_some() {
569+
if use_caching && model.pricing.input_cache_write.is_some() {
568570
// Cost for writing new tokens to cache (replaces regular input cost for these tokens)
569571
let actual_write_price = match ttl {
570572
5 => input_price * 1.25, // 5-minute TTL is 1.25x base price
@@ -574,7 +576,7 @@ async fn main() -> anyhow::Result<()> {
574576
cache_write_cost = (new_tokens as f64) * actual_write_price;
575577
// Cache write cost replaces regular input cost for these tokens
576578
} else {
577-
// Regular input cost for tokens that can't be cached
579+
// Regular input cost for tokens (no caching or can't be cached)
578580
input_cost = (new_tokens as f64) * input_price;
579581
}
580582
}
@@ -652,13 +654,13 @@ async fn main() -> anyhow::Result<()> {
652654
.max(5);
653655

654656
// Print header with request details
655-
let cache_desc = if cached > 0 {
657+
let cache_desc = if use_caching && cached_tokens > 0 {
656658
let ttl_desc = match ttl {
657659
5 => "5m",
658660
60 => "1h",
659661
_ => unimplemented!("TTL must be exactly 5 or 60 minutes"),
660662
};
661-
format!(" ({} cached, {} TTL)", cached, ttl_desc)
663+
format!(" ({} cached, {} TTL)", cached_tokens, ttl_desc)
662664
} else {
663665
String::new()
664666
};
@@ -669,7 +671,7 @@ async fn main() -> anyhow::Result<()> {
669671
);
670672
println!();
671673

672-
if cached > 0 {
674+
if use_caching {
673675
println!("{:<width_model$} | {:<width_input$} | {:<width_output$} | {:<width_read$} | {:<width_write$} | {:<width_total$}",
674676
"Model", "Input", "Output", "Cache Read", "Cache Write", "Total",
675677
width_model = max_model_width,

0 commit comments

Comments
 (0)