Skip to content

Commit 73fd82a

Browse files
pricing: add currency field
Signed-off-by: Jintao Zhang <[email protected]>
1 parent 449ed43 commit 73fd82a

File tree

6 files changed

+76
-48
lines changed

6 files changed

+76
-48
lines changed

config/config.yaml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@ model_config:
5656
batch_size: 512.0 # vLLM default batch size
5757
context_size: 16384.0 # based on https://huggingface.co/microsoft/phi-4
5858
pricing:
59-
prompt_usd_per_1m: 200.0
60-
completion_usd_per_1m: 600.0
59+
currency: USD
60+
prompt_per_1m: 0.07
61+
completion_per_1m: 0.35
6162
pii_policy:
6263
allow_by_default: false # Deny all PII by default
6364
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
@@ -68,8 +69,9 @@ model_config:
6869
batch_size: 512.0
6970
context_size: 16384.0
7071
pricing:
71-
prompt_usd_per_1m: 500.0
72-
completion_usd_per_1m: 1500.0
72+
currency: USD
73+
prompt_per_1m: 0.067
74+
completion_per_1m: 0.267
7375
pii_policy:
7476
allow_by_default: false # Deny all PII by default
7577
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types
@@ -79,8 +81,9 @@ model_config:
7981
batch_size: 512.0
8082
context_size: 16384.0
8183
pricing:
82-
prompt_usd_per_1m: 300.0
83-
completion_usd_per_1m: 900.0
84+
currency: USD
85+
prompt_per_1m: 0.1
86+
completion_per_1m: 0.3
8487
pii_policy:
8588
allow_by_default: false # Deny all PII by default
8689
pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"] # Only allow these specific PII types

src/semantic-router/pkg/config/config.go

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,12 @@ type VLLMEndpoint struct {
193193

194194
// ModelPricing represents per-model token pricing: a currency code and per-1M-token rates.
195195
type ModelPricing struct {
196-
// Price in USD per 1M prompt tokens
197-
PromptUSDPer1M float64 `yaml:"prompt_usd_per_1m,omitempty"`
198-
// Price in USD per 1M completion tokens
199-
CompletionUSDPer1M float64 `yaml:"completion_usd_per_1m,omitempty"`
196+
// ISO 4217 currency code for the pricing (e.g., "USD"). Defaults to "USD" when omitted.
197+
Currency string `yaml:"currency,omitempty"`
198+
199+
// Price per 1M tokens (unit: <currency>/1_000_000 tokens)
200+
PromptPer1M float64 `yaml:"prompt_per_1m,omitempty"`
201+
CompletionPer1M float64 `yaml:"completion_per_1m,omitempty"`
200202
}
201203

202204
type ModelParams struct {
@@ -374,15 +376,20 @@ func (c *RouterConfig) GetModelContextSize(modelName string, defaultValue float6
374376
return defaultValue
375377
}
376378

377-
// GetModelPricing returns pricing in USD per 1M tokens for prompt and completion.
378-
func (c *RouterConfig) GetModelPricing(modelName string) (promptUSDPer1M float64, completionUSDPer1M float64, ok bool) {
379+
// GetModelPricing returns pricing per 1M tokens and its currency for the given model.
380+
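// The currency indicates the unit of the returned rates (e.g., "USD") and falls back to "USD" when unset; ok is false when the model is not configured or both rates are zero.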
381+
func (c *RouterConfig) GetModelPricing(modelName string) (promptPer1M float64, completionPer1M float64, currency string, ok bool) {
379382
if modelConfig, okc := c.ModelConfig[modelName]; okc {
380383
p := modelConfig.Pricing
381-
if p.PromptUSDPer1M != 0 || p.CompletionUSDPer1M != 0 {
382-
return p.PromptUSDPer1M, p.CompletionUSDPer1M, true
384+
if p.PromptPer1M != 0 || p.CompletionPer1M != 0 {
385+
cur := p.Currency
386+
if cur == "" {
387+
cur = "USD"
388+
}
389+
return p.PromptPer1M, p.CompletionPer1M, cur, true
383390
}
384391
}
385-
return 0, 0, false
392+
return 0, 0, "", false
386393
}
387394

388395
// GetModelPIIPolicy returns the PII policy for a given model

src/semantic-router/pkg/extproc/response_handler.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,18 +56,22 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_Response
5656

5757
// Compute and record cost if pricing is configured
5858
if r.Config != nil {
59-
promptRatePer1M, completionRatePer1M, ok := r.Config.GetModelPricing(ctx.RequestModel)
59+
promptRatePer1M, completionRatePer1M, currency, ok := r.Config.GetModelPricing(ctx.RequestModel)
6060
if ok {
61-
costUSD := (float64(promptTokens)*promptRatePer1M + float64(completionTokens)*completionRatePer1M) / 1_000_000.0
62-
metrics.RecordModelCostUSD(ctx.RequestModel, costUSD)
61+
costAmount := (float64(promptTokens)*promptRatePer1M + float64(completionTokens)*completionRatePer1M) / 1_000_000.0
62+
if currency == "" {
63+
currency = "USD"
64+
}
65+
metrics.RecordModelCost(ctx.RequestModel, currency, costAmount)
6366
observability.LogEvent("llm_usage", map[string]interface{}{
6467
"request_id": ctx.RequestID,
6568
"model": ctx.RequestModel,
6669
"prompt_tokens": promptTokens,
6770
"completion_tokens": completionTokens,
6871
"total_tokens": promptTokens + completionTokens,
6972
"completion_latency_ms": completionLatency.Milliseconds(),
70-
"cost_usd": costUSD,
73+
"cost": costAmount,
74+
"currency": currency,
7175
})
7276
} else {
7377
observability.LogEvent("llm_usage", map[string]interface{}{
@@ -77,7 +81,8 @@ func (r *OpenAIRouter) handleResponseBody(v *ext_proc.ProcessingRequest_Response
7781
"completion_tokens": completionTokens,
7882
"total_tokens": promptTokens + completionTokens,
7983
"completion_latency_ms": completionLatency.Milliseconds(),
80-
"cost_usd": 0.0,
84+
"cost": 0.0,
85+
"currency": "unknown",
8186
"pricing": "not_configured",
8287
})
8388
}

src/semantic-router/pkg/metrics/metrics.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,13 @@ var (
102102
[]string{"model"},
103103
)
104104

105-
// ModelCostUSD tracks the total USD cost attributed to each model
106-
ModelCostUSD = promauto.NewCounterVec(
105+
// ModelCost tracks the total cost attributed to each model by currency
106+
ModelCost = promauto.NewCounterVec(
107107
prometheus.CounterOpts{
108-
Name: "llm_model_cost_usd_total",
109-
Help: "The total USD cost attributed to each LLM model",
108+
Name: "llm_model_cost_total",
109+
Help: "The total cost attributed to each LLM model, labeled by currency",
110110
},
111-
[]string{"model"},
111+
[]string{"model", "currency"},
112112
)
113113

114114
// ModelTokens tracks the number of tokens used by each model
@@ -256,12 +256,15 @@ func RecordModelTokens(model string, tokens float64) {
256256
ModelTokens.WithLabelValues(model).Add(tokens)
257257
}
258258

259-
// RecordModelCostUSD adds the dollar cost attributed to a specific model
260-
func RecordModelCostUSD(model string, usd float64) {
261-
if usd < 0 {
259+
// RecordModelCost records the cost attributed to a specific model with a currency label
260+
func RecordModelCost(model string, currency string, amount float64) {
261+
if amount < 0 {
262262
return
263263
}
264-
ModelCostUSD.WithLabelValues(model).Add(usd)
264+
if currency == "" {
265+
currency = "USD"
266+
}
267+
ModelCost.WithLabelValues(model, currency).Add(amount)
265268
}
266269

267270
// RecordRoutingReasonCode increments the counter for a routing decision reason code and model

website/docs/api/router.md

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -222,10 +222,11 @@ sum by (family, effort) (
222222

223223
The router exposes additional metrics for cost accounting and routing decisions.
224224

225-
- `llm_model_cost_usd_total{model}`
226-
- Description: Total accumulated USD cost attributed to each model (computed from token usage and per-1M pricing).
225+
- `llm_model_cost_total{model, currency}`
226+
- Description: Total accumulated cost attributed to each model (computed from token usage and per-1M pricing), labeled by currency.
227227
- Labels:
228228
- model: model name used for the request
229+
- currency: currency code (e.g., "USD")
229230

230231
- `llm_routing_reason_codes_total{reason_code, model}`
231232
- Description: Count of routing decisions by reason code and selected model.
@@ -236,8 +237,11 @@ The router exposes additional metrics for cost accounting and routing decisions.
236237
Example PromQL:
237238

238239
```prometheus
239-
# Cost by model over the last hour
240-
sum by (model) (increase(llm_model_cost_usd_total[1h]))
240+
# Cost by model and currency over the last hour
241+
sum by (model, currency) (increase(llm_model_cost_total[1h]))
242+
243+
# Or, if you only use USD, a common query is:
244+
sum by (model) (increase(llm_model_cost_total{currency="USD"}[1h]))
241245
242246
# Routing decisions by reason code over the last 15 minutes
243247
sum by (reason_code) (increase(llm_routing_reason_codes_total[15m]))
@@ -251,21 +255,24 @@ Provide per-1M pricing for your models so the router can compute request cost an
251255
model_config:
252256
phi4:
253257
pricing:
254-
prompt_usd_per_1m: 200.0
255-
completion_usd_per_1m: 600.0
258+
currency: USD
259+
prompt_per_1m: 0.07
260+
completion_per_1m: 0.35
256261
"mistral-small3.1":
257262
pricing:
258-
prompt_usd_per_1m: 300.0
259-
completion_usd_per_1m: 900.0
263+
currency: USD
264+
prompt_per_1m: 0.1
265+
completion_per_1m: 0.3
260266
gemma3:27b:
261267
pricing:
262-
prompt_usd_per_1m: 500.0
263-
completion_usd_per_1m: 1500.0
268+
currency: USD
269+
prompt_per_1m: 0.067
270+
completion_per_1m: 0.267
264271
```
265272
266273
Notes:
267274
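- Pricing is optional; if omitted (or both rates are 0), no cost metric is recorded, the `llm_usage` log event reports `cost: 0.0` with `currency: "unknown"` and `pricing: "not_configured"`, and token metrics are still emitted.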
268-
- Cost is computed as: (prompt_tokens * prompt_usd_per_1m + completion_tokens * completion_usd_per_1m) / 1_000_000.
275+
- Cost is computed as: (prompt_tokens * prompt_per_1m + completion_tokens * completion_per_1m) / 1_000_000 (in the configured currency).
269276
270277
## gRPC ExtProc API
271278

website/docs/getting-started/configuration.md

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -126,25 +126,28 @@ model_config:
126126
127127
### Pricing (Optional)
128128
129-
If you want the router to compute USD cost per request and expose Prometheus cost metrics, add per-1M token pricing under each model in `model_config`.
129+
If you want the router to compute request cost and expose Prometheus cost metrics, add per-1M token pricing and currency under each model in `model_config`.
130130

131131
```yaml
132132
model_config:
133133
phi4:
134134
pricing:
135-
prompt_usd_per_1m: 0.07
136-
completion_usd_per_1m: 0.35
135+
currency: USD
136+
prompt_per_1m: 0.07
137+
completion_per_1m: 0.35
137138
"mistral-small3.1":
138139
pricing:
139-
prompt_usd_per_1m: 0.1
140-
completion_usd_per_1m: 0.3
140+
currency: USD
141+
prompt_per_1m: 0.1
142+
completion_per_1m: 0.3
141143
gemma3:27b:
142144
pricing:
143-
prompt_usd_per_1m: 0.067
144-
completion_usd_per_1m: 0.267
145+
currency: USD
146+
prompt_per_1m: 0.067
147+
completion_per_1m: 0.267
145148
```
146149

147-
- Cost formula: `(prompt_tokens * prompt_usd_per_1m + completion_tokens * completion_usd_per_1m) / 1_000_000`.
150+
- Cost formula: `(prompt_tokens * prompt_per_1m + completion_tokens * completion_per_1m) / 1_000_000` (in the given currency).
148151
- When not configured, the router still reports token and latency metrics; cost is treated as 0.
149152

150153
### Classification Models

0 commit comments

Comments
 (0)