Commit cff5a2e

JeffLuoo authored and BenjaminBraunDev committed
rename inference_model metrics to inference_objective (kubernetes-sigs#1567)
1 parent 0b402ca commit cff5a2e

17 files changed: 546 additions, 550 deletions

Because the rename only changes the metrics Subsystem constant, every metric family previously exposed under the inference_model prefix is now exposed under inference_objective (e.g. inference_model_request_total becomes inference_objective_request_total); dashboards, alerts, and scrape configs that reference the old names need the same rename.

pkg/bbr/handlers/request_test.go

Lines changed: 1 addition & 1 deletion
@@ -206,7 +206,7 @@ func TestHandleRequestBody(t *testing.T) {
 			bbr_success_total{} 1
 	`
 
-		if err := metricsutils.GatherAndCompare(crmetrics.Registry, strings.NewReader(wantMetrics), "inference_model_request_total"); err != nil {
+		if err := metricsutils.GatherAndCompare(crmetrics.Registry, strings.NewReader(wantMetrics), "inference_objective_request_total"); err != nil {
 			t.Error(err)
 		}
 }
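
For reference, GatherAndCompare only checks the metric families named in its trailing variadic arguments, which is why the filter string had to be renamed along with the metric itself. A minimal self-contained sketch of the same pattern, assuming the upstream prometheus/client_golang testutil package rather than this repo's metricsutils wrapper:

package main

import (
	"fmt"
	"strings"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// Register a counter under the renamed subsystem.
	reg := prometheus.NewRegistry()
	counter := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: "inference_objective",
			Name:      "request_total",
			Help:      "Counter of inference objective requests.",
		},
		[]string{"model_name", "target_model_name"},
	)
	reg.MustRegister(counter)
	counter.WithLabelValues("m", "t").Inc()

	want := `# HELP inference_objective_request_total Counter of inference objective requests.
# TYPE inference_objective_request_total counter
inference_objective_request_total{model_name="m",target_model_name="t"} 1
`
	// Only the named family is compared; anything else in the registry is ignored.
	if err := testutil.GatherAndCompare(reg, strings.NewReader(want), "inference_objective_request_total"); err != nil {
		fmt.Println(err)
	}
}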

pkg/epp/metrics/metrics.go

Lines changed: 22 additions & 22 deletions
@@ -31,27 +31,27 @@ import (
 )
 
 const (
-	InferenceModelComponent = "inference_model"
-	InferencePoolComponent  = "inference_pool"
-	InferenceExtension      = "inference_extension"
+	InferenceObjectiveComponent = "inference_objective"
+	InferencePoolComponent      = "inference_pool"
+	InferenceExtension          = "inference_extension"
 )
 
 var (
-	// Inference Model Metrics
+	// Inference Objective Metrics
 	requestCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "request_total",
-			Help:      metricsutil.HelpMsgWithStability("Counter of inference model requests broken out for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Counter of inference objective requests broken out for each model and target model.", compbasemetrics.ALPHA),
 		},
 		[]string{"model_name", "target_model_name"},
 	)
 
 	requestErrCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "request_error_total",
-			Help:      metricsutil.HelpMsgWithStability("Counter of inference model requests errors broken out for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Counter of inference objective requests errors broken out for each model and target model.", compbasemetrics.ALPHA),
 		},
 		[]string{"model_name", "target_model_name", "error_code"},
 	)
@@ -245,9 +245,9 @@ var (
 
 	requestLatencies = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "request_duration_seconds",
-			Help:      metricsutil.HelpMsgWithStability("Inference model response latency distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective response latency distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
 			Buckets: []float64{
 				0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
 				4, 5, 6, 8, 10, 15, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600,
@@ -258,9 +258,9 @@ var (
 
 	requestSizes = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "request_sizes",
-			Help:      metricsutil.HelpMsgWithStability("Inference model requests size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective requests size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
 			// Use buckets ranging from 1000 bytes (1KB) to 10^9 bytes (1GB).
 			Buckets: []float64{
 				64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, // More fine-grained up to 64KB
@@ -273,9 +273,9 @@ var (
 
 	responseSizes = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "response_sizes",
-			Help:      metricsutil.HelpMsgWithStability("Inference model responses size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective responses size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
 			// Most models have a response token < 8192 tokens. Each token, in average, has 4 characters.
 			// 8192 * 4 = 32768.
 			Buckets: []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32778, 65536},
@@ -285,9 +285,9 @@ var (
 
 	inputTokens = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "input_tokens",
-			Help:      metricsutil.HelpMsgWithStability("Inference model input token count distribution for requests in each model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective input token count distribution for requests in each model.", compbasemetrics.ALPHA),
 			// Most models have a input context window less than 1 million tokens.
 			Buckets: []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32778, 65536, 131072, 262144, 524288, 1048576},
 		},
@@ -296,9 +296,9 @@ var (
 
 	outputTokens = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "output_tokens",
-			Help:      metricsutil.HelpMsgWithStability("Inference model output token count distribution for requests in each model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective output token count distribution for requests in each model.", compbasemetrics.ALPHA),
 			// Most models generates output less than 8192 tokens.
 			Buckets: []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192},
 		},
@@ -307,19 +307,19 @@ var (
 
 	runningRequests = prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "running_requests",
-			Help:      metricsutil.HelpMsgWithStability("Inference model number of running requests in each model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective number of running requests in each model.", compbasemetrics.ALPHA),
 		},
 		[]string{"model_name"},
 	)
 
 	// NTPOT - Normalized Time Per Output Token
 	NormalizedTimePerOutputToken = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "normalized_time_per_output_token_seconds",
-			Help:      metricsutil.HelpMsgWithStability("Inference model latency divided by number of output tokens in seconds for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective latency divided by number of output tokens in seconds for each model and target model.", compbasemetrics.ALPHA),
 			// From few milliseconds per token to multiple seconds per token
 			Buckets: []float64{
 				0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0,
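
Note that only the Subsystem constant and the Help strings change in this file; the collector variables keep their names, so call sites that record values need no edits. A hedged sketch of what such a call site looks like (the helper name is illustrative, not taken from this diff):

// RecordRequestCounter is a hypothetical wrapper around the requestCounter
// collector defined above; the rename changes only the exported metric name,
// not how values are recorded.
func RecordRequestCounter(modelName, targetModelName string) {
	requestCounter.WithLabelValues(modelName, targetModelName).Inc()
}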

pkg/epp/metrics/metrics_test.go

Lines changed: 9 additions & 9 deletions
@@ -30,15 +30,15 @@ import (
 )
 
 const (
-	RequestTotalMetric                 = InferenceModelComponent + "_request_total"
-	RequestErrorTotalMetric            = InferenceModelComponent + "_request_error_total"
-	RequestLatenciesMetric             = InferenceModelComponent + "_request_duration_seconds"
-	RequestSizesMetric                 = InferenceModelComponent + "_request_sizes"
-	ResponseSizesMetric                = InferenceModelComponent + "_response_sizes"
-	InputTokensMetric                  = InferenceModelComponent + "_input_tokens"
-	OutputTokensMetric                 = InferenceModelComponent + "_output_tokens"
-	NormalizedTimePerOutputTokenMetric = InferenceModelComponent + "_normalized_time_per_output_token_seconds"
-	RunningRequestsMetric              = InferenceModelComponent + "_running_requests"
+	RequestTotalMetric                 = InferenceObjectiveComponent + "_request_total"
+	RequestErrorTotalMetric            = InferenceObjectiveComponent + "_request_error_total"
+	RequestLatenciesMetric             = InferenceObjectiveComponent + "_request_duration_seconds"
+	RequestSizesMetric                 = InferenceObjectiveComponent + "_request_sizes"
+	ResponseSizesMetric                = InferenceObjectiveComponent + "_response_sizes"
+	InputTokensMetric                  = InferenceObjectiveComponent + "_input_tokens"
+	OutputTokensMetric                 = InferenceObjectiveComponent + "_output_tokens"
+	NormalizedTimePerOutputTokenMetric = InferenceObjectiveComponent + "_normalized_time_per_output_token_seconds"
+	RunningRequestsMetric              = InferenceObjectiveComponent + "_running_requests"
 	KVCacheAvgUsageMetric              = InferencePoolComponent + "_average_kv_cache_utilization"
 	QueueAvgSizeMetric                 = InferencePoolComponent + "_average_queue_size"
 	PerPodQueueSizeMetrics             = InferencePoolComponent + "_per_pod_queue_size"
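
Because Go evaluates these constant concatenations at compile time, the full family names the tests gather against are fully determined here. A quick sketch showing two of the resulting strings (constants copied from this diff):

package main

import "fmt"

const (
	InferenceObjectiveComponent = "inference_objective"

	RequestTotalMetric    = InferenceObjectiveComponent + "_request_total"
	RunningRequestsMetric = InferenceObjectiveComponent + "_running_requests"
)

func main() {
	// Prints the renamed families that the tests (and any dashboards) now reference.
	fmt.Println(RequestTotalMetric)    // inference_objective_request_total
	fmt.Println(RunningRequestsMetric) // inference_objective_running_requests
}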
