Commit 7108536

rename inference_model metrics to inference_objective (#1567)
1 parent 1457f63 commit 7108536

17 files changed: 546 additions & 550 deletions

pkg/bbr/handlers/request_test.go

Lines changed: 1 addition & 1 deletion

@@ -206,7 +206,7 @@ func TestHandleRequestBody(t *testing.T) {
 		bbr_success_total{} 1
 	`
 
-	if err := metricsutils.GatherAndCompare(crmetrics.Registry, strings.NewReader(wantMetrics), "inference_model_request_total"); err != nil {
+	if err := metricsutils.GatherAndCompare(crmetrics.Registry, strings.NewReader(wantMetrics), "inference_objective_request_total"); err != nil {
 		t.Error(err)
 	}
 }
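
Only the gathered series name changes at this call site. For reference, a minimal, self-contained sketch of the same assertion pattern using prometheus/client_golang's testutil directly (assuming the repo's metricsutils.GatherAndCompare wraps an equivalent helper; the registry and counter below are hypothetical stand-ins for crmetrics.Registry and the EPP request counter):

package handlers_test

import (
	"strings"
	"testing"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func TestRenamedRequestCounter(t *testing.T) {
	// Hypothetical stand-ins for crmetrics.Registry and the EPP request counter.
	reg := prometheus.NewRegistry()
	c := prometheus.NewCounterVec(prometheus.CounterOpts{
		Subsystem: "inference_objective",
		Name:      "request_total",
		Help:      "Counter of inference objective requests.",
	}, []string{"model_name", "target_model_name"})
	reg.MustRegister(c)
	c.WithLabelValues("m", "m-target").Inc()

	want := `# HELP inference_objective_request_total Counter of inference objective requests.
# TYPE inference_objective_request_total counter
inference_objective_request_total{model_name="m",target_model_name="m-target"} 1
`
	// Filtering by the new series name mirrors the updated call site above.
	if err := testutil.GatherAndCompare(reg, strings.NewReader(want), "inference_objective_request_total"); err != nil {
		t.Error(err)
	}
}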

pkg/epp/metrics/metrics.go

Lines changed: 22 additions & 22 deletions

@@ -31,36 +31,36 @@ import (
 )
 
 const (
-	InferenceModelComponent = "inference_model"
-	InferencePoolComponent  = "inference_pool"
-	InferenceExtension      = "inference_extension"
+	InferenceObjectiveComponent = "inference_objective"
+	InferencePoolComponent      = "inference_pool"
+	InferenceExtension          = "inference_extension"
 )
 
 var (
-	// Inference Model Metrics
+	// Inference Objective Metrics
 	requestCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "request_total",
-			Help:      metricsutil.HelpMsgWithStability("Counter of inference model requests broken out for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Counter of inference objective requests broken out for each model and target model.", compbasemetrics.ALPHA),
 		},
 		[]string{"model_name", "target_model_name"},
 	)
 
 	requestErrCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "request_error_total",
-			Help:      metricsutil.HelpMsgWithStability("Counter of inference model requests errors broken out for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Counter of inference objective requests errors broken out for each model and target model.", compbasemetrics.ALPHA),
 		},
 		[]string{"model_name", "target_model_name", "error_code"},
 	)
 
 	requestLatencies = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "request_duration_seconds",
-			Help:      metricsutil.HelpMsgWithStability("Inference model response latency distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective response latency distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
 			Buckets: []float64{
 				0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
 				4, 5, 6, 8, 10, 15, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600,
@@ -71,9 +71,9 @@ var (
 
 	requestSizes = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "request_sizes",
-			Help:      metricsutil.HelpMsgWithStability("Inference model requests size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective requests size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
 			// Use buckets ranging from 1000 bytes (1KB) to 10^9 bytes (1GB).
 			Buckets: []float64{
 				64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, // More fine-grained up to 64KB
@@ -86,9 +86,9 @@ var (
 
 	responseSizes = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "response_sizes",
-			Help:      metricsutil.HelpMsgWithStability("Inference model responses size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective responses size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
 			// Most models have a response token < 8192 tokens. Each token, in average, has 4 characters.
 			// 8192 * 4 = 32768.
 			Buckets: []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32778, 65536},
@@ -98,9 +98,9 @@ var (
 
 	inputTokens = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "input_tokens",
-			Help:      metricsutil.HelpMsgWithStability("Inference model input token count distribution for requests in each model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective input token count distribution for requests in each model.", compbasemetrics.ALPHA),
 			// Most models have a input context window less than 1 million tokens.
 			Buckets: []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32778, 65536, 131072, 262144, 524288, 1048576},
 		},
@@ -109,9 +109,9 @@ var (
 
 	outputTokens = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "output_tokens",
-			Help:      metricsutil.HelpMsgWithStability("Inference model output token count distribution for requests in each model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective output token count distribution for requests in each model.", compbasemetrics.ALPHA),
 			// Most models generates output less than 8192 tokens.
 			Buckets: []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192},
 		},
@@ -120,19 +120,19 @@ var (
 
 	runningRequests = prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "running_requests",
-			Help:      metricsutil.HelpMsgWithStability("Inference model number of running requests in each model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective number of running requests in each model.", compbasemetrics.ALPHA),
 		},
 		[]string{"model_name"},
 	)
 
 	// NTPOT - Normalized Time Per Output Token
 	NormalizedTimePerOutputToken = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "normalized_time_per_output_token_seconds",
-			Help:      metricsutil.HelpMsgWithStability("Inference model latency divided by number of output tokens in seconds for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective latency divided by number of output tokens in seconds for each model and target model.", compbasemetrics.ALPHA),
 			// From few milliseconds per token to multiple seconds per token
 			Buckets: []float64{
 				0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0,
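
The rename works because prometheus/client_golang joins Namespace, Subsystem, and Name with underscores when assembling the exported series name, so changing the component constant renames every metric registered above. A small sketch of that composition using the library's BuildFQName (this snippet is illustrative, not part of the commit):

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// BuildFQName joins namespace, subsystem, and name with "_",
	// skipping empty parts; these metrics set only a subsystem.
	fmt.Println(prometheus.BuildFQName("", "inference_model", "request_total"))
	fmt.Println(prometheus.BuildFQName("", "inference_objective", "request_total"))
	// Output:
	// inference_model_request_total
	// inference_objective_request_total
}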

pkg/epp/metrics/metrics_test.go

Lines changed: 9 additions & 9 deletions

@@ -30,15 +30,15 @@ import (
 )
 
 const (
-	RequestTotalMetric                 = InferenceModelComponent + "_request_total"
-	RequestErrorTotalMetric            = InferenceModelComponent + "_request_error_total"
-	RequestLatenciesMetric             = InferenceModelComponent + "_request_duration_seconds"
-	RequestSizesMetric                 = InferenceModelComponent + "_request_sizes"
-	ResponseSizesMetric                = InferenceModelComponent + "_response_sizes"
-	InputTokensMetric                  = InferenceModelComponent + "_input_tokens"
-	OutputTokensMetric                 = InferenceModelComponent + "_output_tokens"
-	NormalizedTimePerOutputTokenMetric = InferenceModelComponent + "_normalized_time_per_output_token_seconds"
-	RunningRequestsMetric              = InferenceModelComponent + "_running_requests"
+	RequestTotalMetric                 = InferenceObjectiveComponent + "_request_total"
+	RequestErrorTotalMetric            = InferenceObjectiveComponent + "_request_error_total"
+	RequestLatenciesMetric             = InferenceObjectiveComponent + "_request_duration_seconds"
+	RequestSizesMetric                 = InferenceObjectiveComponent + "_request_sizes"
+	ResponseSizesMetric                = InferenceObjectiveComponent + "_response_sizes"
+	InputTokensMetric                  = InferenceObjectiveComponent + "_input_tokens"
+	OutputTokensMetric                 = InferenceObjectiveComponent + "_output_tokens"
+	NormalizedTimePerOutputTokenMetric = InferenceObjectiveComponent + "_normalized_time_per_output_token_seconds"
+	RunningRequestsMetric              = InferenceObjectiveComponent + "_running_requests"
 	KVCacheAvgUsageMetric              = InferencePoolComponent + "_average_kv_cache_utilization"
 	QueueAvgSizeMetric                 = InferencePoolComponent + "_average_queue_size"
 	PerPodQueueSizeMetrics             = InferencePoolComponent + "_per_pod_queue_size"
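
Since only the subsystem prefix changed, existing dashboards and alerts on the old names can be migrated mechanically. A hedged sketch of such a rewrite (renameMetric is a hypothetical helper, not part of this commit; only the inference_model_* series were renamed, while pool and extension metrics keep their names):

package main

import (
	"fmt"
	"strings"
)

// renameMetric rewrites an old EPP series name to its post-rename form.
func renameMetric(name string) string {
	if rest, ok := strings.CutPrefix(name, "inference_model_"); ok {
		return "inference_objective_" + rest
	}
	return name
}

func main() {
	fmt.Println(renameMetric("inference_model_request_total"))     // inference_objective_request_total
	fmt.Println(renameMetric("inference_pool_average_queue_size")) // unchanged
}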
