pkg/epp/metrics/metrics.go (+22 −22)
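The change renames the EPP metrics subsystem from inference_model to inference_objective: the InferenceModelComponent constant becomes InferenceObjectiveComponent, and each metric's Help text is reworded to match. The Name fields, label sets, and bucket layouts are untouched, but because Subsystem prefixes every metric, the exposed series names shift from inference_model_* to inference_objective_*.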
@@ -31,36 +31,36 @@ import (
 )
 
 const (
-	InferenceModelComponent = "inference_model"
-	InferencePoolComponent  = "inference_pool"
-	InferenceExtension      = "inference_extension"
+	InferenceObjectiveComponent = "inference_objective"
+	InferencePoolComponent      = "inference_pool"
+	InferenceExtension          = "inference_extension"
 )
 
 var (
-	// Inference Model Metrics
+	// Inference Objective Metrics
 	requestCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "request_total",
-			Help:      metricsutil.HelpMsgWithStability("Counter of inference model requests broken out for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Counter of inference objective requests broken out for each model and target model.", compbasemetrics.ALPHA),
 		},
 		[]string{"model_name", "target_model_name"},
 	)
 
 	requestErrCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "request_error_total",
-			Help:      metricsutil.HelpMsgWithStability("Counter of inference model requests errors broken out for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Counter of inference objective requests errors broken out for each model and target model.", compbasemetrics.ALPHA),
[...]
-			Help:      metricsutil.HelpMsgWithStability("Inference model response latency distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective response latency distribution in seconds for each model and target model.", compbasemetrics.ALPHA),
[...]
-			Help:      metricsutil.HelpMsgWithStability("Inference model requests size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective requests size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
 			// Use buckets ranging from 1000 bytes (1KB) to 10^9 bytes (1GB).
 			Buckets: []float64{
 				64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, // More fine-grained up to 64KB
[...]
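The rename is visible to scrape consumers because client_golang joins Subsystem and Name with an underscore, so `inference_model_request_total` becomes `inference_objective_request_total`. A minimal sketch of that behavior, not the EPP's actual wiring: the label values are invented, and the plain Help string stands in for the `metricsutil.HelpMsgWithStability` wrapper used in the real file.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

const InferenceObjectiveComponent = "inference_objective"

// requestCounter mirrors the definition in the diff above,
// with the stability-help wrapper replaced by a plain string.
var requestCounter = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Subsystem: InferenceObjectiveComponent, // was InferenceModelComponent
		Name:      "request_total",
		Help:      "Counter of inference objective requests broken out for each model and target model.",
	},
	[]string{"model_name", "target_model_name"},
)

func main() {
	reg := prometheus.NewRegistry()
	reg.MustRegister(requestCounter)

	// One series per (model_name, target_model_name) pair;
	// these label values are illustrative.
	requestCounter.WithLabelValues("my-model", "my-model-v1").Inc()

	// Subsystem and Name are joined with "_", so this prints
	// "inference_objective_request_total".
	fmt.Println(prometheus.BuildFQName("", InferenceObjectiveComponent, "request_total"))
}
```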
@@ -86,9 +86,9 @@ var (
 	responseSizes = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
-			Subsystem: InferenceModelComponent,
+			Subsystem: InferenceObjectiveComponent,
 			Name:      "response_sizes",
-			Help:      metricsutil.HelpMsgWithStability("Inference model responses size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
+			Help:      metricsutil.HelpMsgWithStability("Inference objective responses size distribution in bytes for each model and target model.", compbasemetrics.ALPHA),
 			// Most models have a response token < 8192 tokens. Each token, in average, has 4 characters.
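As a rough sizing check on that comment: 8192 tokens × ~4 characters per token ≈ 32 KiB, which is why byte-size buckets concentrated below the tens-of-kilobytes range cover typical responses.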
Help: metricsutil.HelpMsgWithStability("Inference model latency divided by number of output tokens in seconds for each model and target model.", compbasemetrics.ALPHA),
135
+
Help: metricsutil.HelpMsgWithStability("Inference objective latency divided by number of output tokens in seconds for each model and target model.", compbasemetrics.ALPHA),
136
136
// From few milliseconds per token to multiple seconds per token
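That last help text describes a per-output-token latency: total request latency divided by the output token count. A hedged sketch of how such an observation could be recorded with standard client_golang calls; the metric name, the observePerTokenLatency helper, and the bucket values here are assumptions for illustration, not copied from the file.

```go
package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// perTokenLatency is a hypothetical stand-in for the metric in the diff;
// the Name and Buckets are illustrative.
var perTokenLatency = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Subsystem: "inference_objective",
		Name:      "normalized_time_per_output_token_seconds",
		Help:      "Inference objective latency divided by number of output tokens in seconds.",
		// From a few milliseconds per token to multiple seconds per token:
		// 0.001s doubling 12 times reaches ~2.048s.
		Buckets: prometheus.ExponentialBuckets(0.001, 2, 12),
	},
	[]string{"model_name", "target_model_name"},
)

// observePerTokenLatency is a hypothetical helper: divide the total
// request latency by the output token count before observing.
func observePerTokenLatency(model, target string, elapsed time.Duration, outputTokens int) {
	if outputTokens <= 0 {
		return // avoid division by zero for empty responses
	}
	perTokenLatency.WithLabelValues(model, target).Observe(elapsed.Seconds() / float64(outputTokens))
}

func main() {
	prometheus.MustRegister(perTokenLatency)
	observePerTokenLatency("my-model", "my-model-v1", 1200*time.Millisecond, 48) // 0.025 s/token
}
```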