@@ -24,15 +24,20 @@ import (
2424 "errors"
2525 "fmt"
2626 "io"
27+ "net"
28+ "net/http"
2729 "os"
2830 "path/filepath"
31+ "strconv"
32+ "strings"
2933 "testing"
3034 "time"
3135
3236 configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
3337 extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
3438 envoyTypePb "github.com/envoyproxy/go-control-plane/envoy/type/v3"
3539 "github.com/google/go-cmp/cmp"
40+ "github.com/prometheus/client_golang/prometheus/promhttp"
3641 "github.com/stretchr/testify/assert"
3742 "google.golang.org/grpc"
3843 "google.golang.org/grpc/credentials/insecure"
@@ -43,12 +48,16 @@ import (
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	k8syaml "k8s.io/apimachinery/pkg/util/yaml"
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/component-base/metrics/legacyregistry"
+	metricsutils "k8s.io/component-base/metrics/testutil"
 	ctrl "sigs.k8s.io/controller-runtime"
 	k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/envtest"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
 	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
 	runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
 	extprocutils "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/test"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
@@ -57,7 +66,8 @@ import (
 )
 
 const (
-	port = runserver.DefaultGrpcPort
+	port        = runserver.DefaultGrpcPort
+	metricsPort = 8888
 )
 
 var (
@@ -76,6 +86,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 		wantHeaders       []*configPb.HeaderValueOption
 		wantMetadata      *structpb.Struct
 		wantBody          []byte
+		wantMetrics       string
 		wantErr           bool
 		immediateResponse *extProcPb.ImmediateResponse
 	}{
@@ -113,7 +124,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 			},
 			wantMetadata: makeMetadata("address-1:8000"),
 			wantBody:     []byte("{\"max_tokens\":100,\"model\":\"my-model-12345\",\"prompt\":\"test1\",\"temperature\":0}"),
-			wantErr:      false,
+			wantMetrics: `
+				# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
+				# TYPE inference_model_request_total counter
+				inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
+				`,
+			wantErr: false,
 		},
 		{
 			name: "select active lora, low queue",
@@ -161,7 +177,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 			},
 			wantMetadata: makeMetadata("address-1:8000"),
 			wantBody:     []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg2\",\"prompt\":\"test2\",\"temperature\":0}"),
-			wantErr:      false,
+			wantMetrics: `
+				# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
+				# TYPE inference_model_request_total counter
+				inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 1
+				`,
+			wantErr: false,
 		},
 		{
 			name: "select no lora despite active model, avoid excessive queue size",
@@ -210,7 +231,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 			},
 			wantMetadata: makeMetadata("address-2:8000"),
 			wantBody:     []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg2\",\"prompt\":\"test3\",\"temperature\":0}"),
-			wantErr:      false,
+			wantMetrics: `
+				# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
+				# TYPE inference_model_request_total counter
+				inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 1
+				`,
+			wantErr: false,
 		},
 		{
 			name: "noncritical and all models past threshold, shed request",
@@ -253,6 +279,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 					Code: envoyTypePb.StatusCode_TooManyRequests,
 				},
 			},
+			wantMetrics: "",
 		},
 		{
 			name: "noncritical, but one server has capacity, do not shed",
@@ -301,7 +328,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 			},
 			wantMetadata: makeMetadata("address-0:8000"),
 			wantBody:     []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg3\",\"prompt\":\"test5\",\"temperature\":0}"),
-			wantErr:      false,
+			wantMetrics: `
+				# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
+				# TYPE inference_model_request_total counter
+				inference_model_request_total{model_name="sql-lora-sheddable",target_model_name="sql-lora-1fdg3"} 1
+				`,
+			wantErr: false,
 		},
 	}
 
@@ -345,6 +377,14 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 			if diff := cmp.Diff(want, res, protocmp.Transform()); diff != "" {
 				t.Errorf("Unexpected response, (-want +got): %v", diff)
 			}
+
+			if test.wantMetrics != "" {
+				if err := metricsutils.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(test.wantMetrics), "inference_model_request_total"); err != nil {
+					t.Error(err)
+				}
+			}
+
+			legacyregistry.Reset()
 		})
 	}
 }
@@ -424,6 +464,10 @@ func BeforeSuit(t *testing.T) func() {
 		logutil.Fatal(logger, err, "Failed to create controller manager")
 	}
 
+	if err := registerMetricsHandler(mgr, metricsPort); err != nil {
+		logutil.Fatal(logger, err, "Failed to register metrics handler")
+	}
+
 	serverRunner = runserver.NewDefaultExtProcServerRunner()
 	// Adjust from defaults
 	serverRunner.PoolName = "vllm-llama2-7b-pool"
@@ -544,3 +588,31 @@ func makeMetadata(endpoint string) *structpb.Struct {
 		},
 	}
 }
+
+// registerMetricsHandler is a simplified version of the metrics endpoint handler,
+// without authentication, for integration tests.
+func registerMetricsHandler(mgr manager.Manager, port int) error {
+	metrics.Register()
+
+	// Init HTTP server.
+	h := promhttp.HandlerFor(
+		legacyregistry.DefaultGatherer,
+		promhttp.HandlerOpts{},
+	)
+
+	mux := http.NewServeMux()
+	mux.Handle("/metrics", h)
+
+	srv := &http.Server{
+		Addr:    net.JoinHostPort("", strconv.Itoa(port)),
+		Handler: mux,
+	}
+
+	if err := mgr.Add(&manager.Server{
+		Name:   "metrics",
+		Server: srv,
+	}); err != nil {
+		return err
+	}
+	return nil
+}
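Note: the wantMetrics expectations above are plain Prometheus text-exposition snippets that metricsutils.GatherAndCompare diffs against the legacy registry; the /metrics HTTP endpoint wired up by registerMetricsHandler is not scraped by the test itself. As a rough, hedged sketch of how such a scrape could look (the helper name scrapeRequestTotal and the localhost URL are assumptions, not part of this change; it reuses the file's existing fmt, io, net/http, strings, and testing imports, and assumes the controller manager has started the "metrics" server on metricsPort):

// Sketch only: scrape the test metrics endpoint over HTTP and assert that the
// expected inference_model_request_total series is present. Assumes the
// manager (and the "metrics" server added by registerMetricsHandler) is
// already running and listening on metricsPort.
func scrapeRequestTotal(t *testing.T, model, target string) {
	t.Helper()
	resp, err := http.Get(fmt.Sprintf("http://localhost:%d/metrics", metricsPort))
	if err != nil {
		t.Fatalf("failed to scrape metrics endpoint: %v", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Fatalf("failed to read metrics response: %v", err)
	}

	want := fmt.Sprintf(`inference_model_request_total{model_name=%q,target_model_name=%q} 1`, model, target)
	if !strings.Contains(string(body), want) {
		t.Errorf("metrics endpoint missing expected series: %s", want)
	}
}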