@@ -36,7 +36,6 @@ import (
3636 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3737 "k8s.io/apimachinery/pkg/types"
3838 "sigs.k8s.io/controller-runtime/pkg/client"
39-
4039 v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
4140 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
4241 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
@@ -64,9 +63,29 @@ var _ = ginkgo.Describe("InferencePool", func() {
6463 return err
6564 }
6665
67- deploy .Spec .Template .Spec .Containers [0 ].Image = "vllm-dynamic-backend:local" // TODO(ryanrosario): Change back to official image after testing
68- deploy .Spec .Template .Spec .Containers [0 ].ImagePullPolicy = corev1 .PullNever
69- deploy .Spec .Template .Spec .Containers [0 ].Args = []string {strconv .Itoa (firstPort ), strconv .Itoa (numPorts )}
66+ // Instead of hardcoding arguments, we can instead replace teh arguments that need
67+ // to be changed, preserving any others that may exist.
68+ var newArgs []string
69+ skipNext := false
70+
71+ for _ , arg := range deploy .Spec .Template .Spec .Containers [0 ].Args {
72+ if skipNext {
73+ skipNext = false
74+ continue
75+ }
76+ // If this is one of the arguments we are updating, skip it AND its value
77+ if arg == "--port" || arg == "--data-parallel-size" {
78+ skipNext = true
79+ continue
80+ }
81+ newArgs = append (newArgs , arg )
82+ } // contains only the args we want to keep
83+
84+ // add new arguments
85+ newArgs = append (newArgs , "--port" , strconv .Itoa (firstPort ))
86+ newArgs = append (newArgs , "--data-parallel-size" , strconv .Itoa (numPorts ))
87+
88+ deploy .Spec .Template .Spec .Containers [0 ].Args = newArgs
7089 deploy .Spec .Template .Spec .Containers [0 ].Ports = buildContainerPorts (firstPort , numPorts )
7190 return testConfig .K8sClient .Update (testConfig .Context , deploy )
7291 }, testConfig .ExistsTimeout , testConfig .Interval ).Should (gomega .Succeed ())
@@ -305,7 +324,7 @@ func verifyTrafficRouting() {
305324 }
306325 }
307326 for _ , p := range expectedPort {
308- if strings .Contains (resp , fmt .Sprintf ("x-backend -port: %d" , p )) {
327+ if strings .Contains (resp , fmt .Sprintf ("x-inference -port: %d" , p )) {
309328 actualPort [p ] = 0
310329 }
311330 }
@@ -347,25 +366,32 @@ func verifyMetrics() {
347366 "inference_objective_output_tokens" ,
348367 "inference_pool_average_kv_cache_utilization" ,
349368 "inference_pool_average_queue_size" ,
350- "inference_pool_per_pod_queue_size" ,
369+ // "inference_pool_per_pod_queue_size",
351370 "inference_objective_running_requests" ,
352371 "inference_pool_ready_pods" ,
353372 "inference_extension_info" ,
354373 }
355374
375+ for i := range numPorts {
376+ expectedMetrics = append (expectedMetrics , fmt .Sprintf ("inference_pool_pod_requests_total{port=\" %d\" }" , firstPort + i ))
377+ fmt .Printf ("inference_pool_pod_requests_total{port=\" %d\" }" , firstPort + i )
378+ }
379+
356380 // Generate traffic by sending requests through the inference extension.
357381 ginkgo .By ("Generating traffic through the inference extension" )
358382 curlCmd := getCurlCommand (envoyName , testConfig .NsName , envoyPort , modelName , curlTimeout , "/completions" , "Write as if you were a critic: San Francisco" , true )
359383
360384 // Run the curl command multiple times to generate some metrics data.
361- for range 5 {
385+ // Use the coupon collector computation.
386+ batches := int (math .Ceil (numPorts * harmonicNumber (numPorts )))
387+ for range batches {
362388 _ , err := testutils .ExecCommandInPod (testConfig , "curl" , "curl" , curlCmd )
363389 gomega .Expect (err ).NotTo (gomega .HaveOccurred ())
364390 }
365391
366392 // Modify the curl command to generate some error metrics.
367393 curlCmd [len (curlCmd )- 1 ] = "invalid input"
368- for range 5 {
394+ for range batches {
369395 _ , err := testutils .ExecCommandInPod (testConfig , "curl" , "curl" , curlCmd )
370396 gomega .Expect (err ).NotTo (gomega .HaveOccurred ())
371397 }
@@ -389,7 +415,7 @@ func verifyMetrics() {
389415 ginkgo .By ("Verifying that all expected metrics are present." )
390416 gomega .Eventually (func () error {
391417 // Execute the metrics scrape command inside the curl pod.
392- fmt .Printf ("pod IP: %s" , podIP )
418+ // fmt.Printf("pod IP: %s", podIP)
393419 resp , err := testutils .ExecCommandInPod (testConfig , "curl" , "curl" , metricScrapeCmd )
394420 if err != nil {
395421 return err
@@ -456,7 +482,7 @@ func getMetricsScrapeCommand(podIP, token string) []string {
456482
457483// getCurlCommand returns the command, as a slice of strings, for curl'ing
458484// the test model server at the given name, namespace, port, and model name.
459- // This command gets executed by a dummy pod that communites with Envoy
485+ // This command gets executed by a dummy pod that communicates with Envoy
460486func getCurlCommand (name , ns , port , model string , timeout time.Duration , api string , promptOrMessages any , streaming bool ) []string {
461487 body := map [string ]any {
462488 "model" : model ,
@@ -478,6 +504,26 @@ func getCurlCommand(name, ns, port, model string, timeout time.Duration, api str
478504 }
479505 b , err := json .Marshal (body )
480506 gomega .Expect (err ).NotTo (gomega .HaveOccurred ())
507+
508+ fmt .Print ([]string {
509+ "curl" ,
510+ "-i" ,
511+ "--max-time" ,
512+ strconv .Itoa ((int )(timeout .Seconds ())),
513+ fmt .Sprintf ("%s.%s.svc:%s/v1%s" , name , ns , port , api ),
514+ "-H" ,
515+ "Content-Type: application/json" ,
516+ "-H" ,
517+ "Cache-Control: no-cache" ,
518+ "-H" ,
519+ fmt .Sprintf ("%v: inferenceobjective-sample" , metadata .ObjectiveKey ),
520+ "-H" ,
521+ fmt .Sprintf ("%v: %s" , metadata .ModelNameRewriteKey , targetModelName ),
522+ "-H" ,
523+ "Connection: close" ,
524+ "-d" ,
525+ string (b ),
526+ })
481527 return []string {
482528 "curl" ,
483529 "-i" ,
@@ -538,7 +584,7 @@ func waitForDeploymentRollout(tc *testutils.TestConfig, deploy *appsv1.Deploymen
538584 }
539585
540586 if currentDeploy .Generation > currentDeploy .Status .ObservedGeneration {
541- return fmt . Errorf ("deployment generation not observed yet" )
587+ return errors . New ("deployment generation not observed yet" )
542588 }
543589
544590 desiredReplicas := * currentDeploy .Spec .Replicas
@@ -579,7 +625,7 @@ func waitForDeploymentReady(tc *testutils.TestConfig, deploy *appsv1.Deployment)
579625 }
580626
581627 if current .Status .ReadyReplicas == 0 {
582- return fmt . Errorf ("no replicas are ready yet" )
628+ return errors . New ("no replicas are ready yet" )
583629 }
584630
585631 return nil
0 commit comments