Commit dd7887f

Switch tests back to using llmd vllm simulator
1 parent 772d499 commit dd7887f

2 files changed: +59 -13 lines changed

config/manifests/vllm/sim-deployment.yaml

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ spec:
     spec:
       containers:
         - name: vllm-sim
-          image: ghcr.io/llm-d/llm-d-inference-sim:v0.5.0
+          image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.0
           imagePullPolicy: Always
           args:
             - --model

test/e2e/epp/e2e_test.go

Lines changed: 58 additions & 12 deletions
@@ -36,7 +36,6 @@ import (
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/types"
     "sigs.k8s.io/controller-runtime/pkg/client"
-
     v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
     "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
     "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
@@ -64,9 +63,29 @@ var _ = ginkgo.Describe("InferencePool", func() {
                 return err
             }
 
-            deploy.Spec.Template.Spec.Containers[0].Image = "vllm-dynamic-backend:local" // TODO(ryanrosario): Change back to official image after testing
-            deploy.Spec.Template.Spec.Containers[0].ImagePullPolicy = corev1.PullNever
-            deploy.Spec.Template.Spec.Containers[0].Args = []string{strconv.Itoa(firstPort), strconv.Itoa(numPorts)}
+            // Instead of hardcoding the arguments, replace only the arguments that need
+            // to be changed, preserving any others that may exist.
+            var newArgs []string
+            skipNext := false
+
+            for _, arg := range deploy.Spec.Template.Spec.Containers[0].Args {
+                if skipNext {
+                    skipNext = false
+                    continue
+                }
+                // If this is one of the arguments we are updating, skip it AND its value.
+                if arg == "--port" || arg == "--data-parallel-size" {
+                    skipNext = true
+                    continue
+                }
+                newArgs = append(newArgs, arg)
+            } // newArgs now contains only the args we want to keep.
+
+            // Add the new arguments.
+            newArgs = append(newArgs, "--port", strconv.Itoa(firstPort))
+            newArgs = append(newArgs, "--data-parallel-size", strconv.Itoa(numPorts))
+
+            deploy.Spec.Template.Spec.Containers[0].Args = newArgs
             deploy.Spec.Template.Spec.Containers[0].Ports = buildContainerPorts(firstPort, numPorts)
             return testConfig.K8sClient.Update(testConfig.Context, deploy)
         }, testConfig.ExistsTimeout, testConfig.Interval).Should(gomega.Succeed())
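
Editor's note: the replacement logic in this hunk walks the existing argument list, drops each targeted flag together with the value that follows it, and then appends the test's own values. As an illustration only (not part of the commit), the same skip-next pattern can be factored into a small standalone helper; the name stripFlags is hypothetical.

// stripFlags is a hypothetical helper sketching the skip-next pattern used above:
// it removes each listed flag and the single value that immediately follows it,
// leaving every other argument untouched.
func stripFlags(args []string, flags ...string) []string {
    drop := make(map[string]bool, len(flags))
    for _, f := range flags {
        drop[f] = true
    }
    var kept []string
    skipNext := false
    for _, arg := range args {
        if skipNext {
            skipNext = false
            continue
        }
        if drop[arg] {
            skipNext = true // also skip this flag's value on the next iteration
            continue
        }
        kept = append(kept, arg)
    }
    return kept
}

With such a helper the update would reduce to stripFlags(container.Args, "--port", "--data-parallel-size") followed by the two appends. Either way, the pattern assumes each targeted flag is passed as a separate "--flag value" pair rather than as "--flag=value".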
@@ -305,7 +324,7 @@ func verifyTrafficRouting() {
             }
         }
         for _, p := range expectedPort {
-            if strings.Contains(resp, fmt.Sprintf("x-backend-port: %d", p)) {
+            if strings.Contains(resp, fmt.Sprintf("x-inference-port: %d", p)) {
                 actualPort[p] = 0
             }
         }
@@ -347,25 +366,32 @@ func verifyMetrics() {
         "inference_objective_output_tokens",
         "inference_pool_average_kv_cache_utilization",
         "inference_pool_average_queue_size",
-        "inference_pool_per_pod_queue_size",
+        //"inference_pool_per_pod_queue_size",
         "inference_objective_running_requests",
         "inference_pool_ready_pods",
         "inference_extension_info",
     }
 
+    for i := range numPorts {
+        expectedMetrics = append(expectedMetrics, fmt.Sprintf("inference_pool_pod_requests_total{port=\"%d\"}", firstPort+i))
+        fmt.Printf("inference_pool_pod_requests_total{port=\"%d\"}", firstPort+i)
+    }
+
     // Generate traffic by sending requests through the inference extension.
     ginkgo.By("Generating traffic through the inference extension")
     curlCmd := getCurlCommand(envoyName, testConfig.NsName, envoyPort, modelName, curlTimeout, "/completions", "Write as if you were a critic: San Francisco", true)
 
     // Run the curl command multiple times to generate some metrics data.
-    for range 5 {
+    // Use the coupon collector computation.
+    batches := int(math.Ceil(numPorts * harmonicNumber(numPorts)))
+    for range batches {
         _, err := testutils.ExecCommandInPod(testConfig, "curl", "curl", curlCmd)
         gomega.Expect(err).NotTo(gomega.HaveOccurred())
     }
 
     // Modify the curl command to generate some error metrics.
     curlCmd[len(curlCmd)-1] = "invalid input"
-    for range 5 {
+    for range batches {
         _, err := testutils.ExecCommandInPod(testConfig, "curl", "curl", curlCmd)
         gomega.Expect(err).NotTo(gomega.HaveOccurred())
     }
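
Editor's note: the batch count in this hunk is the classic coupon-collector estimate. If requests land uniformly at random across numPorts backends, the expected number of requests needed to hit every backend at least once is numPorts * H(numPorts), where H(n) is the n-th harmonic number; for numPorts = 3, H(3) ≈ 1.83, so batches = ceil(3 × 1.83) = 6. This is an expected value, not a guarantee, so an unlucky run can still miss a port. The harmonicNumber helper is referenced above but not defined in this diff; a minimal sketch, assuming it returns H(n) as a float64:

// harmonicNumber returns H(n) = 1 + 1/2 + ... + 1/n.
// Sketch only: the helper actually used by the test is not shown in this diff.
func harmonicNumber(n int) float64 {
    h := 0.0
    for i := 1; i <= n; i++ {
        h += 1.0 / float64(i)
    }
    return h
}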
@@ -389,7 +415,7 @@ func verifyMetrics() {
     ginkgo.By("Verifying that all expected metrics are present.")
     gomega.Eventually(func() error {
         // Execute the metrics scrape command inside the curl pod.
-        fmt.Printf("pod IP: %s", podIP)
+        // fmt.Printf("pod IP: %s", podIP)
         resp, err := testutils.ExecCommandInPod(testConfig, "curl", "curl", metricScrapeCmd)
         if err != nil {
             return err
@@ -456,7 +482,7 @@ func getMetricsScrapeCommand(podIP, token string) []string {
 
 // getCurlCommand returns the command, as a slice of strings, for curl'ing
 // the test model server at the given name, namespace, port, and model name.
-// This command gets executed by a dummy pod that communites with Envoy
+// This command gets executed by a dummy pod that communicates with Envoy
 func getCurlCommand(name, ns, port, model string, timeout time.Duration, api string, promptOrMessages any, streaming bool) []string {
     body := map[string]any{
         "model": model,
@@ -478,6 +504,26 @@ func getCurlCommand(name, ns, port, model string, timeout time.Duration, api str
     }
     b, err := json.Marshal(body)
     gomega.Expect(err).NotTo(gomega.HaveOccurred())
+
+    fmt.Print([]string{
+        "curl",
+        "-i",
+        "--max-time",
+        strconv.Itoa((int)(timeout.Seconds())),
+        fmt.Sprintf("%s.%s.svc:%s/v1%s", name, ns, port, api),
+        "-H",
+        "Content-Type: application/json",
+        "-H",
+        "Cache-Control: no-cache",
+        "-H",
+        fmt.Sprintf("%v: inferenceobjective-sample", metadata.ObjectiveKey),
+        "-H",
+        fmt.Sprintf("%v: %s", metadata.ModelNameRewriteKey, targetModelName),
+        "-H",
+        "Connection: close",
+        "-d",
+        string(b),
+    })
     return []string{
         "curl",
         "-i",
@@ -538,7 +584,7 @@ func waitForDeploymentRollout(tc *testutils.TestConfig, deploy *appsv1.Deploymen
     }
 
     if currentDeploy.Generation > currentDeploy.Status.ObservedGeneration {
-        return fmt.Errorf("deployment generation not observed yet")
+        return errors.New("deployment generation not observed yet")
     }
 
     desiredReplicas := *currentDeploy.Spec.Replicas
@@ -579,7 +625,7 @@ func waitForDeploymentReady(tc *testutils.TestConfig, deploy *appsv1.Deployment)
     }
 
     if current.Status.ReadyReplicas == 0 {
-        return fmt.Errorf("no replicas are ready yet")
+        return errors.New("no replicas are ready yet")
     }
 
     return nil

0 commit comments
