Skip to content

Commit 53d4ad4

Browse files
committed
update testcase
Signed-off-by: samzong <[email protected]>
1 parent 1ee7db2 commit 53d4ad4

File tree

6 files changed

+236
-145
lines changed

6 files changed

+236
-145
lines changed

e2e/testcases/llmd_auto_routing.go

Lines changed: 15 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
11
package testcases
22

33
import (
4-
"bytes"
54
"context"
6-
"encoding/json"
75
"fmt"
8-
"io"
9-
"net/http"
106
"time"
117

128
pkgtestcases "github.com/vllm-project/semantic-router/e2e/pkg/testcases"
@@ -28,53 +24,30 @@ func llmdAutoRouting(ctx context.Context, client *kubernetes.Clientset, opts pkg
2824
}
2925
defer stop()
3026

31-
type caseItem struct {
27+
cases := []struct {
3228
prompt string
33-
want string
34-
}
35-
cases := []caseItem{
36-
{"What is 2+2?", "math"},
37-
{"Explain TCP three-way handshake", "computer science"},
29+
model string
30+
}{
31+
{prompt: "What is 2+2?", model: "phi4-mini"},
32+
{prompt: "Explain TCP three-way handshake", model: "llama3-8b"},
3833
}
34+
3935
for _, c := range cases {
40-
category, err := classifyOnce(ctx, localPort, c.prompt)
36+
res, err := doLLMDChat(ctx, localPort, "auto", c.prompt, 45*time.Second)
4137
if err != nil {
4238
return err
4339
}
44-
if category != c.want {
45-
return fmt.Errorf("expected category %s got %s", c.want, category)
40+
selected := getSelectedModel(res.headers)
41+
pod := getInferencePod(res.headers)
42+
if selected != c.model {
43+
return fmt.Errorf("prompt '%s' expected model %s got %s", c.prompt, c.model, selected)
44+
}
45+
if pod == "" {
46+
return fmt.Errorf("missing x-inference-pod for prompt '%s'", c.prompt)
4647
}
4748
}
4849
if opts.SetDetails != nil {
49-
opts.SetDetails(map[string]interface{}{
50-
"cases": len(cases),
51-
})
50+
opts.SetDetails(map[string]interface{}{"cases": len(cases)})
5251
}
5352
return nil
5453
}
55-
56-
func classifyOnce(ctx context.Context, port, prompt string) (string, error) {
57-
body := map[string]interface{}{
58-
"model": "auto",
59-
"messages": []map[string]string{
60-
{"role": "user", "content": prompt},
61-
},
62-
}
63-
data, _ := json.Marshal(body)
64-
req, err := http.NewRequestWithContext(ctx, "POST", fmt.Sprintf("http://localhost:%s/v1/chat/completions", port), bytes.NewBuffer(data))
65-
if err != nil {
66-
return "", err
67-
}
68-
req.Header.Set("Content-Type", "application/json")
69-
client := &http.Client{Timeout: 30 * time.Second}
70-
resp, err := client.Do(req)
71-
if err != nil {
72-
return "", err
73-
}
74-
defer resp.Body.Close()
75-
b, _ := io.ReadAll(resp.Body)
76-
if resp.StatusCode != http.StatusOK {
77-
return "", fmt.Errorf("request failed: %d %s", resp.StatusCode, string(b))
78-
}
79-
return resp.Header.Get("x-vsr-selected-category"), nil
80-
}

e2e/testcases/llmd_distributed_inference.go

Lines changed: 51 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package testcases
33
import (
44
"context"
55
"fmt"
6+
"sync"
7+
"time"
68

79
pkgtestcases "github.com/vllm-project/semantic-router/e2e/pkg/testcases"
810
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -18,17 +20,15 @@ func init() {
1820
}
1921

2022
func llmdDistributed(ctx context.Context, client *kubernetes.Clientset, opts pkgtestcases.TestCaseOptions) error {
21-
counts := map[string]int{}
22-
names := []string{"vllm-llama3-8b-instruct", "phi4-mini"}
23-
for _, name := range names {
23+
backendDeploys := []string{"vllm-llama3-8b-instruct", "phi4-mini"}
24+
for _, name := range backendDeploys {
2425
dep, err := client.AppsV1().Deployments("default").Get(ctx, name, metav1.GetOptions{})
2526
if err != nil {
2627
return err
2728
}
2829
if dep.Status.ReadyReplicas < 2 {
2930
return fmt.Errorf("%s ready replicas %d < 2", name, dep.Status.ReadyReplicas)
3031
}
31-
counts[name] = int(dep.Status.ReadyReplicas)
3232
}
3333

3434
localPort, stop, err := setupServiceConnection(ctx, client, opts)
@@ -37,21 +37,60 @@ func llmdDistributed(ctx context.Context, client *kubernetes.Clientset, opts pkg
3737
}
3838
defer stop()
3939

40-
success := 0
41-
total := 10
40+
const total = 30
41+
var (
42+
success int
43+
mu sync.Mutex
44+
podHits = map[string]int{}
45+
)
46+
var wg sync.WaitGroup
47+
4248
for i := 0; i < total; i++ {
43-
if err := simpleChat(ctx, localPort, "llama3-8b", fmt.Sprintf("req-%d", i)); err == nil {
49+
i := i
50+
wg.Add(1)
51+
go func() {
52+
defer wg.Done()
53+
res, err := doLLMDChat(ctx, localPort, "llama3-8b", fmt.Sprintf("req-%d", i), 45*time.Second)
54+
if err != nil {
55+
return
56+
}
57+
pod := getInferencePod(res.headers)
58+
if pod == "" {
59+
return
60+
}
61+
mu.Lock()
4462
success++
63+
podHits[pod]++
64+
mu.Unlock()
65+
}()
66+
}
67+
68+
wg.Wait()
69+
70+
successRate := float64(success) / float64(total)
71+
if successRate < 0.98 {
72+
return fmt.Errorf("success rate %.2f below 0.98", successRate)
73+
}
74+
if len(podHits) < 2 {
75+
return fmt.Errorf("expected hits on >=2 pods, got %d", len(podHits))
76+
}
77+
var max, min int
78+
for _, c := range podHits {
79+
if c > max {
80+
max = c
81+
}
82+
if min == 0 || c < min {
83+
min = c
4584
}
4685
}
47-
if success != total {
48-
return fmt.Errorf("distributed calls success %d/%d", success, total)
86+
if min == 0 || float64(max)/float64(min) > 2.0 {
87+
return fmt.Errorf("pod hit imbalance max/min=%d/%d", max, min)
4988
}
5089
if opts.SetDetails != nil {
5190
opts.SetDetails(map[string]interface{}{
52-
"ready_llama": counts["vllm-llama3-8b-instruct"],
53-
"ready_phi4": counts["phi4-mini"],
54-
"requests": total,
91+
"success_rate": successRate,
92+
"total": total,
93+
"pod_hits": podHits,
5594
})
5695
}
5796
return nil

e2e/testcases/llmd_failover_recovery.go

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,39 +23,66 @@ func llmdFailover(ctx context.Context, client *kubernetes.Clientset, opts pkgtes
2323
if err != nil {
2424
return err
2525
}
26-
if len(pods.Items) < 1 {
27-
return fmt.Errorf("no phi4-mini pods")
26+
if len(pods.Items) < 2 {
27+
return fmt.Errorf("need >=2 phi4-mini pods for failover, got %d", len(pods.Items))
2828
}
2929
target := pods.Items[0].Name
3030
if err := client.CoreV1().Pods("default").Delete(ctx, target, metav1.DeleteOptions{}); err != nil {
3131
return err
3232
}
33+
deleteTime := time.Now()
3334

34-
time.Sleep(10 * time.Second)
35+
time.Sleep(5 * time.Second)
3536

3637
localPort, stop, err := setupServiceConnection(ctx, client, opts)
3738
if err != nil {
3839
return err
3940
}
4041
defer stop()
4142

42-
total := 20
43+
deadline := time.Now().Add(60 * time.Second)
44+
total := 0
4345
success := 0
44-
for i := 0; i < total; i++ {
45-
if err := simpleChat(ctx, localPort, "phi4-mini", fmt.Sprintf("failover-%d", i)); err == nil {
46+
podHits := map[string]int{}
47+
var recoveredAt time.Time
48+
49+
for time.Now().Before(deadline) {
50+
total++
51+
res, err := doLLMDChat(ctx, localPort, "phi4-mini", fmt.Sprintf("failover-%d", total), 45*time.Second)
52+
if err == nil {
4653
success++
54+
pod := getInferencePod(res.headers)
55+
if pod == target {
56+
return fmt.Errorf("traffic routed to deleted pod %s", target)
57+
}
58+
if pod != "" {
59+
podHits[pod]++
60+
}
61+
if recoveredAt.IsZero() {
62+
recoveredAt = time.Now()
63+
}
4764
}
48-
time.Sleep(2 * time.Second)
65+
time.Sleep(1 * time.Second)
4966
}
5067
rate := float64(success) / float64(total)
5168
if rate < 0.95 {
5269
return fmt.Errorf("success rate %.2f below 0.95", rate)
5370
}
71+
if len(podHits) == 0 {
72+
return fmt.Errorf("no successful hits after deletion")
73+
}
74+
recoverySeconds := time.Since(deleteTime).Seconds()
75+
if !recoveredAt.IsZero() {
76+
recoverySeconds = recoveredAt.Sub(deleteTime).Seconds()
77+
}
5478
if opts.SetDetails != nil {
5579
opts.SetDetails(map[string]interface{}{
56-
"deleted_pod": target,
57-
"success": success,
58-
"total": total,
80+
"deleted_pod": target,
81+
"success": success,
82+
"total": total,
83+
"success_rate": rate,
84+
"pod_hits": podHits,
85+
"recovery_seconds": recoverySeconds,
5986
})
6087
}
6188
return nil

e2e/testcases/llmd_health_check.go

Lines changed: 30 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,13 @@
11
package testcases
22

33
import (
4-
"bytes"
54
"context"
6-
"encoding/json"
75
"fmt"
8-
"io"
9-
"net/http"
106
"time"
117

128
pkgtestcases "github.com/vllm-project/semantic-router/e2e/pkg/testcases"
139
"k8s.io/apimachinery/pkg/apis/meta/v1"
10+
"k8s.io/apimachinery/pkg/util/wait"
1411
"k8s.io/client-go/kubernetes"
1512
)
1613

@@ -24,6 +21,16 @@ func init() {
2421

2522
func llmdHealth(ctx context.Context, client *kubernetes.Clientset, opts pkgtestcases.TestCaseOptions) error {
2623
ns := "default"
24+
crdGVs := []string{
25+
"gateway.networking.k8s.io/v1",
26+
"inference.networking.k8s.io/v1",
27+
}
28+
for _, gv := range crdGVs {
29+
if err := ensureGroupVersion(ctx, client, gv); err != nil {
30+
return err
31+
}
32+
}
33+
2734
required := []struct {
2835
ns string
2936
dep string
@@ -36,7 +43,7 @@ func llmdHealth(ctx context.Context, client *kubernetes.Clientset, opts pkgtestc
3643
{ns, "phi4-mini"},
3744
}
3845
for _, r := range required {
39-
if err := waitDeploymentReady(ctx, client, r.ns, r.dep); err != nil {
46+
if err := waitDeploymentReady(ctx, client, r.ns, r.dep, 1); err != nil {
4047
return err
4148
}
4249
}
@@ -47,51 +54,37 @@ func llmdHealth(ctx context.Context, client *kubernetes.Clientset, opts pkgtestc
4754
}
4855
defer stop()
4956

50-
if err := simpleChat(ctx, localPort, "llama3-8b", "ping"); err != nil {
57+
res, err := doLLMDChat(ctx, localPort, "llama3-8b", "ping", 30*time.Second)
58+
if err != nil {
5159
return err
5260
}
5361

5462
if opts.SetDetails != nil {
5563
opts.SetDetails(map[string]interface{}{
56-
"deployments": len(required),
64+
"deployments": len(required),
65+
"crds": crdGVs,
66+
"latency_ms": res.duration.Milliseconds(),
67+
"x-selected-model": getSelectedModel(res.headers),
68+
"x-inference-pod": getInferencePod(res.headers),
5769
})
5870
}
5971
return nil
6072
}
6173

62-
func waitDeploymentReady(ctx context.Context, client *kubernetes.Clientset, ns, name string) error {
63-
dep, err := client.AppsV1().Deployments(ns).Get(ctx, name, v1.GetOptions{})
64-
if err != nil {
65-
return err
66-
}
67-
if dep.Status.ReadyReplicas < 1 {
68-
return fmt.Errorf("deployment %s/%s not ready", ns, name)
69-
}
70-
return nil
74+
func waitDeploymentReady(ctx context.Context, client *kubernetes.Clientset, ns, name string, want int32) error {
75+
return wait.PollUntilContextTimeout(ctx, 2*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
76+
dep, err := client.AppsV1().Deployments(ns).Get(ctx, name, v1.GetOptions{})
77+
if err != nil {
78+
return false, err
79+
}
80+
return dep.Status.ReadyReplicas >= want, nil
81+
})
7182
}
7283

73-
func simpleChat(ctx context.Context, port, model, content string) error {
74-
body := map[string]interface{}{
75-
"model": model,
76-
"messages": []map[string]string{
77-
{"role": "user", "content": content},
78-
},
79-
}
80-
data, _ := json.Marshal(body)
81-
req, err := http.NewRequestWithContext(ctx, "POST", fmt.Sprintf("http://localhost:%s/v1/chat/completions", port), bytes.NewBuffer(data))
84+
func ensureGroupVersion(ctx context.Context, client *kubernetes.Clientset, gv string) error {
85+
_, err := client.Discovery().ServerResourcesForGroupVersion(gv)
8286
if err != nil {
83-
return err
84-
}
85-
req.Header.Set("Content-Type", "application/json")
86-
client := &http.Client{Timeout: 30 * time.Second}
87-
resp, err := client.Do(req)
88-
if err != nil {
89-
return err
90-
}
91-
defer resp.Body.Close()
92-
b, _ := io.ReadAll(resp.Body)
93-
if resp.StatusCode != http.StatusOK {
94-
return fmt.Errorf("chat failed: %d %s", resp.StatusCode, string(b))
87+
return fmt.Errorf("CRD groupVersion %s not present: %w", gv, err)
9588
}
9689
return nil
9790
}

0 commit comments

Comments (0)