feat: add LLM-D profile for E2E testing framework (#705)

samzong · web-flow · commit 37e594159fd7 · 2025-11-26T21:08:42.000+08:00
diff --git a/.github/workflows/integration-test-k8s.yml b/.github/workflows/integration-test-k8s.yml
@@ -18,11 +18,11 @@ on:
 jobs:
   integration-test:
     runs-on: ubuntu-latest
-    timeout-minutes: 60
+    timeout-minutes: 75
     strategy:
       fail-fast: false  # Continue testing other profiles even if one fails
       matrix:
-        profile: [ai-gateway, aibrix, routing-strategies]
+        profile: [ai-gateway, aibrix, routing-strategies, llm-d]
 
     steps:
       - name: Check out the repo
@@ -165,4 +165,3 @@ jobs:
         if: always()
         run: |
           make e2e-cleanup || true
-
diff --git a/e2e/README.md b/e2e/README.md
@@ -16,7 +16,7 @@ The framework follows a **separation of concerns** design:
 - **aibrix**: Tests Semantic Router with vLLM AIBrix integration
 - **istio**: Tests Semantic Router with Istio Gateway (future)
 - **production-stack**: Tests vLLM Production Stack configurations (future)
-- **llm-d**: Tests with LLM-D (future)
+- **llm-d**: Tests Semantic Router with LLM-D distributed inference
 - **dynamo**: Tests with Nvidia Dynamo (future)
 
 ## Directory Structure
diff --git a/e2e/cmd/e2e/main.go b/e2e/cmd/e2e/main.go
@@ -12,11 +12,13 @@ import (
 	aigateway "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
 	aibrix "github.com/vllm-project/semantic-router/e2e/profiles/aibrix"
 	dynamicconfig "github.com/vllm-project/semantic-router/e2e/profiles/dynamic-config"
+	llmd "github.com/vllm-project/semantic-router/e2e/profiles/llm-d"
 	routingstrategies "github.com/vllm-project/semantic-router/e2e/profiles/routing-strategies"
 
 	// Import profiles to register test cases
 	_ "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
 	_ "github.com/vllm-project/semantic-router/e2e/profiles/aibrix"
+	_ "github.com/vllm-project/semantic-router/e2e/profiles/llm-d"
 	_ "github.com/vllm-project/semantic-router/e2e/profiles/routing-strategies"
 )
 
@@ -105,6 +107,8 @@ func getProfile(name string) (framework.Profile, error) {
 		return dynamicconfig.NewProfile(), nil
 	case "aibrix":
 		return aibrix.NewProfile(), nil
+	case "llm-d":
+		return llmd.NewProfile(), nil
 	case "routing-strategies":
 		return routingstrategies.NewProfile(), nil
 	// Add more profiles here as they are implemented
diff --git a/e2e/profiles/llm-d/manifests/httproute-services.yaml b/e2e/profiles/llm-d/manifests/httproute-services.yaml
@@ -0,0 +1,51 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute  # Routes requests carrying x-selected-model: llama3-8b to the llama3 InferencePool.
+metadata:
+  name: vsr-llama8b-svc
+  namespace: default
+spec:
+  parentRefs:  # Attaches this route to the Gateway named inference-gateway.
+    - group: gateway.networking.k8s.io
+      kind: Gateway
+      name: inference-gateway
+  rules:
+    - backendRefs:  # Backend is an InferencePool resource rather than a plain Service.
+        - group: inference.networking.k8s.io
+          kind: InferencePool
+          name: vllm-llama3-8b-instruct
+      matches:
+        - path:  # Prefix "/" matches every path; the header match below selects the model.
+            type: PathPrefix
+            value: /
+          headers:
+            - type: Exact
+              name: x-selected-model  # NOTE(review): presumably set by the semantic router - confirm.
+              value: llama3-8b
+      timeouts:
+        request: 300s  # Request timeout of 300 seconds for this rule.
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute  # Routes requests carrying x-selected-model: phi4-mini to the phi4-mini InferencePool.
+metadata:
+  name: vsr-phi4-mini-svc
+  namespace: default
+spec:
+  parentRefs:  # Attaches this route to the Gateway named inference-gateway.
+    - group: gateway.networking.k8s.io
+      kind: Gateway
+      name: inference-gateway
+  rules:
+    - backendRefs:  # Backend is an InferencePool resource rather than a plain Service.
+        - group: inference.networking.k8s.io
+          kind: InferencePool
+          name: vllm-phi4-mini
+      matches:
+        - path:  # Prefix "/" matches every path; the header match below selects the model.
+            type: PathPrefix
+            value: /
+          headers:
+            - type: Exact
+              name: x-selected-model  # NOTE(review): presumably set by the semantic router - confirm.
+              value: phi4-mini
+      timeouts:
+        request: 300s  # Request timeout of 300 seconds for this rule.
diff --git a/e2e/profiles/llm-d/manifests/inference-sim.yaml b/e2e/profiles/llm-d/manifests/inference-sim.yaml
@@ -0,0 +1,101 @@
+apiVersion: apps/v1
+kind: Deployment  # Runs the llm-d inference simulator image, started with --model llama3-8b.
+metadata:
+  name: vllm-llama3-8b-instruct
+  namespace: default
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: vllm-llama3-8b-instruct
+  template:
+    metadata:
+      labels:
+        app: vllm-llama3-8b-instruct  # Must equal spec.selector.matchLabels above.
+    spec:
+      containers:
+        - name: sim
+          image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.1  # Pinned to a fixed tag.
+          env:
+            - name: POD_NAME  # Filled from the pod's own metadata.name.
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: POD_NAMESPACE  # Filled from the pod's own metadata.namespace.
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+          args:
+            - --model
+            - llama3-8b  # Same value the vsr-llama8b-svc HTTPRoute matches in x-selected-model.
+            - --port
+            - "8000"  # Same port as containerPort below.
+          ports:
+            - containerPort: 8000
+---
+apiVersion: v1
+kind: Service  # ClusterIP Service in front of the vllm-llama3-8b-instruct pods.
+metadata:
+  name: vllm-llama3-8b-instruct
+  namespace: default
+  labels:
+    app: vllm-llama3-8b-instruct
+spec:
+  type: ClusterIP
+  selector:
+    app: vllm-llama3-8b-instruct  # Selects pods labelled by the Deployment of the same name.
+  ports:
+    - port: 8000  # Service port; forwarded to the same port number on the pod.
+      targetPort: 8000
+      protocol: TCP
+---
+apiVersion: apps/v1
+kind: Deployment  # Runs the llm-d inference simulator image, started with --model phi4-mini.
+metadata:
+  name: phi4-mini  # NOTE(review): HTTPRoute and RBAC use "vllm-phi4-mini"; confirm the InferencePool selector targets app: phi4-mini.
+  namespace: default
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: phi4-mini
+  template:
+    metadata:
+      labels:
+        app: phi4-mini  # Must equal spec.selector.matchLabels above.
+    spec:
+      containers:
+        - name: sim
+          image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.1  # Pinned to a fixed tag.
+          env:
+            - name: POD_NAME  # Filled from the pod's own metadata.name.
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: POD_NAMESPACE  # Filled from the pod's own metadata.namespace.
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+          args:
+            - --model
+            - phi4-mini  # Same value the vsr-phi4-mini-svc HTTPRoute matches in x-selected-model.
+            - --port
+            - "8000"  # Same port as containerPort below.
+          ports:
+            - containerPort: 8000
+---
+apiVersion: v1
+kind: Service  # ClusterIP Service in front of the phi4-mini pods.
+metadata:
+  name: phi4-mini
+  namespace: default
+  labels:
+    app: phi4-mini
+spec:
+  type: ClusterIP
+  selector:
+    app: phi4-mini  # Selects pods labelled by the Deployment of the same name.
+  ports:
+    - port: 8000  # Service port; forwarded to the same port number on the pod.
+      targetPort: 8000
+      protocol: TCP
diff --git a/e2e/profiles/llm-d/manifests/rbac.yaml b/e2e/profiles/llm-d/manifests/rbac.yaml
@@ -0,0 +1,27 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole  # Read-only (get/list/watch) access to inference resources and pods, cluster-wide.
+metadata:
+  name: llmd-epp-access
+rules:
+  - apiGroups: ["inference.networking.k8s.io", "inference.networking.x-k8s.io"]  # Both API group names are covered.
+    resources: ["inferencepools", "inferenceobjectives"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]  # Empty string is the core API group.
+    resources: ["pods"]
+    verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding  # Grants the llmd-epp-access ClusterRole to the two ServiceAccounts listed below.
+metadata:
+  name: llmd-epp-access-binding
+subjects:  # NOTE(review): these ServiceAccounts are not defined in this manifest; presumably created by the InferencePool install - confirm.
+  - kind: ServiceAccount
+    name: vllm-llama3-8b-instruct-epp
+    namespace: default
+  - kind: ServiceAccount
+    name: vllm-phi4-mini-epp
+    namespace: default
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: llmd-epp-access
diff --git a/e2e/profiles/llm-d/profile.go b/e2e/profiles/llm-d/profile.go
diff --git a/e2e/profiles/llm-d/values.yaml b/e2e/profiles/llm-d/values.yaml
diff --git a/tools/make/e2e.mk b/tools/make/e2e.mk