Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .github/workflows/integration-test-k8s.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ on:
jobs:
integration-test:
runs-on: ubuntu-latest
timeout-minutes: 60
timeout-minutes: 75
strategy:
fail-fast: false # Continue testing other profiles even if one fails
matrix:
profile: [ai-gateway, aibrix]
profile: [ai-gateway, aibrix, llm-d]

steps:
- name: Check out the repo
Expand Down Expand Up @@ -159,4 +159,3 @@ jobs:
if: always()
run: |
make e2e-cleanup || true
2 changes: 1 addition & 1 deletion e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ The framework follows a **separation of concerns** design:
- **aibrix**: Tests Semantic Router with vLLM AIBrix integration
- **istio**: Tests Semantic Router with Istio Gateway (future)
- **production-stack**: Tests vLLM Production Stack configurations (future)
- **llm-d**: Tests with LLM-D (future)
- **llm-d**: Tests Semantic Router with LLM-D distributed inference
- **dynamo**: Tests with NVIDIA Dynamo (future)

## Directory Structure
Expand Down
4 changes: 4 additions & 0 deletions e2e/cmd/e2e/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ import (
aigateway "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
aibrix "github.com/vllm-project/semantic-router/e2e/profiles/aibrix"
dynamicconfig "github.com/vllm-project/semantic-router/e2e/profiles/dynamic-config"
llmd "github.com/vllm-project/semantic-router/e2e/profiles/llm-d"

// Import profiles to register test cases
_ "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
_ "github.com/vllm-project/semantic-router/e2e/profiles/aibrix"
_ "github.com/vllm-project/semantic-router/e2e/profiles/llm-d"
)

const version = "v1.0.0"
Expand Down Expand Up @@ -103,6 +105,8 @@ func getProfile(name string) (framework.Profile, error) {
return dynamicconfig.NewProfile(), nil
case "aibrix":
return aibrix.NewProfile(), nil
case "llm-d":
return llmd.NewProfile(), nil
// Add more profiles here as they are implemented
// case "istio":
// return istio.NewProfile(), nil
Expand Down
49 changes: 49 additions & 0 deletions e2e/profiles/llm-d/manifests/httproute-services.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# HTTPRoutes attaching to the shared "inference-gateway" Gateway. Each route
# forwards requests to one backing model Service, matched on the exact value of
# the x-selected-model header (presumably injected by the semantic router after
# model selection — verify against the router deployment's header config).
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vsr-llama8b-svc
  namespace: default
spec:
  parentRefs:
    # Bind this route to the inference gateway in the same namespace.
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: inference-gateway
  rules:
    - backendRefs:
        # Service fronting the llama3-8b backend; port matches the Service port.
        - name: vllm-llama3-8b-instruct
          port: 8000
      matches:
        # Match any path, but only when the router selected llama3-8b.
        - path:
            type: PathPrefix
            value: /
          headers:
            - type: Exact
              name: x-selected-model
              value: llama3-8b
      timeouts:
        # Generous per-request timeout to allow for slow generation responses.
        request: 300s
---
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vsr-phi4-mini-svc
  namespace: default
spec:
  parentRefs:
    # Bind this route to the same shared inference gateway.
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: inference-gateway
  rules:
    - backendRefs:
        # Service fronting the phi4-mini backend; port matches the Service port.
        - name: phi4-mini
          port: 8000
      matches:
        # Match any path, but only when the router selected phi4-mini.
        - path:
            type: PathPrefix
            value: /
          headers:
            - type: Exact
              name: x-selected-model
              value: phi4-mini
      timeouts:
        # Same 300s request timeout as the llama3-8b route, for consistency.
        request: 300s
83 changes: 83 additions & 0 deletions e2e/profiles/llm-d/manifests/inference-sim.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Simulated vLLM backends for the llm-d e2e profile. Each model is served by an
# llm-d inference simulator Deployment (2 replicas) plus a ClusterIP Service
# whose name matches the backendRef used in httproute-services.yaml.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-llama3-8b-instruct
  namespace: default
spec:
  # Two replicas so routing/load-balancing across pods is exercised in tests.
  replicas: 2
  selector:
    matchLabels:
      app: vllm-llama3-8b-instruct
  template:
    metadata:
      labels:
        app: vllm-llama3-8b-instruct
    spec:
      containers:
        - name: sim
          # Lightweight simulator that mimics a vLLM OpenAI-compatible server.
          image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.1
          args:
            # Model name the simulator reports; must match the
            # x-selected-model header value routed to this backend.
            - --model
            - llama3-8b
            - --port
            - "8000"
          ports:
            - containerPort: 8000
---
apiVersion: v1
kind: Service
metadata:
  name: vllm-llama3-8b-instruct
  namespace: default
  labels:
    app: vllm-llama3-8b-instruct
spec:
  type: ClusterIP
  selector:
    app: vllm-llama3-8b-instruct
  ports:
    # Service port and targetPort both 8000, matching the container port.
    - port: 8000
      targetPort: 8000
      protocol: TCP
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: phi4-mini
  namespace: default
spec:
  # Two replicas, mirroring the llama3-8b deployment above.
  replicas: 2
  selector:
    matchLabels:
      app: phi4-mini
  template:
    metadata:
      labels:
        app: phi4-mini
    spec:
      containers:
        - name: sim
          # Same simulator image/version as the llama3-8b backend.
          image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.1
          args:
            # Reported model name; must match the x-selected-model header
            # value routed to this backend.
            - --model
            - phi4-mini
            - --port
            - "8000"
          ports:
            - containerPort: 8000
---
apiVersion: v1
kind: Service
metadata:
  name: phi4-mini
  namespace: default
  labels:
    app: phi4-mini
spec:
  type: ClusterIP
  selector:
    app: phi4-mini
  ports:
    # Service port and targetPort both 8000, matching the container port.
    - port: 8000
      targetPort: 8000
      protocol: TCP
27 changes: 27 additions & 0 deletions e2e/profiles/llm-d/manifests/rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# RBAC for the llm-d endpoint-picker (EPP) service accounts: read-only access
# to Inference Extension resources (both the GA and experimental API groups)
# and to pods, cluster-wide.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: llmd-epp-access
rules:
  # Inference Extension CRDs; both API groups are listed so the role works
  # across versions of the Gateway API Inference Extension.
  - apiGroups: ["inference.networking.k8s.io", "inference.networking.x-k8s.io"]
    resources: ["inferencepools", "inferenceobjectives"]
    verbs: ["get", "list", "watch"]
  # Pod read access — presumably needed by the EPP to discover backend
  # endpoints; verify against the EPP deployment's requirements.
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: llmd-epp-access-binding
subjects:
  # One EPP service account per model backend, both in the default namespace.
  - kind: ServiceAccount
    name: vllm-llama3-8b-instruct-epp
    namespace: default
  - kind: ServiceAccount
    name: vllm-phi4-mini-epp
    namespace: default
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: llmd-epp-access
Loading