add istio resources in deploy and update routing rules

samzong · samzong · commit aa79e71bbb3b · 2025-11-21T09:42:53.000+08:00
Signed-off-by: samzong <samzong.lu@gmail.com>
diff --git a/e2e/profiles/llm-d/profile.go b/e2e/profiles/llm-d/profile.go
@@ -96,6 +96,8 @@ func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions)
 	_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/inferencepool-phi4.yaml")
 	_ = p.kubectlDelete(ctx, "e2e/profiles/llm-d/manifests/inference-sim.yaml")
 	_ = p.kubectlDelete(ctx, "e2e/profiles/llm-d/manifests/rbac.yaml")
+	_ = p.kubectlDelete(ctx, "deploy/kubernetes/istio/envoyfilter.yaml")
+	_ = p.kubectlDelete(ctx, "deploy/kubernetes/istio/destinationrule.yaml")
 	_ = p.kubectlDelete(ctx, "deploy/kubernetes/istio/gateway.yaml")
 
 	deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)
@@ -227,6 +229,12 @@ func (p *Profile) deployGatewayRoutes(ctx context.Context) error {
 	if err := p.kubectlApply(ctx, "deploy/kubernetes/llmd-base/httproute-phi4-pool.yaml"); err != nil {
 		return err
 	}
+	if err := p.kubectlApply(ctx, "deploy/kubernetes/istio/destinationrule.yaml"); err != nil {
+		return err
+	}
+	if err := p.kubectlApply(ctx, "deploy/kubernetes/istio/envoyfilter.yaml"); err != nil {
+		return err
+	}
 	return nil
 }
 
diff --git a/e2e/profiles/llm-d/values.yaml b/e2e/profiles/llm-d/values.yaml
@@ -1,58 +1,61 @@
 config:
   default_model: llama3-8b
-  categories:
-    - name: math
-      description: "Math"
-    - name: computer science
-      description: "Computer science"
-    - name: other
-      description: "Other"
   decisions:
-    - name: math_decision
+    - name: math_route
       priority: 20
       rules:
         operator: OR
         conditions:
-          - type: domain
+          - type: keyword
             name: math
+            keywords: ["math", "+", "-", "*", "/", "sum", "add", "calculate"]
+            case_sensitive: false
       modelRefs:
-        - model: phi4-mini
-          use_reasoning: false
-    - name: cs_decision
+        - model: llama3-8b
+    - name: cs_route
       priority: 10
       rules:
         operator: OR
         conditions:
-          - type: domain
-            name: computer science
+          - type: keyword
+            name: cs
+            keywords: ["computer", "network", "tcp", "ip", "http", "server", "code"]
+            case_sensitive: false
       modelRefs:
         - model: llama3-8b
-          use_reasoning: false
-    - name: other_decision
+    - name: default_route
       priority: 1
       rules:
         operator: OR
         conditions:
-          - type: domain
+          - type: keyword
             name: other
+            keywords: [""]
+            case_sensitive: false
       modelRefs:
         - model: llama3-8b
-          use_reasoning: false
-  classifier:
-    category_model:
-      model_id: models/category_classifier_modernbert-base_model
-      use_modernbert: true
-      threshold: 0.6
-      use_cpu: true
-      category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
   semantic_cache:
     enabled: false
+  prompt_guard:
+    enabled: false
+  tools:
+    enabled: false
+  classifier:
+    category_model:
+      model_id: ""
+      threshold: 1.0
+      use_modernbert: false
+      category_mapping_path: ""
+    pii_model:
+      model_id: ""
+      threshold: 1.0
+      use_modernbert: false
+      pii_mapping_path: ""
   bert_model:
     model_id: models/all-MiniLM-L12-v2
     threshold: 0.6
     use_cpu: true
-  api:
-    batch_classification:
-      max_batch_size: 16
-      concurrency_threshold: 4
-      max_concurrency: 8
+
+# Stay consistent with the default chart: initContainer, model downloads, and the PVC all use the chart defaults
+image:
+  pullPolicy: IfNotPresent