Skip to content

Commit aa79e71

Browse files
committed
Add Istio resources (DestinationRule, EnvoyFilter) to the deploy step and update routing rules
Signed-off-by: samzong <[email protected]>
1 parent ee01b19 commit aa79e71

File tree

2 files changed

+41
-30
lines changed

2 files changed

+41
-30
lines changed

e2e/profiles/llm-d/profile.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions)
9696
_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/inferencepool-phi4.yaml")
9797
_ = p.kubectlDelete(ctx, "e2e/profiles/llm-d/manifests/inference-sim.yaml")
9898
_ = p.kubectlDelete(ctx, "e2e/profiles/llm-d/manifests/rbac.yaml")
99+
_ = p.kubectlDelete(ctx, "deploy/kubernetes/istio/envoyfilter.yaml")
100+
_ = p.kubectlDelete(ctx, "deploy/kubernetes/istio/destinationrule.yaml")
99101
_ = p.kubectlDelete(ctx, "deploy/kubernetes/istio/gateway.yaml")
100102

101103
deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)
@@ -227,6 +229,12 @@ func (p *Profile) deployGatewayRoutes(ctx context.Context) error {
227229
if err := p.kubectlApply(ctx, "deploy/kubernetes/llmd-base/httproute-phi4-pool.yaml"); err != nil {
228230
return err
229231
}
232+
if err := p.kubectlApply(ctx, "deploy/kubernetes/istio/destinationrule.yaml"); err != nil {
233+
return err
234+
}
235+
if err := p.kubectlApply(ctx, "deploy/kubernetes/istio/envoyfilter.yaml"); err != nil {
236+
return err
237+
}
230238
return nil
231239
}
232240

e2e/profiles/llm-d/values.yaml

Lines changed: 33 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,61 @@
11
config:
22
default_model: llama3-8b
3-
categories:
4-
- name: math
5-
description: "Math"
6-
- name: computer science
7-
description: "Computer science"
8-
- name: other
9-
description: "Other"
103
decisions:
11-
- name: math_decision
4+
- name: math_route
125
priority: 20
136
rules:
147
operator: OR
158
conditions:
16-
- type: domain
9+
- type: keyword
1710
name: math
11+
keywords: ["math", "+", "-", "*", "/", "sum", "add", "calculate"]
12+
case_sensitive: false
1813
modelRefs:
19-
- model: phi4-mini
20-
use_reasoning: false
21-
- name: cs_decision
14+
- model: llama3-8b
15+
- name: cs_route
2216
priority: 10
2317
rules:
2418
operator: OR
2519
conditions:
26-
- type: domain
27-
name: computer science
20+
- type: keyword
21+
name: cs
22+
keywords: ["computer", "network", "tcp", "ip", "http", "server", "code"]
23+
case_sensitive: false
2824
modelRefs:
2925
- model: llama3-8b
30-
use_reasoning: false
31-
- name: other_decision
26+
- name: default_route
3227
priority: 1
3328
rules:
3429
operator: OR
3530
conditions:
36-
- type: domain
31+
- type: keyword
3732
name: other
33+
keywords: [""]
34+
case_sensitive: false
3835
modelRefs:
3936
- model: llama3-8b
40-
use_reasoning: false
41-
classifier:
42-
category_model:
43-
model_id: models/category_classifier_modernbert-base_model
44-
use_modernbert: true
45-
threshold: 0.6
46-
use_cpu: true
47-
category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
4837
semantic_cache:
4938
enabled: false
39+
prompt_guard:
40+
enabled: false
41+
tools:
42+
enabled: false
43+
classifier:
44+
category_model:
45+
model_id: ""
46+
threshold: 1.0
47+
use_modernbert: false
48+
category_mapping_path: ""
49+
pii_model:
50+
model_id: ""
51+
threshold: 1.0
52+
use_modernbert: false
53+
pii_mapping_path: ""
5054
bert_model:
5155
model_id: models/all-MiniLM-L12-v2
5256
threshold: 0.6
5357
use_cpu: true
54-
api:
55-
batch_classification:
56-
max_batch_size: 16
57-
concurrency_threshold: 4
58-
max_concurrency: 8
58+
59+
# Stay consistent with the default chart: the initContainer, model download, and PVC all use the chart's default values
60+
image:
61+
pullPolicy: IfNotPresent

0 commit comments

Comments
 (0)