Skip to content

Commit b11c661

Browse files
committed
✨ feat(milvus): add Kubernetes deployment configs and semantic cache support
Signed-off-by: samzong <[email protected]>
1 parent 32c3399 commit b11c661

File tree

14 files changed

+774
-3
lines changed

14 files changed

+774
-3
lines changed
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
---
# Semantic-router configuration for the KServe deployment, with the semantic
# cache backed by Milvus.
#
# This manifest is a template: the double-brace placeholders (EMBEDDING_MODEL,
# INFERENCESERVICE_NAME, PREDICTOR_SERVICE_IP, MODEL_NAME) are presumably
# substituted by the deploy tooling before the manifest is applied.
apiVersion: v1
kind: ConfigMap
metadata:
  name: semantic-router-kserve-config-milvus
  labels:
    app: semantic-router
    component: config
data:
  # Every placeholder in config.yaml is double-quoted. An unquoted placeholder
  # at the start of a value is read as a flow mapping when the template is
  # parsed unrendered, and after rendering a value such as 1.5, true or an
  # empty string would be typed as a number, boolean or null instead of a
  # string.
  config.yaml: |
    bert_model:
      model_id: "models/{{EMBEDDING_MODEL}}"
      threshold: 0.6
      use_cpu: true

    semantic_cache:
      enabled: true
      backend_type: "milvus"
      backend_config_path: "config/semantic-cache/milvus.yaml"
      similarity_threshold: 0.8
      ttl_seconds: 3600
      embedding_model: "bert"

    tools:
      enabled: false
      top_k: 3
      similarity_threshold: 0.2
      tools_db_path: "config/tools_db.json"
      fallback_to_empty: true

    prompt_guard:
      enabled: true
      use_modernbert: true
      model_id: "models/jailbreak_classifier_modernbert-base_model"
      threshold: 0.7
      use_cpu: true
      jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

    vllm_endpoints:
      - name: "{{INFERENCESERVICE_NAME}}-endpoint"
        address: "{{PREDICTOR_SERVICE_IP}}"
        port: 8080
        weight: 1

    model_config:
      "{{MODEL_NAME}}":
        reasoning_family: "qwen3"
        preferred_endpoints: ["{{INFERENCESERVICE_NAME}}-endpoint"]

    classifier:
      category_model:
        model_id: "models/category_classifier_modernbert-base_model"
        use_modernbert: true
        threshold: 0.6
        use_cpu: true
        category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
      pii_model:
        model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
        use_modernbert: true
        threshold: 0.7
        use_cpu: true
        pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

    categories:
      - name: business
      - name: law
      - name: psychology
      - name: biology
      - name: chemistry
      - name: history
      - name: other
      - name: health
      - name: economics
      - name: math
      - name: physics
      - name: computer_science
      - name: philosophy
      - name: engineering

    strategy: "priority"

    decisions:
      - name: "general_decision"
        description: "General knowledge and miscellaneous topics"
        priority: 50
        rules:
          operator: "AND"
          conditions:
            - type: "domain"
              name: "other"
        modelRefs:
          - model: "{{MODEL_NAME}}"
            use_reasoning: false
        plugins:
          - type: "semantic-cache"
            configuration:
              enabled: true
              similarity_threshold: 0.75
          - type: "pii"
            configuration:
              enabled: true
              pii_types_allowed: []

    default_model: "{{MODEL_NAME}}"

    reasoning_families:
      deepseek:
        type: "chat_template_kwargs"
        parameter: "thinking"
      qwen3:
        type: "chat_template_kwargs"
        parameter: "enable_thinking"
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
---
# BERT model settings; semantic_cache.embedding_model below refers to "bert".
bert_model:
  model_id: 'models/all-MiniLM-L12-v2'
  threshold: 0.6
  use_cpu: true

# Semantic cache settings.
# NOTE(review): backend_type is "memory" while backend_config_path points at
# the Milvus client config -- confirm the in-memory default is intentional.
semantic_cache:
  enabled: true
  backend_type: 'memory'
  backend_config_path: 'config/semantic-cache/milvus.yaml'
  embedding_model: 'bert'
  similarity_threshold: 0.8
  ttl_seconds: 3600
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
---
# Milvus client settings, published as the file milvus.yaml.
#
# NOTE(review): the host below assumes a Service named "milvus-cluster" in the
# vllm-semantic-router-system namespace. Confirm the Service name produced by
# the chosen install method (Milvus Operator is believed to name the Service
# "<cr-name>-milvus") -- TODO verify.
apiVersion: v1
kind: ConfigMap
metadata:
  name: milvus-client-config
  namespace: vllm-semantic-router-system
data:
  milvus.yaml: |
    connection:
      host: 'milvus-cluster.vllm-semantic-router-system.svc.cluster.local'
      port: 19530
      timeout: 60
      auth:
        enabled: false
        username: ''
        password: ''
      tls:
        enabled: false
    collection:
      name: 'semantic_cache'
      description: 'Semantic cache'
      vector_field:
        name: 'embedding'
        dimension: 384
        metric_type: 'IP'
      index:
        type: 'HNSW'
        params:
          M: 16
          efConstruction: 64
    search:
      params:
        ef: 64
      topk: 10
      consistency_level: 'Session'
    development:
      auto_create_collection: true
      verbose_errors: true
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
---
# Milvus custom resource in cluster mode, with in-cluster storage, etcd and
# Pulsar dependencies.
apiVersion: milvus.io/v1beta1
kind: Milvus
metadata:
  name: milvus-cluster
  namespace: vllm-semantic-router-system
spec:
  mode: cluster
  components:
    disableMetrics: false
  dependencies:
    # Object storage: distributed mode; retained (PVCs kept) on deletion.
    storage:
      inCluster:
        values:
          mode: distributed
        deletionPolicy: Retain
        pvcDeletion: false
    # Three etcd replicas.
    etcd:
      inCluster:
        values:
          replicaCount: 3
    # Single Pulsar broker.
    pulsar:
      inCluster:
        values:
          broker:
            replicaCount: 1
  # No Milvus configuration overrides.
  config: {}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
---
# Milvus custom resource in standalone mode, with in-cluster storage and a
# single etcd replica.
apiVersion: milvus.io/v1beta1
kind: Milvus
metadata:
  name: milvus-standalone
  namespace: vllm-semantic-router-system
spec:
  mode: standalone
  components:
    disableMetrics: false
  dependencies:
    # Object storage: standalone mode; removed together with its PVCs on
    # deletion.
    storage:
      inCluster:
        values:
          mode: standalone
        deletionPolicy: Delete
        pvcDeletion: true
    # One etcd replica.
    etcd:
      inCluster:
        values:
          replicaCount: 1
  # No Milvus configuration overrides.
  config: {}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
---
# Ingress policy for the pods labelled app.kubernetes.io/name=milvus in
# vllm-semantic-router-system.
#
# Once a pod is selected by an Ingress NetworkPolicy, every connection that no
# rule allows is rejected. Besides the router's traffic on 19530, this policy
# therefore also admits traffic between the Milvus pods themselves and scrapes
# of the metrics port.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-router-to-milvus
  namespace: vllm-semantic-router-system
spec:
  podSelector:
    matchLabels:
      app.kubernetes.io/name: milvus
  policyTypes:
    - Ingress
  ingress:
    # Semantic-router pods in this namespace -> Milvus on TCP 19530.
    # namespaceSelector and podSelector share one list entry, so both must
    # match.
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: vllm-semantic-router-system
          podSelector:
            matchLabels:
              app.kubernetes.io/name: semantic-router
      ports:
        - protocol: TCP
          port: 19530
    # Milvus pods -> Milvus pods, on any port. Without this rule the
    # components of a cluster-mode deployment cannot reach one another.
    - from:
        - podSelector:
            matchLabels:
              app.kubernetes.io/name: milvus
    # Metrics scrapes on TCP 9091, the port the milvus-servicemonitor
    # ServiceMonitor targets.
    # NOTE(review): the Prometheus namespace is not known here, so every
    # namespace is admitted on this one port -- tighten the selector once the
    # monitoring namespace is confirmed.
    - from:
        - namespaceSelector: {}
      ports:
        - protocol: TCP
          port: 9091
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
---
# 20Gi ReadWriteOnce volume claim named milvus-data. No storageClassName is
# set in this manifest.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: milvus-data
  namespace: vllm-semantic-router-system
spec:
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 20Gi
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
---
# Opaque Secret holding Milvus credentials. Both values are empty strings in
# this manifest; do not commit real credentials to version control.
apiVersion: v1
kind: Secret
metadata:
  name: milvus-auth
  namespace: vllm-semantic-router-system
type: Opaque
stringData:
  username: ''
  password: ''
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
---
# ServiceMonitor selecting Services labelled app.kubernetes.io/name=milvus in
# vllm-semantic-router-system and scraping /metrics on target port 9091 every
# 30 seconds.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: milvus-servicemonitor
  namespace: vllm-semantic-router-system
spec:
  namespaceSelector:
    matchNames: [vllm-semantic-router-system]
  selector:
    matchLabels:
      app.kubernetes.io/name: milvus
  endpoints:
    - path: /metrics
      targetPort: 9091
      interval: 30s
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
---
# Values overrides for a clustered Milvus install.
# NOTE(review): the key layout looks like Milvus Helm chart values -- confirm
# against the chart version in use.

# Cluster mode on.
cluster:
  enabled: true

# Three etcd replicas.
etcd:
  replicaCount: 3

# MinIO in distributed mode.
minio:
  mode: distributed

# Pulsar enabled.
pulsar:
  enabled: true

# ServiceMonitor creation enabled for metrics.
metrics:
  serviceMonitor:
    enabled: true

0 commit comments

Comments
 (0)