Skip to content

Commit e47b745

Browse files
committed
pvc local model success
Signed-off-by: JaredforReal <[email protected]>
1 parent ddc0195 commit e47b745

File tree

9 files changed

+190
-407
lines changed

9 files changed

+190
-407
lines changed

deploy/kubernetes/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
bert_model:
2-
model_id: sentence-transformers/all-MiniLM-L12-v2
2+
model_id: models/all-MiniLM-L12-v2
33
threshold: 0.6
44
use_cpu: true
55

deploy/kubernetes/deployment.yaml

Lines changed: 151 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -16,121 +16,163 @@ spec:
1616
app: semantic-router
1717
spec:
1818
initContainers:
19-
- name: model-downloader
20-
image: python:3.11-slim
21-
securityContext:
22-
runAsNonRoot: false
23-
allowPrivilegeEscalation: false
24-
command: ["/bin/bash", "-c"]
25-
args:
26-
- |
27-
set -e
28-
echo "Installing Hugging Face CLI..."
29-
pip install --no-cache-dir huggingface_hub[cli]
19+
- name: model-downloader
20+
image: python:3.11-slim
21+
securityContext:
22+
runAsNonRoot: false
23+
allowPrivilegeEscalation: false
24+
command: ["/bin/bash", "-c"]
25+
args:
26+
- |
27+
set -e
28+
# Check if all required models already exist in PVC; if yes, skip downloads entirely
29+
REQUIRED_DIRS=(
30+
"all-MiniLM-L12-v2"
31+
"category_classifier_modernbert-base_model"
32+
"pii_classifier_modernbert-base_model"
33+
"jailbreak_classifier_modernbert-base_model"
34+
"pii_classifier_modernbert-base_presidio_token_model"
35+
)
36+
mkdir -p /app/models
37+
cd /app/models
38+
MISSING=false
39+
for d in "${REQUIRED_DIRS[@]}"; do
40+
if [ ! -d "$d" ]; then
41+
MISSING=true
42+
break
43+
fi
44+
done
45+
if [ "$MISSING" = false ]; then
46+
echo "All required models already present in PVC. Skipping download."
47+
exit 0
48+
fi
3049
31-
echo "Downloading models to persistent volume..."
32-
cd /app/models
50+
echo "Installing Hugging Face CLI..."
51+
pip install --no-cache-dir huggingface_hub[cli]
3352
34-
# Download category classifier model
35-
if [ ! -d "category_classifier_modernbert-base_model" ]; then
36-
echo "Downloading category classifier model..."
37-
huggingface-cli download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir category_classifier_modernbert-base_model
38-
else
39-
echo "Category classifier model already exists, skipping..."
40-
fi
53+
echo "Downloading missing models to persistent volume..."
4154
42-
# Download PII classifier model
43-
if [ ! -d "pii_classifier_modernbert-base_model" ]; then
44-
echo "Downloading PII classifier model..."
45-
huggingface-cli download LLM-Semantic-Router/pii_classifier_modernbert-base_model --local-dir pii_classifier_modernbert-base_model
46-
else
47-
echo "PII classifier model already exists, skipping..."
48-
fi
55+
# Download all-MiniLM-L12-v2 model
56+
if [ ! -d "all-MiniLM-L12-v2" ]; then
57+
echo "Downloading all-MiniLM-L12-v2 model..."
58+
hf download sentence-transformers/all-MiniLM-L12-v2 --local-dir all-MiniLM-L12-v2
59+
else
60+
echo "all-MiniLM-L12-v2 model already exists, skipping..."
61+
fi
4962
50-
# Download jailbreak classifier model
51-
if [ ! -d "jailbreak_classifier_modernbert-base_model" ]; then
52-
echo "Downloading jailbreak classifier model..."
53-
huggingface-cli download LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model --local-dir jailbreak_classifier_modernbert-base_model
54-
else
55-
echo "Jailbreak classifier model already exists, skipping..."
56-
fi
63+
# Download category classifier model
64+
if [ ! -d "category_classifier_modernbert-base_model" ]; then
65+
echo "Downloading category classifier model..."
66+
hf download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir category_classifier_modernbert-base_model
67+
else
68+
echo "Category classifier model already exists, skipping..."
69+
fi
5770
58-
# Download PII token classifier model
59-
if [ ! -d "pii_classifier_modernbert-base_presidio_token_model" ]; then
60-
echo "Downloading PII token classifier model..."
61-
huggingface-cli download LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model --local-dir pii_classifier_modernbert-base_presidio_token_model
62-
else
63-
echo "PII token classifier model already exists, skipping..."
64-
fi
71+
# Download PII classifier model
72+
if [ ! -d "pii_classifier_modernbert-base_model" ]; then
73+
echo "Downloading PII classifier model..."
74+
hf download LLM-Semantic-Router/pii_classifier_modernbert-base_model --local-dir pii_classifier_modernbert-base_model
75+
else
76+
echo "PII classifier model already exists, skipping..."
77+
fi
6578
66-
echo "All models downloaded successfully!"
67-
ls -la /app/models/
68-
env:
69-
- name: HF_HUB_CACHE
70-
value: /tmp/hf_cache
71-
# Reduced resource requirements for init container
72-
resources:
73-
requests:
74-
memory: "512Mi"
75-
cpu: "250m"
76-
limits:
77-
memory: "1Gi"
78-
cpu: "500m"
79-
volumeMounts:
80-
- name: models-volume
81-
mountPath: /app/models
79+
# Download jailbreak classifier model
80+
if [ ! -d "jailbreak_classifier_modernbert-base_model" ]; then
81+
echo "Downloading jailbreak classifier model..."
82+
hf download LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model --local-dir jailbreak_classifier_modernbert-base_model
83+
else
84+
echo "Jailbreak classifier model already exists, skipping..."
85+
fi
86+
87+
# Download PII token classifier model
88+
if [ ! -d "pii_classifier_modernbert-base_presidio_token_model" ]; then
89+
echo "Downloading PII token classifier model..."
90+
hf download LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model --local-dir pii_classifier_modernbert-base_presidio_token_model
91+
else
92+
echo "PII token classifier model already exists, skipping..."
93+
fi
94+
95+
echo "All missing models downloaded successfully!"
96+
ls -la /app/models/
97+
env:
98+
- name: HF_HUB_CACHE
99+
value: /tmp/hf_cache
100+
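# China mirror settings (Hugging Face mirror + PyPI mirror). NOTE(review): confirm huggingface_hub honors the HUGGINGFACE_HUB_* names below; its docs list HF_ENDPOINT and HF_HUB_* variables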
101+
- name: HUGGINGFACE_HUB_CACHE
102+
value: /tmp/hf_cache
103+
- name: HUGGINGFACE_HUB_ENABLE_HF_TRANSFER
104+
value: "1"
105+
- name: HUGGINGFACE_HUB_DOWNLOAD_TIMEOUT
106+
value: "300"
107+
- name: HUGGINGFACE_HUB_PROXY_URL
108+
value: "https://hf-mirror.com"
109+
- name: PIP_INDEX_URL
110+
value: https://pypi.tuna.tsinghua.edu.cn/simple
111+
- name: NO_PROXY
112+
value: localhost,127.0.0.1,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,.svc,.svc.cluster.local
113+
# Reduced resource requirements for init container
114+
resources:
115+
requests:
116+
memory: "512Mi"
117+
cpu: "250m"
118+
limits:
119+
memory: "1Gi"
120+
cpu: "500m"
121+
volumeMounts:
122+
- name: models-volume
123+
mountPath: /app/models
82124
containers:
83-
- name: semantic-router
84-
image: ghcr.io/vllm-project/semantic-router/extproc:latest
85-
args: ["--secure=true"]
86-
securityContext:
87-
runAsNonRoot: false
88-
allowPrivilegeEscalation: false
89-
ports:
90-
- containerPort: 50051
91-
name: grpc
92-
protocol: TCP
93-
- containerPort: 9190
94-
name: metrics
95-
protocol: TCP
96-
- containerPort: 8080
97-
name: classify-api
98-
protocol: TCP
99-
env:
100-
- name: LD_LIBRARY_PATH
101-
value: "/app/lib"
102-
volumeMounts:
125+
- name: semantic-router
126+
image: ghcr.io/vllm-project/semantic-router/extproc:latest
127+
args: ["--secure=true"]
128+
securityContext:
129+
runAsNonRoot: false
130+
allowPrivilegeEscalation: false
131+
ports:
132+
- containerPort: 50051
133+
name: grpc
134+
protocol: TCP
135+
- containerPort: 9190
136+
name: metrics
137+
protocol: TCP
138+
- containerPort: 8080
139+
name: classify-api
140+
protocol: TCP
141+
env:
142+
- name: LD_LIBRARY_PATH
143+
value: "/app/lib"
144+
volumeMounts:
145+
- name: config-volume
146+
mountPath: /app/config
147+
readOnly: true
148+
- name: models-volume
149+
mountPath: /app/models
150+
livenessProbe:
151+
tcpSocket:
152+
port: 50051
153+
initialDelaySeconds: 60
154+
periodSeconds: 30
155+
timeoutSeconds: 10
156+
failureThreshold: 3
157+
readinessProbe:
158+
tcpSocket:
159+
port: 50051
160+
initialDelaySeconds: 90
161+
periodSeconds: 30
162+
timeoutSeconds: 10
163+
failureThreshold: 3
164+
# Significantly reduced resource requirements for kind cluster
165+
resources:
166+
requests:
167+
memory: "3Gi" # Reduced from 8Gi
168+
cpu: "1" # Reduced from 2
169+
limits:
170+
memory: "6Gi" # Reduced from 12Gi
171+
cpu: "2" # Reduced from 4
172+
volumes:
103173
- name: config-volume
104-
mountPath: /app/config
105-
readOnly: true
174+
configMap:
175+
name: semantic-router-config
106176
- name: models-volume
107-
mountPath: /app/models
108-
livenessProbe:
109-
tcpSocket:
110-
port: 50051
111-
initialDelaySeconds: 60
112-
periodSeconds: 30
113-
timeoutSeconds: 10
114-
failureThreshold: 3
115-
readinessProbe:
116-
tcpSocket:
117-
port: 50051
118-
initialDelaySeconds: 90
119-
periodSeconds: 30
120-
timeoutSeconds: 10
121-
failureThreshold: 3
122-
# Significantly reduced resource requirements for kind cluster
123-
resources:
124-
requests:
125-
memory: "3Gi" # Reduced from 8Gi
126-
cpu: "1" # Reduced from 2
127-
limits:
128-
memory: "6Gi" # Reduced from 12Gi
129-
cpu: "2" # Reduced from 4
130-
volumes:
131-
- name: config-volume
132-
configMap:
133-
name: semantic-router-config
134-
- name: models-volume
135-
persistentVolumeClaim:
136-
claimName: semantic-router-models
177+
persistentVolumeClaim:
178+
claimName: semantic-router-models

deploy/kubernetes/kustomization.yaml

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ metadata:
66

77
resources:
88
- namespace.yaml
9-
- pvc.yaml
9+
- pv-models.yaml
1010
- deployment.yaml
1111
- service.yaml
1212

@@ -17,10 +17,4 @@ configMapGenerator:
1717
- config.yaml
1818
- tools_db.json
1919

20-
# Namespace for all resources
2120
namespace: vllm-semantic-router-system
22-
23-
images:
24-
- name: ghcr.io/vllm-project/semantic-router/extproc
25-
newName: semantic-router-extproc
26-
newTag: local

deploy/kubernetes/pv-models.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# PersistentVolume for model storage (hostPath-backed)
2+
apiVersion: v1
3+
kind: PersistentVolume
4+
metadata:
5+
name: semantic-router-models-pv
6+
labels:
7+
app: semantic-router
8+
spec:
9+
capacity:
10+
storage: 20Gi
11+
accessModes:
12+
- ReadWriteOnce
13+
storageClassName: standard
14+
persistentVolumeReclaimPolicy: Retain
15+
hostPath:
16+
path: /mnt/models
17+
type: DirectoryOrCreate
18+
---
19+
# PersistentVolumeClaim for model storage, bound by volumeName to semantic-router-models-pv
20+
apiVersion: v1
21+
kind: PersistentVolumeClaim
22+
metadata:
23+
name: semantic-router-models
24+
namespace: vllm-semantic-router-system
25+
labels:
26+
app: semantic-router
27+
spec:
28+
accessModes:
29+
- ReadWriteOnce
30+
resources:
31+
requests:
32+
storage: 20Gi
33+
storageClassName: standard
34+
volumeName: semantic-router-models-pv

deploy/kubernetes/pvc.yaml

Lines changed: 0 additions & 13 deletions
This file was deleted.

0 commit comments

Comments
 (0)