@@ -16,121 +16,163 @@ spec:
1616 app : semantic-router
1717 spec :
1818 initContainers :
19- - name : model-downloader
20- image : python:3.11-slim
21- securityContext :
22- runAsNonRoot : false
23- allowPrivilegeEscalation : false
24- command : ["/bin/bash", "-c"]
25- args :
26- - |
27- set -e
28- echo "Installing Hugging Face CLI..."
29- pip install --no-cache-dir huggingface_hub[cli]
19+ - name : model-downloader  # init container: fills the models volume mounted at /app/models
20+ image : python:3.11-slim
21+ securityContext :
22+ runAsNonRoot : false  # NOTE(review): presumably left permissive so pip can install system-wide — confirm
23+ allowPrivilegeEscalation : false
24+ command : ["/bin/bash", "-c"]  # bash rather than sh: the script uses a bash array
25+ args :
26+ - |  # literal block scalar: the whole script is handed to bash -c as one argument
27+ set -e
28+ # Fast path: if every required model directory is already on the PVC, exit before installing or downloading anything
29+ REQUIRED_MODEL_DIRS=(
30+ "all-MiniLM-L12-v2"
31+ "category_classifier_modernbert-base_model"
32+ "pii_classifier_modernbert-base_model"
33+ "jailbreak_classifier_modernbert-base_model"
34+ "pii_classifier_modernbert-base_presidio_token_model"
35+ )
36+ # Create the mount path if needed and work from it; the download steps use paths relative to this directory
37+ mkdir -p /app/models
38+ cd /app/models
39+ ALL_PRESENT=true
40+ for model_dir in "${REQUIRED_MODEL_DIRS[@]}"; do
41+ # Only directory existence is tested; the contents are not inspected
42+ [ -d "$model_dir" ] || { ALL_PRESENT=false; break; }
43+ done
44+ # Nothing is missing: succeed immediately
45+ if [ "$ALL_PRESENT" = true ]; then
46+ echo "All required models already present in PVC. Skipping download."
47+ exit 0
48+ fi
3049
31- echo "Downloading models to persistent volume ..."
32- cd /app/models
50+ echo "Installing Hugging Face CLI ..."
51+ pip install --no-cache-dir "huggingface_hub[cli]>=0.34.0"  # quoted so bash neither globs [cli] nor treats > as a redirect; 0.34.0 is the first release with the hf command used below
3352
34- # Download category classifier model
35- if [ ! -d "category_classifier_modernbert-base_model" ]; then
36- echo "Downloading category classifier model..."
37- huggingface-cli download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir category_classifier_modernbert-base_model
38- else
39- echo "Category classifier model already exists, skipping..."
40- fi
53+ echo "Downloading missing models to persistent volume..."
4154
42- # Download PII classifier model
43- if [ ! -d "pii_classifier_modernbert-base_model " ]; then
44- echo "Downloading PII classifier model..."
45- huggingface-cli download LLM-Semantic-Router/pii_classifier_modernbert-base_model --local-dir pii_classifier_modernbert-base_model
46- else
47- echo "PII classifier model already exists, skipping..."
48- fi
55+ # Download all-MiniLM-L12-v2 unless its directory exists; the tested name must equal the --local-dir target exactly
56+ if [ ! -d "all-MiniLM-L12-v2" ]; then
57+ echo "Downloading all-MiniLM-L12-v2 model..."
58+ hf download sentence-transformers/all-MiniLM-L12-v2 --local-dir all-MiniLM-L12-v2
59+ else
60+ echo "all-MiniLM-L12-v2 model already exists, skipping..."
61+ fi
4962
50- # Download jailbreak classifier model
51- if [ ! -d "jailbreak_classifier_modernbert -base_model" ]; then
52- echo "Downloading jailbreak classifier model..."
53- huggingface-cli download LLM-Semantic-Router/jailbreak_classifier_modernbert -base_model --local-dir jailbreak_classifier_modernbert -base_model
54- else
55- echo "Jailbreak classifier model already exists, skipping..."
56- fi
63+ # Download the category classifier unless its directory exists; repo id and directory name contain no spaces
64+ if [ ! -d "category_classifier_modernbert-base_model" ]; then
65+ echo "Downloading category classifier model..."
66+ hf download LLM-Semantic-Router/category_classifier_modernbert-base_model --local-dir category_classifier_modernbert-base_model
67+ else
68+ echo "Category classifier model already exists, skipping..."
69+ fi
5770
58- # Download PII token classifier model
59- if [ ! -d "pii_classifier_modernbert-base_presidio_token_model " ]; then
60- echo "Downloading PII token classifier model..."
61- huggingface-cli download LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model --local-dir pii_classifier_modernbert-base_presidio_token_model
62- else
63- echo "PII token classifier model already exists, skipping..."
64- fi
71+ # Download the PII classifier unless its directory exists; the tested name must equal the --local-dir target exactly
72+ if [ ! -d "pii_classifier_modernbert-base_model" ]; then
73+ echo "Downloading PII classifier model..."
74+ hf download LLM-Semantic-Router/pii_classifier_modernbert-base_model --local-dir pii_classifier_modernbert-base_model
75+ else
76+ echo "PII classifier model already exists, skipping..."
77+ fi
6578
66- echo "All models downloaded successfully!"
67- ls -la /app/models/
68- env :
69- - name : HF_HUB_CACHE
70- value : /tmp/hf_cache
71- # Reduced resource requirements for init container
72- resources :
73- requests :
74- memory : " 512Mi"
75- cpu : " 250m"
76- limits :
77- memory : " 1Gi"
78- cpu : " 500m"
79- volumeMounts :
80- - name : models-volume
81- mountPath : /app/models
79+ # Jailbreak classifier: fetched only when its directory is absent
80+ if [ -d "jailbreak_classifier_modernbert-base_model" ]; then
81+ echo "Jailbreak classifier model already exists, skipping..."
82+ else
83+ echo "Downloading jailbreak classifier model..."
84+ hf download LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model --local-dir jailbreak_classifier_modernbert-base_model
85+ fi
86+
87+ # PII token classifier: fetched only when its directory is absent
88+ if [ -d "pii_classifier_modernbert-base_presidio_token_model" ]; then
89+ echo "PII token classifier model already exists, skipping..."
90+ else
91+ echo "Downloading PII token classifier model..."
92+ hf download LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model --local-dir pii_classifier_modernbert-base_presidio_token_model
93+ fi
94+
95+ echo "All missing models downloaded successfully!"
96+ ls -la /app/models/
97+ env :
98+ # Hub cache location for the download step
99+ - name : HF_HUB_CACHE
100+ value : /tmp/hf_cache
101+ # China mirror: huggingface_hub takes the Hub base URL from HF_ENDPOINT
102+ - name : HF_ENDPOINT
103+ value : "https://hf-mirror.com"
104+ # Download timeout in seconds; huggingface_hub reads HF_HUB_DOWNLOAD_TIMEOUT
105+ - name : HF_HUB_DOWNLOAD_TIMEOUT
106+ value : "300"
107+ # HF_HUB_ENABLE_HF_TRANSFER is intentionally unset: enabling it without the hf_transfer package makes downloads fail
108+ # PyPI mirror used by the pip install step
109+ - name : PIP_INDEX_URL
110+ value : https://pypi.tuna.tsinghua.edu.cn/simple
111+ - name : NO_PROXY
112+ value : localhost,127.0.0.1,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,.svc,.svc.cluster.local
113+ # Reduced resource requirements for the init container; quantities carry no whitespace inside the quotes
114+ resources :
115+ requests :
116+ memory : "512Mi"
117+ cpu : "250m"
118+ limits :
119+ memory : "1Gi"
120+ cpu : "500m"
121+ volumeMounts :
122+ - name : models-volume  # same volume the semantic-router container mounts at /app/models
123+ mountPath : /app/models
82124 containers :
83- - name : semantic-router
84- image : ghcr.io/vllm-project/semantic-router/extproc:latest
85- args : ["--secure=true"]
86- securityContext :
87- runAsNonRoot : false
88- allowPrivilegeEscalation : false
89- ports :
90- - containerPort : 50051
91- name : grpc
92- protocol : TCP
93- - containerPort : 9190
94- name : metrics
95- protocol : TCP
96- - containerPort : 8080
97- name : classify-api
98- protocol : TCP
99- env :
100- - name : LD_LIBRARY_PATH
101- value : " /app/lib"
102- volumeMounts :
125+ - name : semantic-router
126+ image : ghcr.io/vllm-project/semantic-router/extproc:latest
127+ args : ["--secure=true"]
128+ securityContext :
129+ runAsNonRoot : false
130+ allowPrivilegeEscalation : false
131+ ports :
132+ - containerPort : 50051
133+ name : grpc
134+ protocol : TCP
135+ - containerPort : 9190
136+ name : metrics
137+ protocol : TCP
138+ - containerPort : 8080
139+ name : classify-api
140+ protocol : TCP
141+ env :
142+ - name : LD_LIBRARY_PATH
143+ value : "/app/lib"  # no leading space: the value is used verbatim as a search path
144+ volumeMounts :
145+ - name : config-volume
146+ mountPath : /app/config
147+ readOnly : true
148+ - name : models-volume
149+ mountPath : /app/models
150+ livenessProbe :
151+ tcpSocket :
152+ port : 50051  # the grpc port declared above
153+ initialDelaySeconds : 60
154+ periodSeconds : 30
155+ timeoutSeconds : 10
156+ failureThreshold : 3
157+ readinessProbe :
158+ tcpSocket :
159+ port : 50051  # the grpc port declared above
160+ initialDelaySeconds : 90
161+ periodSeconds : 30
162+ timeoutSeconds : 10
163+ failureThreshold : 3
164+ # Significantly reduced resource requirements for kind cluster
165+ resources :
166+ requests :
167+ memory : "3Gi"  # Reduced from 8Gi
168+ cpu : "1"  # Reduced from 2
169+ limits :
170+ memory : "6Gi"  # Reduced from 12Gi
171+ cpu : "2"  # Reduced from 4
172+ volumes :  # pod-level volumes referenced by name in the volumeMounts entries
103173 - name : config-volume
104- mountPath : /app/config
105- readOnly : true
174+ configMap :  # config-volume is backed by a ConfigMap
175+ name : semantic-router-config
106176 - name : models-volume
107- mountPath : /app/models
108- livenessProbe :
109- tcpSocket :
110- port : 50051
111- initialDelaySeconds : 60
112- periodSeconds : 30
113- timeoutSeconds : 10
114- failureThreshold : 3
115- readinessProbe :
116- tcpSocket :
117- port : 50051
118- initialDelaySeconds : 90
119- periodSeconds : 30
120- timeoutSeconds : 10
121- failureThreshold : 3
122- # Significantly reduced resource requirements for kind cluster
123- resources :
124- requests :
125- memory : " 3Gi" # Reduced from 8Gi
126- cpu : " 1" # Reduced from 2
127- limits :
128- memory : " 6Gi" # Reduced from 12Gi
129- cpu : " 2" # Reduced from 4
130- volumes :
131- - name : config-volume
132- configMap :
133- name : semantic-router-config
134- - name : models-volume
135- persistentVolumeClaim :
136- claimName : semantic-router-models
177+ persistentVolumeClaim :  # PVC-backed storage; the init script's skip checks rely on it persisting downloaded models
178+ claimName : semantic-router-models
0 commit comments