Skip to content

Commit 138c82e

Browse files
committed
Fix missing files from previous commit.
Signed-off-by: Sanjeev Rampal <[email protected]>
1 parent b5e81f4 commit 138c82e

File tree

4 files changed

+191
-0
lines changed

4 files changed

+191
-0
lines changed
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
# Deployment for the semantic-router ExtProc service.
#
# An init container downloads the classifier models from Hugging Face into the
# `semantic-router-models` PersistentVolumeClaim; the main container then
# mounts the same volume at /app/models next to its ConfigMap-provided config.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: semantic-router
  namespace: vllm-semantic-router-system
  labels:
    app: semantic-router
spec:
  replicas: 1
  selector:
    matchLabels:
      app: semantic-router
  template:
    metadata:
      labels:
        app: semantic-router
    spec:
      initContainers:
        - name: model-downloader
          image: python:3.11-slim
          securityContext:
            # NOTE(review): presumably left root-capable so `pip install` can
            # write to the image's site-packages; confirm before tightening.
            runAsNonRoot: false
            allowPrivilegeEscalation: false
          command: ["/bin/bash", "-c"]
          # The script only uses ${...} expansions: Kubernetes substitutes
          # dollar-parenthesis references in `args` when they name a container
          # env var, so that syntax is avoided inside the script.
          args:
            - |
              set -eu
              echo "Installing Hugging Face CLI..."
              # Quoted so bash never treats [cli] as a glob character class.
              pip install --no-cache-dir 'huggingface_hub[cli]'

              echo "Downloading models to persistent volume..."
              cd /app/models

              # download_model <repo/directory name> <label used in log lines>
              #
              # A model counts as present only once its completion marker
              # exists. The marker is written after a successful download, so
              # a directory left behind by an interrupted run is retried
              # instead of being skipped. Volumes populated before the marker
              # existed simply re-run the download command once.
              download_model() {
                local model="$1"
                local label="$2"
                local marker="${model}/.download_complete"

                if [ -f "${marker}" ]; then
                  # ${label^} upper-cases the first letter (bash 4+).
                  echo "${label^} model already exists, skipping..."
                  return 0
                fi

                echo "Downloading ${label} model..."
                huggingface-cli download "LLM-Semantic-Router/${model}" --local-dir "${model}"
                touch "${marker}"
              }

              download_model category_classifier_modernbert-base_model "category classifier"
              download_model pii_classifier_modernbert-base_model "PII classifier"
              download_model jailbreak_classifier_modernbert-base_model "jailbreak classifier"
              download_model pii_classifier_modernbert-base_presidio_token_model "PII token classifier"

              echo "All models downloaded successfully!"
              ls -la /app/models/
          env:
            - name: HF_HUB_CACHE
              value: /tmp/hf_cache
          # Reduced resource requirements for init container
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "1Gi"
              cpu: "500m"
          volumeMounts:
            - name: models-volume
              mountPath: /app/models
      containers:
        - name: semantic-router
          image: ghcr.io/vllm-project/semantic-router/extproc:latest
          args: ["--secure=true"]
          securityContext:
            runAsNonRoot: false
            allowPrivilegeEscalation: false
          ports:
            - containerPort: 50051
              name: grpc
              protocol: TCP
            - containerPort: 9190
              name: metrics
              protocol: TCP
            - containerPort: 8080
              name: classify-api
              protocol: TCP
          env:
            - name: LD_LIBRARY_PATH
              value: "/app/lib"
          volumeMounts:
            - name: config-volume
              mountPath: /app/config
              readOnly: true
            - name: models-volume
              mountPath: /app/models
          # Both probes only check that the gRPC port accepts TCP connections.
          livenessProbe:
            tcpSocket:
              port: 50051
            initialDelaySeconds: 60
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          readinessProbe:
            tcpSocket:
              port: 50051
            initialDelaySeconds: 90
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          # Significantly reduced resource requirements for kind cluster
          resources:
            requests:
              memory: "3Gi"  # Reduced from 8Gi
              cpu: "1"  # Reduced from 2
            limits:
              memory: "6Gi"  # Reduced from 12Gi
              cpu: "2"  # Reduced from 4
      volumes:
        - name: config-volume
          configMap:
            name: semantic-router-config
        - name: models-volume
          persistentVolumeClaim:
            claimName: semantic-router-models
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
# Namespace that the semantic-router Deployment and Services are created in.
apiVersion: v1
kind: Namespace
metadata:
  name: vllm-semantic-router-system

deploy/kubernetes/istio/pvc.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
# Persistent storage for the classifier models that the semantic-router
# Deployment mounts at /app/models.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: semantic-router-models
  # A pod can only mount a claim from its own namespace, so the claim is pinned
  # to the namespace the semantic-router Deployment runs in.
  namespace: vllm-semantic-router-system
  labels:
    app: semantic-router
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
  # NOTE(review): presumably the default StorageClass of the target (kind)
  # cluster; adjust when deploying elsewhere.
  storageClassName: standard
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
# Cluster-internal Service exposing the router's gRPC and classification API
# ports.
apiVersion: v1
kind: Service
metadata:
  name: semantic-router
  namespace: vllm-semantic-router-system
  labels:
    app: semantic-router
spec:
  type: ClusterIP
  selector:
    app: semantic-router
  ports:
    # `targetPort: grpc` is resolved against the pod's named container port.
    - name: grpc
      protocol: TCP
      port: 50051
      targetPort: grpc
    - name: classify-api
      protocol: TCP
      port: 8080
      targetPort: 8080
---
# Separate Service for the metrics port; the extra `service: metrics` label
# distinguishes it from the main Service.
apiVersion: v1
kind: Service
metadata:
  name: semantic-router-metrics
  namespace: vllm-semantic-router-system
  labels:
    app: semantic-router
    service: metrics
spec:
  type: ClusterIP
  selector:
    app: semantic-router
  ports:
    - name: metrics
      protocol: TCP
      port: 9190
      targetPort: metrics

0 commit comments

Comments
 (0)