Skip to content

Commit ac2df4b

Browse files
committed
feat: implement decision-based routing with plugin architecture
Signed-off-by: bitliu <[email protected]>
1 parent 5c0ff94 commit ac2df4b

File tree

2 files changed

+5
-6
lines changed

2 files changed

+5
-6
lines changed

deploy/helm/semantic-router/templates/deployment.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@ spec:
35 35
image: {{ .Values.initContainer.image }}
36 36
securityContext:
37 37
{{- toYaml .Values.securityContext | nindent 10 }}
38-
# Allow up to 10 minutes for model downloads in CI environments
39-
# This prevents the init container from being killed prematurely
40 38
command: ["/bin/bash", "-c"]
41 39
args:
42 40
- |
@@ -53,7 +51,7 @@ spec:
53 51
# Remove .cache directory to ensure fresh download
54 52
rm -rf "{{ .name }}/.cache" 2>/dev/null || true
55 53
# Download with ignore_patterns to exclude ONNX-only files if pytorch model exists
56-
python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='{{ .repo }}', local_dir='{{ .name }}', local_dir_use_symlinks=False, ignore_patterns=['*.onnx', '*.msgpack', '*.h5', '*.tflite'] if '{{ .name }}' == 'all-MiniLM-L12-v2' else None)"
54+
python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='{{ .repo }}', local_dir='{{ .name }}', ignore_patterns=['*.onnx', '*.msgpack', '*.h5', '*.tflite'] if '{{ .name }}' == 'all-MiniLM-L12-v2' else None)"
57 55
58 56
# Check for required model files
59 57
echo "Checking {{ .name }} for required files:"

deploy/helm/semantic-router/values.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ initContainer:
142 142
cpu: "500m"
143 143
# -- Models to download
144 144
models:
145+
# Embedding models for semantic cache and tools
146+
- name: Qwen3-Embedding-0.6B
147+
repo: Qwen/Qwen3-Embedding-0.6B
145 148
- name: all-MiniLM-L12-v2
146 149
repo: sentence-transformers/all-MiniLM-L12-v2
147 150
- name: category_classifier_modernbert-base_model
@@ -152,9 +155,7 @@ initContainer:
152 155
repo: LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model
153 156
- name: pii_classifier_modernbert-base_presidio_token_model
154 157
repo: LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model
155-
# Embedding models for semantic cache and tools
156-
- name: Qwen3-Embedding-0.6B
157-
repo: Qwen/Qwen3-Embedding-0.6B
158+
158 159

159 160
# Autoscaling configuration
160 161
autoscaling:

0 commit comments

Comments (0)