feat: implement decision-based routing with plugin architecture

Xunzhuo · Xunzhuo · commit ac2df4b68e3b · 2025-11-18T12:07:38.000+08:00
Signed-off-by: bitliu <bitliu@tencent.com>
diff --git a/deploy/helm/semantic-router/templates/deployment.yaml b/deploy/helm/semantic-router/templates/deployment.yaml
@@ -35,8 +35,6 @@ spec:
         image: {{ .Values.initContainer.image }}
         securityContext:
           {{- toYaml .Values.securityContext | nindent 10 }}
-        # Allow up to 10 minutes for model downloads in CI environments
-        # This prevents the init container from being killed prematurely
         command: ["/bin/bash", "-c"]
         args:
         - |
@@ -53,7 +51,7 @@ spec:
           # Remove .cache directory to ensure fresh download
           rm -rf "{{ .name }}/.cache" 2>/dev/null || true
           # Download with ignore_patterns to exclude ONNX-only files if pytorch model exists
-          python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='{{ .repo }}', local_dir='{{ .name }}', local_dir_use_symlinks=False, ignore_patterns=['*.onnx', '*.msgpack', '*.h5', '*.tflite'] if '{{ .name }}' == 'all-MiniLM-L12-v2' else None)"
+          python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='{{ .repo }}', local_dir='{{ .name }}', ignore_patterns=['*.onnx', '*.msgpack', '*.h5', '*.tflite'] if '{{ .name }}' == 'all-MiniLM-L12-v2' else None)"
 
           # Check for required model files
           echo "Checking {{ .name }} for required files:"
diff --git a/deploy/helm/semantic-router/values.yaml b/deploy/helm/semantic-router/values.yaml
@@ -142,6 +142,9 @@ initContainer:
       cpu: "500m"
   # -- Models to download
   models:
+    # Embedding models for semantic cache and tools
+    - name: Qwen3-Embedding-0.6B
+      repo: Qwen/Qwen3-Embedding-0.6B
     - name: all-MiniLM-L12-v2
       repo: sentence-transformers/all-MiniLM-L12-v2
     - name: category_classifier_modernbert-base_model
@@ -152,9 +155,7 @@ initContainer:
       repo: LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model
     - name: pii_classifier_modernbert-base_presidio_token_model
       repo: LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model
-    # Embedding models for semantic cache and tools
-    - name: Qwen3-Embedding-0.6B
-      repo: Qwen/Qwen3-Embedding-0.6B
+
 
 # Autoscaling configuration
 autoscaling: