From 2f3294945fde40cdb5416bc8eb00f279ea517207 Mon Sep 17 00:00:00 2001 From: samzong Date: Wed, 3 Dec 2025 00:26:47 +0800 Subject: [PATCH] [Feat] helm: use downloader image and add global.imageRegistry support Signed-off-by: samzong --- deploy/helm/README.md | 2 ++ deploy/helm/semantic-router/README.md | 3 ++- deploy/helm/semantic-router/templates/deployment.yaml | 11 ++++++----- deploy/helm/semantic-router/values.yaml | 8 +++++++- website/docs/installation/k8s/ai-gateway.md | 1 + website/docs/installation/k8s/aibrix.md | 1 + website/docs/installation/k8s/production-stack.md | 1 + 7 files changed, 20 insertions(+), 7 deletions(-) diff --git a/deploy/helm/README.md b/deploy/helm/README.md index 79fa026fe..be026f3df 100644 --- a/deploy/helm/README.md +++ b/deploy/helm/README.md @@ -48,6 +48,8 @@ helm install semantic-router ./deploy/helm/semantic-router \ --create-namespace ``` +> Need a registry mirror/proxy (e.g., in China)? Append `--set global.imageRegistry=<your-registry>` to any Helm install/upgrade command. 
+ ### Verify Installation ```bash diff --git a/deploy/helm/semantic-router/README.md b/deploy/helm/semantic-router/README.md index 0c7dce4cd..134b1fdfb 100644 --- a/deploy/helm/semantic-router/README.md +++ b/deploy/helm/semantic-router/README.md @@ -228,6 +228,7 @@ kubectl apply -f deploy/helm/semantic-router/crds/ | env[0].value | string | `"/app/lib"` | | | fullnameOverride | string | `""` | Override the full name of the chart | | global.namespace | string | `""` | Namespace for all resources (if not specified, uses Release.Namespace) | +| global.imageRegistry | string | `""` | Optional registry prefix applied to all images (e.g., mirror registry in China) | | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"ghcr.io/vllm-project/semantic-router/extproc"` | Image repository | | image.tag | string | `"latest"` | Image tag (overrides the image tag whose default is the chart appVersion) | @@ -238,7 +239,7 @@ kubectl apply -f deploy/helm/semantic-router/crds/ | ingress.hosts | list | `[{"host":"semantic-router.local","paths":[{"path":"/","pathType":"Prefix","servicePort":8080}]}]` | Ingress hosts configuration | | ingress.tls | list | `[]` | Ingress TLS configuration | | initContainer.enabled | bool | `true` | Enable init container | -| initContainer.image | string | `"python:3.11-slim"` | Init container image | +| initContainer.image | object | `{ "repository": "ghcr.io/vllm-project/semantic-router/model-downloader", "tag": "" (defaults to chart appVersion), "pullPolicy": "IfNotPresent" }` | Init container image | | initContainer.models | list | 
`[{"name":"all-MiniLM-L12-v2","repo":"sentence-transformers/all-MiniLM-L12-v2"},{"name":"category_classifier_modernbert-base_model","repo":"LLM-Semantic-Router/category_classifier_modernbert-base_model"},{"name":"pii_classifier_modernbert-base_model","repo":"LLM-Semantic-Router/pii_classifier_modernbert-base_model"},{"name":"jailbreak_classifier_modernbert-base_model","repo":"LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model"},{"name":"pii_classifier_modernbert-base_presidio_token_model","repo":"LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model"}]` | Models to download | | initContainer.resources | object | `{"limits":{"cpu":"1000m","memory":"2Gi"},"requests":{"cpu":"500m","memory":"1Gi"}}` | Resource limits for init container | | livenessProbe.enabled | bool | `true` | Enable liveness probe | diff --git a/deploy/helm/semantic-router/templates/deployment.yaml b/deploy/helm/semantic-router/templates/deployment.yaml index 853876534..6c5c6b46c 100644 --- a/deploy/helm/semantic-router/templates/deployment.yaml +++ b/deploy/helm/semantic-router/templates/deployment.yaml @@ -26,22 +26,23 @@ spec: imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} + {{- $registry := trimSuffix "/" (default "" .Values.global.imageRegistry) }} + {{- $prefix := ternary "" (printf "%s/" $registry) (eq $registry "") }} serviceAccountName: {{ include "semantic-router.serviceAccountName" . }} securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }} {{- if .Values.initContainer.enabled }} initContainers: - name: model-downloader - image: {{ .Values.initContainer.image }} + {{- $initImage := .Values.initContainer.image }} + image: "{{ $prefix }}{{ $initImage.repository }}:{{ $initImage.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ $initImage.pullPolicy | default "IfNotPresent" }} securityContext: {{- toYaml .Values.securityContext | nindent 10 }} command: ["/bin/bash", "-c"] args: - | set -e - echo "Installing Hugging Face Hub..." 
- pip install -U --no-cache-dir "huggingface_hub>=0.19.0" - echo "Downloading models to persistent volume..." cd /app/models @@ -79,7 +80,7 @@ spec: {{- end }} containers: - name: {{ .Chart.Name }} - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + image: "{{ $prefix }}{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} {{- with .Values.args }} args: diff --git a/deploy/helm/semantic-router/values.yaml b/deploy/helm/semantic-router/values.yaml index 1ca81118b..08e73543d 100644 --- a/deploy/helm/semantic-router/values.yaml +++ b/deploy/helm/semantic-router/values.yaml @@ -6,6 +6,8 @@ global: # -- Namespace for all resources (if not specified, uses Release.Namespace) namespace: "" + # -- Optional registry prefix applied to all images (e.g., mirror in China such as registry.cn-hangzhou.aliyuncs.com) + imageRegistry: "" # -- Number of replicas for the deployment replicaCount: 1 @@ -131,7 +133,11 @@ initContainer: # -- Enable init container enabled: true # -- Init container image - image: python:3.11-slim + image: + repository: ghcr.io/vllm-project/semantic-router/model-downloader + # Leave empty to default to the chart AppVersion; override with a pinned tag if desired + tag: "" + pullPolicy: IfNotPresent # -- Resource limits for init container resources: limits: diff --git a/website/docs/installation/k8s/ai-gateway.md b/website/docs/installation/k8s/ai-gateway.md index b0883cca5..e3fcd67cb 100644 --- a/website/docs/installation/k8s/ai-gateway.md +++ b/website/docs/installation/k8s/ai-gateway.md @@ -103,6 +103,7 @@ Deploy the semantic router service with all required components using Helm: ```bash # Install with custom values from GHCR OCI registry +# (Optional) If you use a registry mirror/proxy, append: --set global.imageRegistry=<your-registry> helm install semantic-router oci://ghcr.io/vllm-project/charts/semantic-router \ --version v0.0.0-latest \ 
--namespace vllm-semantic-router-system \ diff --git a/website/docs/installation/k8s/aibrix.md b/website/docs/installation/k8s/aibrix.md index 11e0e728f..c940fd23d 100644 --- a/website/docs/installation/k8s/aibrix.md +++ b/website/docs/installation/k8s/aibrix.md @@ -56,6 +56,7 @@ Deploy the semantic router service with all required components using Helm: ```bash # Install with custom values from GHCR OCI registry +# (Optional) If you use a registry mirror/proxy, append: --set global.imageRegistry=<your-registry> helm install semantic-router oci://ghcr.io/vllm-project/charts/semantic-router \ --version v0.0.0-latest \ --namespace vllm-semantic-router-system \ diff --git a/website/docs/installation/k8s/production-stack.md b/website/docs/installation/k8s/production-stack.md index 180efcf6b..2f42c78d6 100644 --- a/website/docs/installation/k8s/production-stack.md +++ b/website/docs/installation/k8s/production-stack.md @@ -99,6 +99,7 @@ Deploy using Helm with custom values: ```bash # Deploy vLLM Semantic Router with custom values from GHCR OCI registry + # (Optional) If you use a registry mirror/proxy, append: --set global.imageRegistry=<your-registry> helm install semantic-router oci://ghcr.io/vllm-project/charts/semantic-router \ --version v0.0.0-latest \ --namespace vllm-semantic-router-system \