8282 name : kind Cluster Integration Test
8383 runs-on : ubuntu-latest
8484 needs : validate-manifests
85- timeout-minutes : 30
85+ timeout-minutes : 45 # Increased to account for model downloads
8686
8787 steps :
8888 - name : Checkout code
9191 - name : Set up Docker Buildx
9292 uses : docker/setup-buildx-action@v3
9393
# NOTE(review): no `uses:` line is visible for this step; it appears to have been
# lost where the "95+ 96+" markers are fused. The inputs below (version, config,
# cluster_name, wait) look like those of helm/kind-action — confirm against the
# real workflow file before relying on this view.
94+ - name : Create kind cluster
95+ 96+ with :
97+ version : ${{ env.KIND_VERSION }}
98+ config : tools/kind/kind-config.yaml
99+ cluster_name : semantic-router-test
100+ wait : 120s
101+
94102 - name : Build semantic-router image
95103 uses : docker/build-push-action@v5
96104 with :
@@ -101,46 +109,11 @@ jobs:
101109 cache-from : type=gha
102110 cache-to : type=gha,mode=max
103111
104- - name : Create kind cluster with local registry
105- 106- with :
107- version : ${{ env.KIND_VERSION }}
108- config : tools/kind/kind-config.yaml
109- cluster_name : semantic-router-test
110- wait : 120s
111-
112- - name : Setup local Docker registry
113- run : |
114- # Create a local registry container
115- docker run -d -p 5001:5000 --name kind-registry --network kind registry:2
116-
117- # Connect the registry to the kind network if not already connected
118- docker network connect kind kind-registry || true
119-
120- # Document the local registry
121- kubectl apply -f - <<EOF
122- apiVersion: v1
123- kind: ConfigMap
124- metadata:
125- name: local-registry-hosting
126- namespace: kube-public
127- data:
128- localRegistryHosting.v1: |
129- host: "localhost:5001"
130- help: "https://kind.sigs.k8s.io/docs/user/local-registry/"
131- EOF
132-
# This step is rewritten: the removed lines tagged the built image with a per-run
# IMAGE_TAG and pushed it to localhost:5001; the added lines instead pass the
# image, still tagged :test, to `kind load docker-image` for the cluster named
# semantic-router-test.
133- - name : Tag and push image to local registry
112+ - name : Load image into kind cluster
134113 run : |
135- # Generate unique tag using PR number or run ID
136- IMAGE_TAG="test-${{ github.run_id }}-${{ github.run_attempt }}"
137- echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_ENV
138-
139- # Tag and push to local registry
140- docker tag ghcr.io/vllm-project/semantic-router/extproc:test localhost:5001/semantic-router/extproc:${IMAGE_TAG}
141- docker push localhost:5001/semantic-router/extproc:${IMAGE_TAG}
142-
143- echo "Image pushed to local registry with tag: ${IMAGE_TAG}"
114+ echo "Loading image into kind cluster..."
115+ kind load docker-image ghcr.io/vllm-project/semantic-router/extproc:test --name semantic-router-test
116+ echo "Image loaded successfully!"
144117
145118 - name : Verify cluster
146119 run : |
@@ -169,19 +142,20 @@ jobs:
169142 cp ../deployment.yaml ./deployment.yaml
170143 cp ../pvc.yaml ./pvc.yaml
171144
172- # Fix init container to use 'hf' command (not 'huggingface-cli')
173- # This matches the working approach in test-and-build.yml
174- perl -i -pe 's/huggingface-cli download/hf download/g' deployment.yaml
175-
176- # Update pip install to include hf_transfer for faster downloads
145+ # Optimize init container for CI testing
146+ # 1. Update pip install to include hf_transfer for faster downloads
177147 perl -i -pe 's/pip install --no-cache-dir huggingface_hub\[cli\]/pip install --no-cache-dir "huggingface_hub[cli]" hf_transfer/g' deployment.yaml
178148
179- # Remove the directory existence checks - CI always starts fresh
180- perl -i -0pe 's/if \[ ! -d "[^"]*" \]; then\n[^\n]*\n[^\n]*\n[^\n]*else\n[^\n]*\n[^\n]*fi\n\n//g' deployment.yaml
149+ # 2. Enable HF_HUB_ENABLE_HF_TRANSFER for faster downloads ($1 re-emits the matched 'env:'; with /g every 'env:' in the file gets the variable)
150+ perl -i -pe 's/(env:)/$1\n - name: HF_HUB_ENABLE_HF_TRANSFER\n value: "1"/g' deployment.yaml
151+
152+ # 3. Simplify the download logic - remove directory checks since CI always starts fresh
153+ # Replace the entire args section with a simpler version
154+ perl -i -0pe 's/args:\s*\n\s*-\s*\|\s*\n\s*set -e.*?ls -la \/app\/models\//args:\n - |\n set -e\n echo "Installing Hugging Face CLI..."\n pip install --no-cache-dir "huggingface_hub[cli]" hf_transfer\n \n echo "Downloading models to persistent volume..."\n cd \/app\/models\n \n echo "Downloading category classifier model..."\n hf download LLM-Semantic-Router\/category_classifier_modernbert-base_model --local-dir category_classifier_modernbert-base_model\n \n echo "Downloading PII classifier model..."\n hf download LLM-Semantic-Router\/pii_classifier_modernbert-base_model --local-dir pii_classifier_modernbert-base_model\n \n echo "Downloading jailbreak classifier model..."\n hf download LLM-Semantic-Router\/jailbreak_classifier_modernbert-base_model --local-dir jailbreak_classifier_modernbert-base_model\n \n echo "Downloading PII token classifier model..."\n hf download LLM-Semantic-Router\/pii_classifier_modernbert-base_presidio_token_model --local-dir pii_classifier_modernbert-base_presidio_token_model\n \n echo "All models downloaded successfully!"\n ls -la \/app\/models\//gs' deployment.yaml
181155
182- echo "✓ Updated init container to use 'hf' command with optimized settings "
156+ echo "✓ Updated init container with optimized model download for CI "
183157
184- # Create kustomization with local resources (no PVC for CI)
158+ # Create kustomization with local resources
185159 cat > kustomization.yaml << EOF
186160 apiVersion: kustomize.config.k8s.io/v1beta1
187161 kind: Kustomization
@@ -200,13 +174,14 @@ jobs:
200174
201175 namespace: vllm-semantic-router-system
202176
177+ # Use the same image that was loaded into kind cluster
203178 images:
204179 - name: ghcr.io/vllm-project/semantic-router/extproc
205- newName: localhost:5001/semantic-router/extproc
206- newTag: ${IMAGE_TAG}
180+ newTag: test
207181
208- # Reduce resource requirements for CI testing (main container only)
182+ # Reduce resource requirements for CI testing and set imagePullPolicy
209183 patches:
184+ # Patch for main container
210185 - patch: |-
211186 - op: replace
212187 path: /spec/template/spec/containers/0/resources/requests/memory
@@ -220,6 +195,26 @@ jobs:
220195 - op: replace
221196 path: /spec/template/spec/containers/0/resources/limits/cpu
222197 value: "2"
198+ - op: add
199+ path: /spec/template/spec/containers/0/imagePullPolicy
200+ value: "IfNotPresent"
201+ target:
202+ kind: Deployment
203+ name: semantic-router
204+ # Patch for init container - increase resources for faster downloads
205+ - patch: |-
206+ - op: replace
207+ path: /spec/template/spec/initContainers/0/resources/requests/memory
208+ value: "1Gi"
209+ - op: replace
210+ path: /spec/template/spec/initContainers/0/resources/requests/cpu
211+ value: "500m"
212+ - op: replace
213+ path: /spec/template/spec/initContainers/0/resources/limits/memory
214+ value: "2Gi"
215+ - op: replace
216+ path: /spec/template/spec/initContainers/0/resources/limits/cpu
217+ value: "1"
223218 target:
224219 kind: Deployment
225220 name: semantic-router
@@ -230,6 +225,20 @@ jobs:
230225 echo "=== Files in overlay directory ==="
231226 ls -la
232227
228+ - name : Pre-flight check for Hugging Face connectivity
229+ run : |
230+ echo "Testing Hugging Face Hub connectivity..."
231+ curl -fsSIL --max-time 30 https://huggingface.co || { # -f: HTTP errors (4xx/5xx) count as failure; --max-time: a stalled connection cannot hang the job
232+ echo "⚠️ Warning: Cannot reach huggingface.co"
233+ }
234+
235+ # Test one of the model repos (advisory only: a failure prints a warning and the step still succeeds)
236+ curl -fsSIL --max-time 30 https://huggingface.co/LLM-Semantic-Router/category_classifier_modernbert-base_model || {
237+ echo "⚠️ Warning: Cannot reach model repository"
238+ }
239+
240+ echo "✓ Connectivity check completed"
241+
233242 - name : Deploy to kind cluster
234243 run : |
235244 echo "Deploying semantic-router to kind cluster..."
@@ -262,27 +271,55 @@ jobs:
262271 kubectl get pods -n vllm-semantic-router-system
263272
264273 # Wait for init container to complete (model download)
265- echo "Waiting for init container to complete (downloading models)..."
266- kubectl wait --for=condition=Initialized pods -l app=semantic-router -n vllm-semantic-router-system --timeout=600s || {
267- echo "Init container did not complete in time. Showing logs..."
268- kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=100 || true
274+ # Increased timeout to 15 minutes for model downloads
275+ echo "Waiting for init container to complete (downloading models, this may take 10-15 minutes)..."
276+ kubectl wait --for=condition=Initialized pods -l app=semantic-router -n vllm-semantic-router-system --timeout=900s || {
277+ echo "❌ Init container did not complete in time. Showing logs..."
278+ kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=200 || true
279+ echo ""
280+ echo "Checking pod status..."
281+ kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
269282 exit 1
270283 }
271284
272- # Show init container logs to see downloaded file structure
273- echo "=== Init Container Logs (showing file structure) ==="
285+ # Show init container logs and verify models were downloaded
286+ echo "=== Init Container Logs ==="
274287 kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=100 || true
275288
289+ # Verify models were actually downloaded
290+ echo ""
291+ echo "=== Verifying Model Downloads ==="
292+ POD_NAME=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].metadata.name}')
293+
294+ # Check if models directory has content
295+ echo "Checking models directory content..."
296+ kubectl exec -n vllm-semantic-router-system $POD_NAME -- ls -la /app/models/ || {
297+ echo "⚠️ Warning: Could not list models directory"
298+ }
299+
300+ # Count model directories (should be 4). grep -c already prints 0 when nothing matches, so only its non-zero exit status is masked; appending a second "0" would make the value non-numeric and let the check below pass by accident
301+ MODEL_COUNT=$(kubectl exec -n vllm-semantic-router-system $POD_NAME -- sh -c 'ls -1 /app/models/ | grep -c "model" || true')
302+ echo "Found $MODEL_COUNT model directories"
303+
304+ if [ "${MODEL_COUNT:-0}" -lt 4 ]; then
305+ echo "❌ Error: Expected 4 model directories, found $MODEL_COUNT"
306+ echo "Init container may have failed to download all models"
307+ exit 1
308+ fi
309+
310+ echo "✓ All models verified successfully"
311+
276312 # Wait for main container to be ready
313+ echo ""
277314 echo "Waiting for main container to be ready..."
278315 kubectl wait --for=condition=Ready pods -l app=semantic-router -n vllm-semantic-router-system --timeout=300s || {
279- echo "Pod did not become ready in time. Showing status and logs..."
316+ echo "❌ Pod did not become ready in time. Showing status and logs..."
280317 kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
281- kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=100 || true
318+ kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=200 || true
282319 exit 1
283320 }
284321
285- echo "Deployment is ready!"
322+ echo "✅ Deployment is ready!"
286323
287324 - name : Verify deployment
288325 run : |
@@ -406,10 +443,6 @@ jobs:
406443 echo "Cleaning up resources..."
407444 kubectl delete namespace vllm-semantic-router-system --timeout=60s || true
408445
409- # Stop and remove local registry
410- docker stop kind-registry || true
411- docker rm kind-registry || true
412-
413446 test-with-custom-config :
414447 name : Test with Custom Configuration
415448 runs-on : ubuntu-latest
0 commit comments