Skip to content

Commit b82dea1

Browse files
committed
change image loading strategy & models init
Signed-off-by: JaredforReal <[email protected]>
1 parent ea4d715 commit b82dea1

File tree

1 file changed

+98
-65
lines changed

1 file changed

+98
-65
lines changed

.github/workflows/k8s-integration-test.yml

Lines changed: 98 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ jobs:
8282
name: kind Cluster Integration Test
8383
runs-on: ubuntu-latest
8484
needs: validate-manifests
85-
timeout-minutes: 30
85+
timeout-minutes: 45 # Increased to account for model downloads
8686

8787
steps:
8888
- name: Checkout code
@@ -91,6 +91,14 @@ jobs:
9191
- name: Set up Docker Buildx
9292
uses: docker/setup-buildx-action@v3
9393

94+
- name: Create kind cluster
95+
uses: helm/[email protected]
96+
with:
97+
version: ${{ env.KIND_VERSION }}
98+
config: tools/kind/kind-config.yaml
99+
cluster_name: semantic-router-test
100+
wait: 120s
101+
94102
- name: Build semantic-router image
95103
uses: docker/build-push-action@v5
96104
with:
@@ -101,46 +109,11 @@ jobs:
101109
cache-from: type=gha
102110
cache-to: type=gha,mode=max
103111

104-
- name: Create kind cluster with local registry
105-
uses: helm/[email protected]
106-
with:
107-
version: ${{ env.KIND_VERSION }}
108-
config: tools/kind/kind-config.yaml
109-
cluster_name: semantic-router-test
110-
wait: 120s
111-
112-
- name: Setup local Docker registry
113-
run: |
114-
# Create a local registry container
115-
docker run -d -p 5001:5000 --name kind-registry --network kind registry:2
116-
117-
# Connect the registry to the kind network if not already connected
118-
docker network connect kind kind-registry || true
119-
120-
# Document the local registry
121-
kubectl apply -f - <<EOF
122-
apiVersion: v1
123-
kind: ConfigMap
124-
metadata:
125-
name: local-registry-hosting
126-
namespace: kube-public
127-
data:
128-
localRegistryHosting.v1: |
129-
host: "localhost:5001"
130-
help: "https://kind.sigs.k8s.io/docs/user/local-registry/"
131-
EOF
132-
133-
- name: Tag and push image to local registry
112+
- name: Load image into kind cluster
134113
run: |
135-
# Generate unique tag using PR number or run ID
136-
IMAGE_TAG="test-${{ github.run_id }}-${{ github.run_attempt }}"
137-
echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_ENV
138-
139-
# Tag and push to local registry
140-
docker tag ghcr.io/vllm-project/semantic-router/extproc:test localhost:5001/semantic-router/extproc:${IMAGE_TAG}
141-
docker push localhost:5001/semantic-router/extproc:${IMAGE_TAG}
142-
143-
echo "Image pushed to local registry with tag: ${IMAGE_TAG}"
114+
echo "Loading image into kind cluster..."
115+
kind load docker-image ghcr.io/vllm-project/semantic-router/extproc:test --name semantic-router-test
116+
echo "Image loaded successfully!"
144117
145118
- name: Verify cluster
146119
run: |
@@ -169,19 +142,20 @@ jobs:
169142
cp ../deployment.yaml ./deployment.yaml
170143
cp ../pvc.yaml ./pvc.yaml
171144
172-
# Fix init container to use 'hf' command (not 'huggingface-cli')
173-
# This matches the working approach in test-and-build.yml
174-
perl -i -pe 's/huggingface-cli download/hf download/g' deployment.yaml
175-
176-
# Update pip install to include hf_transfer for faster downloads
145+
# Optimize init container for CI testing
146+
# 1. Update pip install to include hf_transfer for faster downloads
177147
perl -i -pe 's/pip install --no-cache-dir huggingface_hub\[cli\]/pip install --no-cache-dir "huggingface_hub[cli]" hf_transfer/g' deployment.yaml
178148
179-
# Remove the directory existence checks - CI always starts fresh
180-
perl -i -0pe 's/if \[ ! -d "[^"]*" \]; then\n[^\n]*\n[^\n]*\n[^\n]*else\n[^\n]*\n[^\n]*fi\n\n//g' deployment.yaml
149+
# 2. Enable HF_HUB_ENABLE_HF_TRANSFER for faster downloads
150+
perl -i -pe 's/(env:)/\1\n - name: HF_HUB_ENABLE_HF_TRANSFER\n value: "1"/g' deployment.yaml
151+
152+
# 3. Simplify the download logic - remove directory checks since CI always starts fresh
153+
# Replace the entire args section with a simpler version
154+
perl -i -0pe 's/args:\s*\n\s*-\s*\|\s*\n\s*set -e.*?ls -la \/app\/models\//args:\n - |\n set -e\n echo "Installing Hugging Face CLI..."\n pip install --no-cache-dir "huggingface_hub[cli]" hf_transfer\n \n echo "Downloading models to persistent volume..."\n cd \/app\/models\n \n echo "Downloading category classifier model..."\n hf download LLM-Semantic-Router\/category_classifier_modernbert-base_model --local-dir category_classifier_modernbert-base_model\n \n echo "Downloading PII classifier model..."\n hf download LLM-Semantic-Router\/pii_classifier_modernbert-base_model --local-dir pii_classifier_modernbert-base_model\n \n echo "Downloading jailbreak classifier model..."\n hf download LLM-Semantic-Router\/jailbreak_classifier_modernbert-base_model --local-dir jailbreak_classifier_modernbert-base_model\n \n echo "Downloading PII token classifier model..."\n hf download LLM-Semantic-Router\/pii_classifier_modernbert-base_presidio_token_model --local-dir pii_classifier_modernbert-base_presidio_token_model\n \n echo "All models downloaded successfully!"\n ls -la \/app\/models\//gs' deployment.yaml
181155
182-
echo "✓ Updated init container to use 'hf' command with optimized settings"
156+
echo "✓ Updated init container with optimized model download for CI"
183157
184-
# Create kustomization with local resources (no PVC for CI)
158+
# Create kustomization with local resources
185159
cat > kustomization.yaml << EOF
186160
apiVersion: kustomize.config.k8s.io/v1beta1
187161
kind: Kustomization
@@ -200,13 +174,14 @@ jobs:
200174
201175
namespace: vllm-semantic-router-system
202176
177+
# Use the same image that was loaded into kind cluster
203178
images:
204179
- name: ghcr.io/vllm-project/semantic-router/extproc
205-
newName: localhost:5001/semantic-router/extproc
206-
newTag: ${IMAGE_TAG}
180+
newTag: test
207181
208-
# Reduce resource requirements for CI testing (main container only)
182+
# Reduce resource requirements for CI testing and set imagePullPolicy
209183
patches:
184+
# Patch for main container
210185
- patch: |-
211186
- op: replace
212187
path: /spec/template/spec/containers/0/resources/requests/memory
@@ -220,6 +195,26 @@ jobs:
220195
- op: replace
221196
path: /spec/template/spec/containers/0/resources/limits/cpu
222197
value: "2"
198+
- op: add
199+
path: /spec/template/spec/containers/0/imagePullPolicy
200+
value: "IfNotPresent"
201+
target:
202+
kind: Deployment
203+
name: semantic-router
204+
# Patch for init container - increase resources for faster downloads
205+
- patch: |-
206+
- op: replace
207+
path: /spec/template/spec/initContainers/0/resources/requests/memory
208+
value: "1Gi"
209+
- op: replace
210+
path: /spec/template/spec/initContainers/0/resources/requests/cpu
211+
value: "500m"
212+
- op: replace
213+
path: /spec/template/spec/initContainers/0/resources/limits/memory
214+
value: "2Gi"
215+
- op: replace
216+
path: /spec/template/spec/initContainers/0/resources/limits/cpu
217+
value: "1"
223218
target:
224219
kind: Deployment
225220
name: semantic-router
@@ -230,6 +225,20 @@ jobs:
230225
echo "=== Files in overlay directory ==="
231226
ls -la
232227
228+
- name: Pre-flight check for Hugging Face connectivity
229+
run: |
230+
echo "Testing Hugging Face Hub connectivity..."
231+
# Bound each probe: this check is advisory, so an unresponsive host should produce a warning quickly instead of stalling the step.
curl -I --connect-timeout 10 --max-time 30 https://huggingface.co || {
232+
echo "⚠️ Warning: Cannot reach huggingface.co"
233+
}
234+
235+
# Test one of the model repos
236+
curl -I --connect-timeout 10 --max-time 30 https://huggingface.co/LLM-Semantic-Router/category_classifier_modernbert-base_model || {
237+
echo "⚠️ Warning: Cannot reach model repository"
238+
}
239+
240+
echo "✓ Connectivity check completed"
241+
233242
- name: Deploy to kind cluster
234243
run: |
235244
echo "Deploying semantic-router to kind cluster..."
@@ -262,27 +271,55 @@ jobs:
262271
kubectl get pods -n vllm-semantic-router-system
263272
264273
# Wait for init container to complete (model download)
265-
echo "Waiting for init container to complete (downloading models)..."
266-
kubectl wait --for=condition=Initialized pods -l app=semantic-router -n vllm-semantic-router-system --timeout=600s || {
267-
echo "Init container did not complete in time. Showing logs..."
268-
kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=100 || true
274+
# Increased timeout to 15 minutes for model downloads
275+
echo "Waiting for init container to complete (downloading models, this may take 10-15 minutes)..."
276+
kubectl wait --for=condition=Initialized pods -l app=semantic-router -n vllm-semantic-router-system --timeout=900s || {
277+
echo "❌ Init container did not complete in time. Showing logs..."
278+
kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=200 || true
279+
echo ""
280+
echo "Checking pod status..."
281+
kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
269282
exit 1
270283
}
271284
272-
# Show init container logs to see downloaded file structure
273-
echo "=== Init Container Logs (showing file structure) ==="
285+
# Show init container logs and verify models were downloaded
286+
echo "=== Init Container Logs ==="
274287
kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=100 || true
275288
289+
# Verify models were actually downloaded
290+
echo ""
291+
echo "=== Verifying Model Downloads ==="
292+
POD_NAME=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].metadata.name}')
293+
294+
# Check if models directory has content
295+
echo "Checking models directory content..."
296+
kubectl exec -n vllm-semantic-router-system $POD_NAME -- ls -la /app/models/ || {
297+
echo "⚠️ Warning: Could not list models directory"
298+
}
299+
300+
# Count model directories (should be 4). grep -c already prints 0 (and exits 1) when nothing matches, so swallow the status with "|| true"; "|| echo 0" would emit a second 0 and break the numeric comparison below.
301+
MODEL_COUNT=$(kubectl exec -n vllm-semantic-router-system "$POD_NAME" -- sh -c 'ls -1 /app/models/ | grep -c "model" || true')
302+
echo "Found $MODEL_COUNT model directories"
303+
304+
if [ "$MODEL_COUNT" -lt 4 ]; then
305+
echo "❌ Error: Expected 4 model directories, found $MODEL_COUNT"
306+
echo "Init container may have failed to download all models"
307+
exit 1
308+
fi
309+
310+
echo "✓ All models verified successfully"
311+
276312
# Wait for main container to be ready
313+
echo ""
277314
echo "Waiting for main container to be ready..."
278315
kubectl wait --for=condition=Ready pods -l app=semantic-router -n vllm-semantic-router-system --timeout=300s || {
279-
echo "Pod did not become ready in time. Showing status and logs..."
316+
echo "Pod did not become ready in time. Showing status and logs..."
280317
kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
281-
kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=100 || true
318+
kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=200 || true
282319
exit 1
283320
}
284321
285-
echo "Deployment is ready!"
322+
echo "Deployment is ready!"
286323
287324
- name: Verify deployment
288325
run: |
@@ -406,10 +443,6 @@ jobs:
406443
echo "Cleaning up resources..."
407444
kubectl delete namespace vllm-semantic-router-system --timeout=60s || true
408445
409-
# Stop and remove local registry
410-
docker stop kind-registry || true
411-
docker rm kind-registry || true
412-
413446
test-with-custom-config:
414447
name: Test with Custom Configuration
415448
runs-on: ubuntu-latest

0 commit comments

Comments
 (0)