Skip to content

Commit fded467

Browse files
committed
use bigger memory and delete network config
Signed-off-by: JaredforReal <[email protected]>
1 parent 8703149 commit fded467

File tree

4 files changed

+17
-30
lines changed

4 files changed

+17
-30
lines changed

.github/workflows/k8s-api-functionality-test.yml

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -112,16 +112,16 @@ jobs:
112112
- patch: |-
113113
- op: replace
114114
path: /spec/template/spec/containers/0/resources/requests/memory
115-
value: "512Mi"
115+
value: "2Gi"
116116
- op: replace
117117
path: /spec/template/spec/containers/0/resources/requests/cpu
118-
value: "250m"
118+
value: "500m"
119119
- op: replace
120120
path: /spec/template/spec/containers/0/resources/limits/memory
121-
value: "1Gi"
121+
value: "4Gi"
122122
- op: replace
123123
path: /spec/template/spec/containers/0/resources/limits/cpu
124-
value: "500m"
124+
value: "1"
125125
- op: add
126126
path: /spec/template/spec/containers/0/imagePullPolicy
127127
value: "IfNotPresent"
@@ -141,7 +141,7 @@ jobs:
141141
kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/vllm-semantic-router-system --timeout=60s
142142
kubectl wait --for=jsonpath='{.status.phase}'=Bound pvc/semantic-router-models -n vllm-semantic-router-system --timeout=120s
143143
kubectl wait --for=condition=Initialized pods -l app=semantic-router -n vllm-semantic-router-system --timeout=600s
144-
kubectl wait --for=condition=Ready pods -l app=semantic-router -n vllm-semantic-router-system --timeout=300s
144+
kubectl wait --for=condition=Ready pods -l app=semantic-router -n vllm-semantic-router-system --timeout=600s
145145
146146
- name: Run comprehensive API tests
147147
run: |
@@ -150,9 +150,9 @@ jobs:
150150
POD_NAME=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].metadata.name}')
151151
echo "Pod name: $POD_NAME"
152152
153-
# Wait for API to be ready
153+
# Wait for API to be ready (increased timeout for model loading)
154154
echo "Waiting for API to be ready..."
155-
timeout 120 bash -c 'until kubectl exec -n vllm-semantic-router-system $POD_NAME -- curl -s http://localhost:8080/health > /dev/null 2>&1; do echo "Waiting for API..."; sleep 10; done'
155+
timeout 300 bash -c 'until kubectl exec -n vllm-semantic-router-system $POD_NAME -- curl -s http://localhost:8080/health > /dev/null 2>&1; do echo "Waiting for API..."; sleep 10; done'
156156
157157
# Test results tracking
158158
TESTS_PASSED=0

.github/workflows/k8s-integration-test.yml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -44,37 +44,37 @@ jobs:
4444
# Step 1: Validate Kubernetes manifests
4545
validate-manifests:
4646
uses: ./.github/workflows/k8s-validate-manifests.yml
47-
with:
47+
with:
4848
kustomize_version: v5.7.1
4949

5050
# Step 2: Run kind cluster integration test
5151
kind-integration-test:
5252
uses: ./.github/workflows/k8s-kind-integration-test.yml
5353
needs: validate-manifests
54-
with:
54+
with:
5555
kind_version: v0.20.0
5656
kustomize_version: v5.7.1
5757

5858
# Step 3: Run comprehensive API functionality tests
5959
test-api-functionality:
6060
uses: ./.github/workflows/k8s-api-functionality-test.yml
6161
needs: kind-integration-test
62-
with:
62+
with:
6363
kind_version: v0.20.0
6464
kustomize_version: v5.7.1
6565

6666
# Step 4: Test with custom configurations
6767
test-with-custom-config:
6868
uses: ./.github/workflows/k8s-config-test.yml
6969
needs: validate-manifests
70-
with:
70+
with:
7171
kustomize_version: v5.7.1
7272

7373
# Step 5: Run security scans
7474
security-scan:
7575
uses: ./.github/workflows/k8s-security-scan.yml
7676
needs: validate-manifests
77-
with:
77+
with:
7878
kustomize_version: v5.7.1
7979

8080
# Step 6: Generate test summary

.github/workflows/k8s-kind-integration-test.yml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -193,18 +193,18 @@ jobs:
193193
- name: ghcr.io/vllm-project/semantic-router/extproc
194194
newTag: test
195195
196-
# Patch for CI - reduce resources and set imagePullPolicy
196+
# Patch for CI - adjust resources for model loading and set imagePullPolicy
197197
patches:
198198
- patch: |-
199199
- op: replace
200200
path: /spec/template/spec/containers/0/resources/requests/memory
201-
value: "1Gi"
201+
value: "2Gi"
202202
- op: replace
203203
path: /spec/template/spec/containers/0/resources/requests/cpu
204204
value: "500m"
205205
- op: replace
206206
path: /spec/template/spec/containers/0/resources/limits/memory
207-
value: "2Gi"
207+
value: "4Gi"
208208
- op: replace
209209
path: /spec/template/spec/containers/0/resources/limits/cpu
210210
value: "1"
@@ -288,9 +288,9 @@ jobs:
288288
exit 1
289289
}
290290
291-
# Wait for main container to be ready
291+
# Wait for main container to be ready (increased timeout for model loading)
292292
echo "Waiting for main container to be ready..."
293-
kubectl wait --for=condition=Ready pods -l app=semantic-router -n vllm-semantic-router-system --timeout=300s || {
293+
kubectl wait --for=condition=Ready pods -l app=semantic-router -n vllm-semantic-router-system --timeout=600s || {
294294
echo "❌ Pod did not become ready in time. Showing status and logs..."
295295
kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
296296
kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=200 || true

deploy/kubernetes/deployment.yaml

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -97,19 +97,6 @@ spec:
9797
env:
9898
- name: HF_HUB_CACHE
9999
value: /tmp/hf_cache
100-
# China Mirror
101-
- name: HUGGINGFACE_HUB_CACHE
102-
value: /tmp/hf_cache
103-
- name: HUGGINGFACE_HUB_ENABLE_HF_TRANSFER
104-
value: "1"
105-
- name: HUGGINGFACE_HUB_DOWNLOAD_TIMEOUT
106-
value: "300"
107-
- name: HUGGINGFACE_HUB_PROXY_URL
108-
value: "https://hf-mirror.com"
109-
- name: PIP_INDEX_URL
110-
value: https://pypi.tuna.tsinghua.edu.cn/simple
111-
- name: NO_PROXY
112-
value: localhost,127.0.0.1,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,.svc,.svc.cluster.local
113100
# Reduced resource requirements for init container
114101
resources:
115102
requests:

0 commit comments

Comments
 (0)