@@ -101,139 +101,139 @@ jobs:
101
101
~/.kube/config
102
102
src/tests/perftest/logs
103
103
104
- # k8s-discovery-e2e-test:
105
- # runs-on: self-hosted
106
- # needs: e2e-test
107
- # if: github.event.pull_request.draft == false
108
- # steps:
109
- # - name: Check out repository code
110
- # uses: actions/checkout@v4
111
-
112
- # - name: Setup Python
113
- # uses: actions/setup-python@v5
114
- # with:
115
- # python-version: "3.12"
116
-
117
- # - name: Install Python dependencies
118
- # run: |
119
- # python -m pip install --upgrade pip
120
- # pip install -r benchmarks/multi-round-qa/requirements.txt
121
- # pip install -e .
122
-
123
- # - name: Setup minikube environment
124
- # env:
125
- # DOCKER_BUILDKIT: 1
126
- # run: |
127
- # echo "🔧 Setting up minikube environment"
128
- # sudo sysctl fs.protected_regular=0
129
- # # Verify minikube is running
130
- # minikube status
131
- # # Ensure kubectl is configured for minikube
132
- # kubectl config use-context minikube
133
-
134
- # - name: Build and deploy router image
135
- # env:
136
- # DOCKER_BUILDKIT: 1
137
- # run: |
138
- # echo "🔨 Building router docker image"
139
- # cd ${{ github.workspace }}
140
- # eval "$(minikube docker-env)"
141
- # docker build --build-arg INSTALL_OPTIONAL_DEP=default -t git-act-router -f docker/Dockerfile.kvaware .
142
-
143
- # - name: Run all k8s discovery routing tests
144
- # run: |
145
- # echo "🧪 Running all k8s discovery routing tests"
146
- # ./tests/e2e/run-k8s-routing-test.sh all \
147
- # --model "facebook/opt-125m" \
148
- # --num-requests 25 \
149
- # --chunk-size 128 \
150
- # --verbose \
151
- # --result-dir /tmp/k8s-discovery-routing-results-pr-${{ github.event.pull_request.number || 'main' }} \
152
- # --timeout 10
153
- # timeout-minutes: 10
154
-
155
- # - name: Archive k8s discovery routing test results
156
- # uses: actions/upload-artifact@v4
157
- # if: always()
158
- # with:
159
- # name: k8s-discovery-routing-test-results-pr-${{ github.event.pull_request.number || 'main' }}
160
- # path: |
161
- # /tmp/k8s-discovery-routing-results-pr-${{ github.event.pull_request.number || 'main' }}/*
162
-
163
- # - run: echo "🍏 K8s discovery e2e test job status is ${{ job.status }}."
164
-
165
- # static-discovery-e2e-test:
166
- # runs-on: self-hosted
167
- # needs: e2e-test
168
- # if: github.event.pull_request.draft == false
169
- # env:
170
- # LOG_DIR: /tmp/static-discovery-e2e-test-${{ github.event.pull_request.number || 'main' }}
171
-
172
- # steps:
173
- # - name: Check out repository code
174
- # uses: actions/checkout@v4
175
-
176
- # - name: Setup Python
177
- # uses: actions/setup-python@v5
178
- # with:
179
- # python-version: "3.12"
180
-
181
- # - name: Install Python dependencies
182
- # run: |
183
- # python -m pip install --upgrade pip
184
- # pip install -e .
185
-
186
- # - name: Install vLLM and lmcache
187
- # run: |
188
- # pip install vllm
189
- # pip install lmcache
190
-
191
- # - name: Start 2 vLLM serve backends
192
- # run: |
193
- # echo "🚀 Starting vLLM serve backend"
194
- # mkdir -p "$LOG_DIR"
195
- # CUDA_VISIBLE_DEVICES=0 vllm serve facebook/opt-125m --port 8001 --gpu-memory-utilization 0.7 --chat-template .github/template-chatml.jinja > "$LOG_DIR/backend1.log" 2>&1 &
196
- # CUDA_VISIBLE_DEVICES=1 vllm serve facebook/opt-125m --port 8002 --gpu-memory-utilization 0.7 --chat-template .github/template-chatml.jinja > "$LOG_DIR/backend2.log" 2>&1 &
197
-
198
- # - name: Wait for backends to be ready
199
- # run: |
200
- # echo "⏳ Waiting for backends to be ready"
201
- # chmod +x tests/e2e/wait-for-backends.sh
202
- # ./tests/e2e/wait-for-backends.sh 180 "http://localhost:8001" "http://localhost:8002"
203
-
204
- # - name: Run All Static Discovery Routing Tests
205
- # env:
206
- # PYTHONPATH: ${{ github.workspace }}/src
207
- # run: |
208
- # echo "🧪 Running all static discovery routing tests sequentially"
209
- # chmod +x tests/e2e/run-static-discovery-routing-test.sh
210
- # ./tests/e2e/run-static-discovery-routing-test.sh all \
211
- # --pythonpath "$PYTHONPATH" \
212
- # --log-dir "$LOG_DIR" \
213
- # --num-requests 20 \
214
- # --verbose \
215
- # --backends-url "http://localhost:8001,http://localhost:8002"
216
- # timeout-minutes: 5
217
-
218
- # - name: Archive static discovery test results and logs
219
- # uses: actions/upload-artifact@v4
220
- # if: always()
221
- # with:
222
- # name: static-discovery-test-results-pr-${{ github.event.pull_request.number || 'main' }}
223
- # path: |
224
- # ${{ env.LOG_DIR }}/*
225
-
226
- # - name: Cleanup processes
227
- # if: always()
228
- # run: |
229
- # echo "🧹 Cleaning up processes"
230
- # pkill -f "vllm serve" || true
231
- # pkill -f "python3 -m src.vllm_router.app" || true
232
-
233
- # - run: echo "🍏 Static discovery e2e test job status is ${{ job.status }}."
104
+ k8s-discovery-e2e-test :
105
+ runs-on : self-hosted
106
+ needs : e2e-test
107
+ if : github.event.pull_request.draft == false
108
+ steps :
109
+ - name : Check out repository code
110
+ uses : actions/checkout@v4
111
+
112
+ - name : Setup Python
113
+ uses : actions/setup-python@v5
114
+ with :
115
+ python-version : "3.12"
116
+
117
+ - name : Install Python dependencies
118
+ run : |
119
+ python -m pip install --upgrade pip
120
+ pip install -r benchmarks/multi-round-qa/requirements.txt
121
+ pip install -e .
122
+
123
+ - name : Setup minikube environment
124
+ env :
125
+ DOCKER_BUILDKIT : 1
126
+ run : |
127
+ echo "🔧 Setting up minikube environment"
128
+ sudo sysctl fs.protected_regular=0
129
+ # Verify minikube is running
130
+ minikube status
131
+ # Ensure kubectl is configured for minikube
132
+ kubectl config use-context minikube
133
+
134
+ - name : Build and deploy router image
135
+ env :
136
+ DOCKER_BUILDKIT : 1
137
+ run : |
138
+ echo "🔨 Building router docker image"
139
+ cd ${{ github.workspace }}
140
+ eval "$(minikube docker-env)"
141
+ docker build --build-arg INSTALL_OPTIONAL_DEP=default -t git-act-router -f docker/Dockerfile.kvaware .
142
+
143
+ - name : Run all k8s discovery routing tests
144
+ run : |
145
+ echo "🧪 Running all k8s discovery routing tests"
146
+ ./tests/e2e/run-k8s-routing-test.sh all \
147
+ --model "facebook/opt-125m" \
148
+ --num-requests 25 \
149
+ --chunk-size 128 \
150
+ --verbose \
151
+ --result-dir /tmp/k8s-discovery-routing-results-pr-${{ github.event.pull_request.number || 'main' }} \
152
+ --timeout 10
153
+ timeout-minutes : 10
154
+
155
+ - name : Archive k8s discovery routing test results
156
+ uses : actions/upload-artifact@v4
157
+ if : always()
158
+ with :
159
+ name : k8s-discovery-routing-test-results-pr-${{ github.event.pull_request.number || 'main' }}
160
+ path : |
161
+ /tmp/k8s-discovery-routing-results-pr-${{ github.event.pull_request.number || 'main' }}/*
162
+
163
+ - run : echo "🍏 K8s discovery e2e test job status is ${{ job.status }}."
164
+
165
+ static-discovery-e2e-test :
166
+ runs-on : self-hosted
167
+ needs : e2e-test
168
+ if : github.event.pull_request.draft == false
169
+ env :
170
+ LOG_DIR : /tmp/static-discovery-e2e-test-${{ github.event.pull_request.number || 'main' }}
171
+
172
+ steps :
173
+ - name : Check out repository code
174
+ uses : actions/checkout@v4
175
+
176
+ - name : Setup Python
177
+ uses : actions/setup-python@v5
178
+ with :
179
+ python-version : "3.12"
180
+
181
+ - name : Install Python dependencies
182
+ run : |
183
+ python -m pip install --upgrade pip
184
+ pip install -e .
185
+
186
+ - name : Install vLLM and lmcache
187
+ run : |
188
+ pip install vllm
189
+ pip install lmcache
190
+
191
+ - name : Start 2 vLLM serve backends
192
+ run : |
193
+ echo "🚀 Starting vLLM serve backend"
194
+ mkdir -p "$LOG_DIR"
195
+ CUDA_VISIBLE_DEVICES=0 vllm serve facebook/opt-125m --port 8001 --gpu-memory-utilization 0.7 --chat-template .github/template-chatml.jinja > "$LOG_DIR/backend1.log" 2>&1 &
196
+ CUDA_VISIBLE_DEVICES=1 vllm serve facebook/opt-125m --port 8002 --gpu-memory-utilization 0.7 --chat-template .github/template-chatml.jinja > "$LOG_DIR/backend2.log" 2>&1 &
197
+
198
+ - name : Wait for backends to be ready
199
+ run : |
200
+ echo "⏳ Waiting for backends to be ready"
201
+ chmod +x tests/e2e/wait-for-backends.sh
202
+ ./tests/e2e/wait-for-backends.sh 180 "http://localhost:8001" "http://localhost:8002"
203
+
204
+ - name : Run All Static Discovery Routing Tests
205
+ env :
206
+ PYTHONPATH : ${{ github.workspace }}/src
207
+ run : |
208
+ echo "🧪 Running all static discovery routing tests sequentially"
209
+ chmod +x tests/e2e/run-static-discovery-routing-test.sh
210
+ ./tests/e2e/run-static-discovery-routing-test.sh all \
211
+ --pythonpath "$PYTHONPATH" \
212
+ --log-dir "$LOG_DIR" \
213
+ --num-requests 20 \
214
+ --verbose \
215
+ --backends-url "http://localhost:8001,http://localhost:8002"
216
+ timeout-minutes : 5
217
+
218
+ - name : Archive static discovery test results and logs
219
+ uses : actions/upload-artifact@v4
220
+ if : always()
221
+ with :
222
+ name : static-discovery-test-results-pr-${{ github.event.pull_request.number || 'main' }}
223
+ path : |
224
+ ${{ env.LOG_DIR }}/*
225
+
226
+ - name : Cleanup processes
227
+ if : always()
228
+ run : |
229
+ echo "🧹 Cleaning up processes"
230
+ pkill -f "vllm serve" || true
231
+ pkill -f "python3 -m src.vllm_router.app" || true
232
+
233
+ - run : echo "🍏 Static discovery e2e test job status is ${{ job.status }}."
234
234
235
235
router-stress-test :
236
- runs-on : ubuntu-latest
236
+ runs-on : self-hosted
237
237
needs : e2e-test
238
238
if : github.event.pull_request.draft == false
239
239
env :
@@ -266,14 +266,14 @@ jobs:
266
266
echo "🧪 Running router stress test with mock backends"
267
267
chmod +x tests/e2e/router-stress-test.sh
268
268
./tests/e2e/router-stress-test.sh \
269
- --concurrent 100 \
270
- --requests 1000 \
269
+ --concurrent 20000 \
270
+ --requests 100000 \
271
271
--port 30080 \
272
272
--log-dir "$LOG_DIR" \
273
273
--model "facebook/opt-125m" \
274
274
--backend1-port 8000 \
275
275
--backend2-port 8001
276
- timeout-minutes : 2
276
+ timeout-minutes : 10
277
277
278
278
- name : Archive router stress test results and logs
279
279
uses : actions/upload-artifact@v4
0 commit comments