@@ -101,139 +101,139 @@ jobs:
101
101
~/.kube/config
102
102
src/tests/perftest/logs
103
103
104
- k8s-discovery-e2e-test :
105
- runs-on : self-hosted
106
- needs : e2e-test
107
- if : github.event.pull_request.draft == false
108
- steps :
109
- - name : Check out repository code
110
- uses : actions/checkout@v4
111
-
112
- - name : Setup Python
113
- uses : actions/setup-python@v5
114
- with :
115
- python-version : " 3.12"
116
-
117
- - name : Install Python dependencies
118
- run : |
119
- python -m pip install --upgrade pip
120
- pip install -r benchmarks/multi-round-qa/requirements.txt
121
- pip install -e .
122
-
123
- - name : Setup minikube environment
124
- env :
125
- DOCKER_BUILDKIT : 1
126
- run : |
127
- echo "🔧 Setting up minikube environment"
128
- sudo sysctl fs.protected_regular=0
129
- # Verify minikube is running
130
- minikube status
131
- # Ensure kubectl is configured for minikube
132
- kubectl config use-context minikube
133
-
134
- - name : Build and deploy router image
135
- env :
136
- DOCKER_BUILDKIT : 1
137
- run : |
138
- echo "🔨 Building router docker image"
139
- cd ${{ github.workspace }}
140
- eval "$(minikube docker-env)"
141
- docker build --build-arg INSTALL_OPTIONAL_DEP=default -t git-act-router -f docker/Dockerfile.kvaware .
142
-
143
- - name : Run all k8s discovery routing tests
144
- run : |
145
- echo "🧪 Running all k8s discovery routing tests"
146
- ./tests/e2e/run-k8s-routing-test.sh all \
147
- --model "facebook/opt-125m" \
148
- --num-requests 25 \
149
- --chunk-size 128 \
150
- --verbose \
151
- --result-dir /tmp/k8s-discovery-routing-results-pr-${{ github.event.pull_request.number || 'main' }} \
152
- --timeout 10
153
- timeout-minutes : 10
154
-
155
- - name : Archive k8s discovery routing test results
156
- uses : actions/upload-artifact@v4
157
- if : always()
158
- with :
159
- name : k8s-discovery-routing-test-results-pr-${{ github.event.pull_request.number || 'main' }}
160
- path : |
161
- /tmp/k8s-discovery-routing-results-pr-${{ github.event.pull_request.number || 'main' }}/*
162
-
163
- - run : echo "🍏 K8s discovery e2e test job status is ${{ job.status }}."
164
-
165
- static-discovery-e2e-test :
166
- runs-on : self-hosted
167
- needs : e2e-test
168
- if : github.event.pull_request.draft == false
169
- env :
170
- LOG_DIR : /tmp/static-discovery-e2e-test-${{ github.event.pull_request.number || 'main' }}
171
-
172
- steps :
173
- - name : Check out repository code
174
- uses : actions/checkout@v4
175
-
176
- - name : Setup Python
177
- uses : actions/setup-python@v5
178
- with :
179
- python-version : " 3.12"
180
-
181
- - name : Install Python dependencies
182
- run : |
183
- python -m pip install --upgrade pip
184
- pip install -e .
185
-
186
- - name : Install vLLM and lmcache
187
- run : |
188
- pip install vllm
189
- pip install lmcache
190
-
191
- - name : Start 2 vLLM serve backends
192
- run : |
193
- echo "🚀 Starting vLLM serve backend"
194
- mkdir -p "$LOG_DIR"
195
- CUDA_VISIBLE_DEVICES=0 vllm serve facebook/opt-125m --port 8001 --gpu-memory-utilization 0.7 --chat-template .github/template-chatml.jinja > "$LOG_DIR/backend1.log" 2>&1 &
196
- CUDA_VISIBLE_DEVICES=1 vllm serve facebook/opt-125m --port 8002 --gpu-memory-utilization 0.7 --chat-template .github/template-chatml.jinja > "$LOG_DIR/backend2.log" 2>&1 &
197
-
198
- - name : Wait for backends to be ready
199
- run : |
200
- echo "⏳ Waiting for backends to be ready"
201
- chmod +x tests/e2e/wait-for-backends.sh
202
- ./tests/e2e/wait-for-backends.sh 180 "http://localhost:8001" "http://localhost:8002"
203
-
204
- - name : Run All Static Discovery Routing Tests
205
- env :
206
- PYTHONPATH : ${{ github.workspace }}/src
207
- run : |
208
- echo "🧪 Running all static discovery routing tests sequentially"
209
- chmod +x tests/e2e/run-static-discovery-routing-test.sh
210
- ./tests/e2e/run-static-discovery-routing-test.sh all \
211
- --pythonpath "$PYTHONPATH" \
212
- --log-dir "$LOG_DIR" \
213
- --num-requests 20 \
214
- --verbose \
215
- --backends-url "http://localhost:8001,http://localhost:8002"
216
- timeout-minutes : 5
217
-
218
- - name : Archive static discovery test results and logs
219
- uses : actions/upload-artifact@v4
220
- if : always()
221
- with :
222
- name : static-discovery-test-results-pr-${{ github.event.pull_request.number || 'main' }}
223
- path : |
224
- ${{ env.LOG_DIR }}/*
225
-
226
- - name : Cleanup processes
227
- if : always()
228
- run : |
229
- echo "🧹 Cleaning up processes"
230
- pkill -f "vllm serve" || true
231
- pkill -f "python3 -m src.vllm_router.app" || true
232
-
233
- - run : echo "🍏 Static discovery e2e test job status is ${{ job.status }}."
104
+ # k8s-discovery-e2e-test:
105
+ # runs-on: self-hosted
106
+ # needs: e2e-test
107
+ # if: github.event.pull_request.draft == false
108
+ # steps:
109
+ # - name: Check out repository code
110
+ # uses: actions/checkout@v4
111
+
112
+ # - name: Setup Python
113
+ # uses: actions/setup-python@v5
114
+ # with:
115
+ # python-version: "3.12"
116
+
117
+ # - name: Install Python dependencies
118
+ # run: |
119
+ # python -m pip install --upgrade pip
120
+ # pip install -r benchmarks/multi-round-qa/requirements.txt
121
+ # pip install -e .
122
+
123
+ # - name: Setup minikube environment
124
+ # env:
125
+ # DOCKER_BUILDKIT: 1
126
+ # run: |
127
+ # echo "🔧 Setting up minikube environment"
128
+ # sudo sysctl fs.protected_regular=0
129
+ # # Verify minikube is running
130
+ # minikube status
131
+ # # Ensure kubectl is configured for minikube
132
+ # kubectl config use-context minikube
133
+
134
+ # - name: Build and deploy router image
135
+ # env:
136
+ # DOCKER_BUILDKIT: 1
137
+ # run: |
138
+ # echo "🔨 Building router docker image"
139
+ # cd ${{ github.workspace }}
140
+ # eval "$(minikube docker-env)"
141
+ # docker build --build-arg INSTALL_OPTIONAL_DEP=default -t git-act-router -f docker/Dockerfile.kvaware .
142
+
143
+ # - name: Run all k8s discovery routing tests
144
+ # run: |
145
+ # echo "🧪 Running all k8s discovery routing tests"
146
+ # ./tests/e2e/run-k8s-routing-test.sh all \
147
+ # --model "facebook/opt-125m" \
148
+ # --num-requests 25 \
149
+ # --chunk-size 128 \
150
+ # --verbose \
151
+ # --result-dir /tmp/k8s-discovery-routing-results-pr-${{ github.event.pull_request.number || 'main' }} \
152
+ # --timeout 10
153
+ # timeout-minutes: 10
154
+
155
+ # - name: Archive k8s discovery routing test results
156
+ # uses: actions/upload-artifact@v4
157
+ # if: always()
158
+ # with:
159
+ # name: k8s-discovery-routing-test-results-pr-${{ github.event.pull_request.number || 'main' }}
160
+ # path: |
161
+ # /tmp/k8s-discovery-routing-results-pr-${{ github.event.pull_request.number || 'main' }}/*
162
+
163
+ # - run: echo "🍏 K8s discovery e2e test job status is ${{ job.status }}."
164
+
165
+ # static-discovery-e2e-test:
166
+ # runs-on: self-hosted
167
+ # needs: e2e-test
168
+ # if: github.event.pull_request.draft == false
169
+ # env:
170
+ # LOG_DIR: /tmp/static-discovery-e2e-test-${{ github.event.pull_request.number || 'main' }}
171
+
172
+ # steps:
173
+ # - name: Check out repository code
174
+ # uses: actions/checkout@v4
175
+
176
+ # - name: Setup Python
177
+ # uses: actions/setup-python@v5
178
+ # with:
179
+ # python-version: "3.12"
180
+
181
+ # - name: Install Python dependencies
182
+ # run: |
183
+ # python -m pip install --upgrade pip
184
+ # pip install -e .
185
+
186
+ # - name: Install vLLM and lmcache
187
+ # run: |
188
+ # pip install vllm
189
+ # pip install lmcache
190
+
191
+ # - name: Start 2 vLLM serve backends
192
+ # run: |
193
+ # echo "🚀 Starting vLLM serve backend"
194
+ # mkdir -p "$LOG_DIR"
195
+ # CUDA_VISIBLE_DEVICES=0 vllm serve facebook/opt-125m --port 8001 --gpu-memory-utilization 0.7 --chat-template .github/template-chatml.jinja > "$LOG_DIR/backend1.log" 2>&1 &
196
+ # CUDA_VISIBLE_DEVICES=1 vllm serve facebook/opt-125m --port 8002 --gpu-memory-utilization 0.7 --chat-template .github/template-chatml.jinja > "$LOG_DIR/backend2.log" 2>&1 &
197
+
198
+ # - name: Wait for backends to be ready
199
+ # run: |
200
+ # echo "⏳ Waiting for backends to be ready"
201
+ # chmod +x tests/e2e/wait-for-backends.sh
202
+ # ./tests/e2e/wait-for-backends.sh 180 "http://localhost:8001" "http://localhost:8002"
203
+
204
+ # - name: Run All Static Discovery Routing Tests
205
+ # env:
206
+ # PYTHONPATH: ${{ github.workspace }}/src
207
+ # run: |
208
+ # echo "🧪 Running all static discovery routing tests sequentially"
209
+ # chmod +x tests/e2e/run-static-discovery-routing-test.sh
210
+ # ./tests/e2e/run-static-discovery-routing-test.sh all \
211
+ # --pythonpath "$PYTHONPATH" \
212
+ # --log-dir "$LOG_DIR" \
213
+ # --num-requests 20 \
214
+ # --verbose \
215
+ # --backends-url "http://localhost:8001,http://localhost:8002"
216
+ # timeout-minutes: 5
217
+
218
+ # - name: Archive static discovery test results and logs
219
+ # uses: actions/upload-artifact@v4
220
+ # if: always()
221
+ # with:
222
+ # name: static-discovery-test-results-pr-${{ github.event.pull_request.number || 'main' }}
223
+ # path: |
224
+ # ${{ env.LOG_DIR }}/*
225
+
226
+ # - name: Cleanup processes
227
+ # if: always()
228
+ # run: |
229
+ # echo "🧹 Cleaning up processes"
230
+ # pkill -f "vllm serve" || true
231
+ # pkill -f "python3 -m src.vllm_router.app" || true
232
+
233
+ # - run: echo "🍏 Static discovery e2e test job status is ${{ job.status }}."
234
234
235
235
stress-test :
236
- runs-on : self-hosted
236
+ runs-on : ubuntu-latest
237
237
needs : e2e-test
238
238
if : github.event.pull_request.draft == false
239
239
env :
0 commit comments