Skip to content

Commit fc7e0d5

Browse files
committed
[ci] Add integration tests for new large model architectures
1 parent 9da2977 commit fc7e0d5

File tree

5 files changed

+155
-50
lines changed

5 files changed

+155
-50
lines changed

.github/workflows/integration.yml

Lines changed: 67 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,18 @@ on:
3737
outputs:
3838
failure_cpu:
3939
value: ${{ jobs.test.outputs.failure_cpu || '0' }}
40-
failure_gpu:
41-
value: ${{ jobs.test.outputs.failure_gpu || '0' }}
40+
failure_g6:
41+
value: ${{ jobs.test.outputs.failure_g6 || '0' }}
4242
failure_aarch64:
4343
value: ${{ jobs.test.outputs.failure_aarch64 || '0' }}
4444
failure_lmi:
4545
value: ${{ jobs.test.outputs.failure_lmi || '0' }}
4646
failure_trtllm:
4747
value: ${{ jobs.test.outputs.failure_trtllm || '0' }}
48+
failure_p4d:
49+
value: ${{ jobs.test.outputs.failure_p4d || '0' }}
50+
# failure_p4de:
51+
# value: ${{ jobs.test.outputs.failure_p4de || '0' }}
4852

4953

5054
permissions:
@@ -56,7 +60,7 @@ jobs:
5660
runs-on: [self-hosted, scheduler]
5761
steps:
5862
- name: Create new G6 instance
59-
id: create_gpu
63+
id: create_g6_1
6064
run: |
6165
cd /home/ubuntu/djl_benchmark_script/scripts
6266
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
@@ -65,7 +69,7 @@ jobs:
6569
| jq '.token' | tr -d '"' )
6670
./start_instance.sh action_g6 $token djl-serving
6771
- name: Create new G6 instance
68-
id: create_gpu2
72+
id: create_g6_2
6973
run: |
7074
cd /home/ubuntu/djl_benchmark_script/scripts
7175
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
@@ -74,7 +78,7 @@ jobs:
7478
| jq '.token' | tr -d '"' )
7579
./start_instance.sh action_g6 $token djl-serving
7680
- name: Create new G6 instance
77-
id: create_gpu3
81+
id: create_g6_3
7882
run: |
7983
cd /home/ubuntu/djl_benchmark_script/scripts
8084
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
@@ -83,7 +87,7 @@ jobs:
8387
| jq '.token' | tr -d '"' )
8488
./start_instance.sh action_g6 $token djl-serving
8589
- name: Create new G6 instance
86-
id: create_gpu4
90+
id: create_g6_4
8791
run: |
8892
cd /home/ubuntu/djl_benchmark_script/scripts
8993
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
@@ -110,14 +114,33 @@ jobs:
110114
--fail \
111115
| jq '.token' | tr -d '"' )
112116
./start_instance.sh action_cpu $token djl-serving
117+
- name: Create new P4D instance
118+
id: create_p4d_1
119+
run: |
120+
cd /home/ubuntu/djl_benchmark_script/scripts
121+
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
122+
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
123+
--fail \
124+
| jq '.token' | tr -d '"' )
125+
./start_instance.sh action_lmic_p4d $token djl-serving
126+
# - name: Create new P4DE instance
127+
# id: create_p4de_1
128+
# run: |
129+
# cd /home/ubuntu/djl_benchmark_script/scripts
130+
# token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
131+
# https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
132+
# --fail \
133+
# | jq '.token' | tr -d '"' )
134+
# ./start_instance.sh action_lmic_p4de $token djl-serving
113135
outputs:
114-
gpu_instance_id_1: ${{ steps.create_gpu.outputs.action_g6_instance_id }}
115-
gpu_instance_id_2: ${{ steps.create_gpu2.outputs.action_g6_instance_id }}
116-
gpu_instance_id_3: ${{ steps.create_gpu3.outputs.action_g6_instance_id }}
117-
gpu_instance_id_4: ${{ steps.create_gpu4.outputs.action_g6_instance_id }}
136+
g6_instance_id_1: ${{ steps.create_g6_1.outputs.action_g6_instance_id }}
137+
g6_instance_id_2: ${{ steps.create_g6_2.outputs.action_g6_instance_id }}
138+
g6_instance_id_3: ${{ steps.create_g6_3.outputs.action_g6_instance_id }}
139+
g6_instance_id_4: ${{ steps.create_g6_4.outputs.action_g6_instance_id }}
118140
aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }}
119-
120141
cpu_instance_id: ${{ steps.create_cpu.outputs.action_cpu_instance_id }}
142+
p4d_instance_id_1: ${{ steps.create_p4d_1.outputs.action_lmic_p4d_instance_id }}
143+
# p4de_instance_id_1: ${{ steps.create_p4de_1.outputs.action_lmic_p4de_instance_id }}
121144

122145
test:
123146
runs-on:
@@ -139,57 +162,67 @@ jobs:
139162
- test: TestCpuBoth
140163
instance: cpu
141164
failure-prefix: cpu
142-
- test: TestGpu
165+
- test: TestGpu_g6
143166
instance: g6
144167
failure-prefix: gpu
145168
- test: TestAarch64
146169
instance: aarch64
147170
failure-prefix: aarch64
148-
# - test: TestHfHandler
171+
# - test: TestHfHandler_g6
149172
# instance: g6
150173
# failure-prefix: lmi
151-
# - test: TestTrtLlmHandler1
174+
# - test: TestTrtLlmHandler1_g6
152175
# instance: g6
153176
# failure-prefix: trtllm
154-
# - test: TestTrtLlmHandler2
177+
# - test: TestTrtLlmHandler2_g6
155178
# instance: g6
156179
# failure-prefix: trtllm
157-
- test: TestVllm1
180+
- test: TestVllm1_g6
158181
instance: g6
159182
failure-prefix: lmi
160-
- test: TestVllm2
183+
- test: TestVllm2_g6
161184
instance: g6
162185
failure-prefix: lmi
163-
- test: TestVllmCustomHandlers
186+
- test: TestVllmCustomHandlers_g6
164187
instance: g6
165188
failure-prefix: lmi
166-
- test: TestVllmCustomFormatters
189+
- test: TestVllmCustomFormatters_g6
167190
instance: g6
168191
failure-prefix: lmi
169-
- test: TestVllmLora
192+
- test: TestVllmLora_g6
170193
instance: g6
171194
failure-prefix: lmi
172-
- test: TestVllmAsyncLora
195+
- test: TestVllmAsyncLora_g6
173196
instance: g6
174197
failure-prefix: lmi
175-
- test: TestMultiModalVllm
198+
- test: TestMultiModalVllm_g6
176199
instance: g6
177200
failure-prefix: lmi
178-
# - test: TestTextEmbedding
201+
# - test: TestTextEmbedding_g6
179202
# instance: g6
180203
# failure-prefix: lmi
181-
# - test: TestCorrectnessTrtLlm
204+
# - test: TestCorrectnessTrtLlm_g6
182205
# instance: g6
183206
# failure-prefix: trtllm
184-
- test: TestStatefulModel
207+
- test: TestStatefulModel_g6
185208
instance: g6
186209
failure-prefix: lmi
210+
# P4D instance tests
211+
- test: TestVllm_p4d
212+
instance: p4d
213+
failure-prefix: lmi
214+
# P4DE instance tests (currently disabled)
215+
# - test: TestVllm_p4de
216+
# instance: p4de
217+
# failure-prefix: lmi
187218
outputs:
188219
failure_cpu: ${{ steps.test-failure.outputs.failure_cpu }}
189-
failure_gpu: ${{ steps.test-failure.outputs.failure_gpu }}
220+
failure_g6: ${{ steps.test-failure.outputs.failure_g6 }}
190221
failure_aarch64: ${{ steps.test-failure.outputs.failure_aarch64 }}
191222
failure_lmi: ${{ steps.test-failure.outputs.failure_lmi }}
192223
failure_trtllm: ${{ steps.test-failure.outputs.failure_trtllm }}
224+
failure_p4d: ${{ steps.test-failure.outputs.failure_p4d }}
225+
# failure_p4de: ${{ steps.test-failure.outputs.failure_p4de }}
193226

194227
steps:
195228
- uses: actions/checkout@v4
@@ -269,16 +302,19 @@ jobs:
269302
- name: Stop all instances
270303
run: |
271304
cd /home/ubuntu/djl_benchmark_script/scripts
272-
instance_id=${{ needs.create-runners.outputs.gpu_instance_id_1 }}
305+
instance_id=${{ needs.create-runners.outputs.g6_instance_id_1 }}
273306
./stop_instance.sh $instance_id
274-
instance_id=${{ needs.create-runners.outputs.gpu_instance_id_2 }}
307+
instance_id=${{ needs.create-runners.outputs.g6_instance_id_2 }}
275308
./stop_instance.sh $instance_id
276-
instance_id=${{ needs.create-runners.outputs.gpu_instance_id_3 }}
309+
instance_id=${{ needs.create-runners.outputs.g6_instance_id_3 }}
277310
./stop_instance.sh $instance_id
278-
instance_id=${{ needs.create-runners.outputs.gpu_instance_id_4 }}
311+
instance_id=${{ needs.create-runners.outputs.g6_instance_id_4 }}
279312
./stop_instance.sh $instance_id
280313
instance_id=${{ needs.create-runners.outputs.aarch64_instance_id }}
281314
./stop_instance.sh $instance_id
282-
283315
instance_id=${{ needs.create-runners.outputs.cpu_instance_id }}
284316
./stop_instance.sh $instance_id
317+
instance_id=${{ needs.create-runners.outputs.p4d_instance_id_1 }}
318+
./stop_instance.sh $instance_id
319+
# instance_id=${{ needs.create-runners.outputs.p4de_instance_id_1 }}
320+
# ./stop_instance.sh $instance_id

tests/integration/llm/client.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,16 @@ def get_model_name():
229229
"seq_length": [25],
230230
"tokenizer": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
231231
},
232+
"llama-4-scout-17b-16e-instruct": {
233+
"batch_size": [1, 2],
234+
"seq_length": [256],
235+
"tokenizer": "unsloth/Llama-4-Scout-17B-16E-Instruct",
236+
},
237+
"minimax-m2": {
238+
"batch_size": [1, 2],
239+
"seq_length": [256],
240+
"tokenizer": "MiniMaxAI/MiniMax-M2",
241+
},
232242
}
233243

234244
vllm_neo_model_spec = {
@@ -522,6 +532,10 @@ def get_model_name():
522532
"llama32-11b-multimodal": {
523533
"batch_size": [1],
524534
},
535+
"qwen3-vl-32b-instruct": {
536+
"batch_size": [1, 2],
537+
"tokenizer": "Qwen/Qwen2-VL-72B-Instruct"
538+
},
525539
}
526540

527541
text_embedding_model_spec = {

tests/integration/llm/prepare.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,32 @@
442442
"option.max_rolling_batch_size": "1",
443443
"option.enforce_eager": True,
444444
},
445+
"qwen3-vl-32b-instruct": {
446+
"option.model_id": "s3://djl-llm/Qwen3-VL-32B-Instruct/",
447+
"option.task": "text-generation",
448+
"option.tensor_parallel_degree": 8,
449+
"option.max_rolling_batch_size": 4,
450+
"option.trust_remote_code": True,
451+
"option.limit_mm_per_prompt": '{"image": 4, "video": 0}',
452+
},
453+
"minimax-m2": {
454+
"option.model_id": "s3://djl-llm/MiniMax-M2/",
455+
"option.task": "text-generation",
456+
"option.tensor_parallel_degree": 8,
457+
"option.max_rolling_batch_size": 4,
458+
"option.trust_remote_code": True,
459+
"option.max_model_len": 16384,
460+
"option.gpu_memory_utilization": "0.9",
461+
},
462+
"llama-4-scout-17b-16e-instruct": {
463+
"option.model_id": "s3://djl-llm/Llama-4-Scout-17B-16E-Instruct/",
464+
"option.task": "text-generation",
465+
"option.tensor_parallel_degree": 8,
466+
"option.max_rolling_batch_size": 4,
467+
"option.trust_remote_code": True,
468+
"option.max_model_len": 16384,
469+
"option.gpu_memory_utilization": "0.9",
470+
},
445471
}
446472

447473
vllm_neo_model_list = {

tests/integration/pytest.ini

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
log_cli = true
33
log_cli_level = INFO
44
markers =
5-
gpu: Runs on any gpu machine
6-
gpu_4: Runs on a machine with at least 4 gpus (includes gpu mark)
5+
gpu_4: Runs on a machine with 4 gpus
6+
gpu_8: Runs on a machine with 8 gpus
77

88
aarch64: Runs on aarch64
99
cpu: Tests cpu

0 commit comments

Comments
 (0)