Skip to content

Commit 704948d

Browse files
committed
[ci] Add integration tests for new large model architectures
1 parent 9da2977 commit 704948d

File tree

5 files changed

+146
-45
lines changed

5 files changed

+146
-45
lines changed

.github/workflows/integration.yml

Lines changed: 58 additions & 28 deletions
Original file line number · Diff line number · Diff line change
@@ -56,7 +56,7 @@ jobs:
5656
runs-on: [self-hosted, scheduler]
5757
steps:
5858
- name: Create new G6 instance
59-
id: create_gpu
59+
id: create_g6_1
6060
run: |
6161
cd /home/ubuntu/djl_benchmark_script/scripts
6262
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
@@ -65,7 +65,7 @@ jobs:
6565
| jq '.token' | tr -d '"' )
6666
./start_instance.sh action_g6 $token djl-serving
6767
- name: Create new G6 instance
68-
id: create_gpu2
68+
id: create_g6_2
6969
run: |
7070
cd /home/ubuntu/djl_benchmark_script/scripts
7171
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
@@ -74,7 +74,7 @@ jobs:
7474
| jq '.token' | tr -d '"' )
7575
./start_instance.sh action_g6 $token djl-serving
7676
- name: Create new G6 instance
77-
id: create_gpu3
77+
id: create_g6_3
7878
run: |
7979
cd /home/ubuntu/djl_benchmark_script/scripts
8080
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
@@ -83,7 +83,7 @@ jobs:
8383
| jq '.token' | tr -d '"' )
8484
./start_instance.sh action_g6 $token djl-serving
8585
- name: Create new G6 instance
86-
id: create_gpu4
86+
id: create_g6_4
8787
run: |
8888
cd /home/ubuntu/djl_benchmark_script/scripts
8989
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
@@ -110,14 +110,33 @@ jobs:
110110
--fail \
111111
| jq '.token' | tr -d '"' )
112112
./start_instance.sh action_cpu $token djl-serving
113+
- name: Create new P4D instance
114+
id: create_p4d_1
115+
run: |
116+
cd /home/ubuntu/djl_benchmark_script/scripts
117+
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
118+
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
119+
--fail \
120+
| jq '.token' | tr -d '"' )
121+
./start_instance.sh action_lmic_p4d $token djl-serving
122+
# - name: Create new P4DE instance
123+
# id: create_p4de_1
124+
# run: |
125+
# cd /home/ubuntu/djl_benchmark_script/scripts
126+
# token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
127+
# https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
128+
# --fail \
129+
# | jq '.token' | tr -d '"' )
130+
# ./start_instance.sh action_lmic_p4de $token djl-serving
113131
outputs:
114-
gpu_instance_id_1: ${{ steps.create_gpu.outputs.action_g6_instance_id }}
115-
gpu_instance_id_2: ${{ steps.create_gpu2.outputs.action_g6_instance_id }}
116-
gpu_instance_id_3: ${{ steps.create_gpu3.outputs.action_g6_instance_id }}
117-
gpu_instance_id_4: ${{ steps.create_gpu4.outputs.action_g6_instance_id }}
132+
g6_instance_id_1: ${{ steps.create_g6_1.outputs.action_g6_instance_id }}
133+
g6_instance_id_2: ${{ steps.create_g6_2.outputs.action_g6_instance_id }}
134+
g6_instance_id_3: ${{ steps.create_g6_3.outputs.action_g6_instance_id }}
135+
g6_instance_id_4: ${{ steps.create_g6_4.outputs.action_g6_instance_id }}
118136
aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }}
119-
120137
cpu_instance_id: ${{ steps.create_cpu.outputs.action_cpu_instance_id }}
138+
p4d_instance_id_1: ${{ steps.create_p4d_1.outputs.action_lmic_p4d_instance_id }}
139+
# p4de_instance_id_1: ${{ steps.create_p4de_1.outputs.action_lmic_p4de_instance_id }}
121140

122141
test:
123142
runs-on:
@@ -139,51 +158,59 @@ jobs:
139158
- test: TestCpuBoth
140159
instance: cpu
141160
failure-prefix: cpu
142-
- test: TestGpu
161+
- test: TestGpu_g6
143162
instance: g6
144163
failure-prefix: gpu
145164
- test: TestAarch64
146165
instance: aarch64
147166
failure-prefix: aarch64
148-
# - test: TestHfHandler
167+
# - test: TestHfHandler_g6
149168
# instance: g6
150169
# failure-prefix: lmi
151-
# - test: TestTrtLlmHandler1
170+
# - test: TestTrtLlmHandler1_g6
152171
# instance: g6
153172
# failure-prefix: trtllm
154-
# - test: TestTrtLlmHandler2
173+
# - test: TestTrtLlmHandler2_g6
155174
# instance: g6
156175
# failure-prefix: trtllm
157-
- test: TestVllm1
176+
- test: TestVllm1_g6
158177
instance: g6
159178
failure-prefix: lmi
160-
- test: TestVllm2
179+
- test: TestVllm2_g6
161180
instance: g6
162181
failure-prefix: lmi
163-
- test: TestVllmCustomHandlers
182+
- test: TestVllmCustomHandlers_g6
164183
instance: g6
165184
failure-prefix: lmi
166-
- test: TestVllmCustomFormatters
185+
- test: TestVllmCustomFormatters_g6
167186
instance: g6
168187
failure-prefix: lmi
169-
- test: TestVllmLora
188+
- test: TestVllmLora_g6
170189
instance: g6
171190
failure-prefix: lmi
172-
- test: TestVllmAsyncLora
191+
- test: TestVllmAsyncLora_g6
173192
instance: g6
174193
failure-prefix: lmi
175-
- test: TestMultiModalVllm
194+
- test: TestMultiModalVllm_g6
176195
instance: g6
177196
failure-prefix: lmi
178-
# - test: TestTextEmbedding
197+
# - test: TestTextEmbedding_g6
179198
# instance: g6
180199
# failure-prefix: lmi
181-
# - test: TestCorrectnessTrtLlm
200+
# - test: TestCorrectnessTrtLlm_g6
182201
# instance: g6
183202
# failure-prefix: trtllm
184-
- test: TestStatefulModel
203+
- test: TestStatefulModel_g6
185204
instance: g6
186205
failure-prefix: lmi
206+
# P4D instance tests
207+
- test: TestVllm_p4d
208+
instance: p4d
209+
failure-prefix: lmi
210+
# P4DE instance tests
211+
# - test: TestVllm_p4de
212+
# instance: p4de
213+
# failure-prefix: lmi
187214
outputs:
188215
failure_cpu: ${{ steps.test-failure.outputs.failure_cpu }}
189216
failure_gpu: ${{ steps.test-failure.outputs.failure_gpu }}
@@ -269,16 +296,19 @@ jobs:
269296
- name: Stop all instances
270297
run: |
271298
cd /home/ubuntu/djl_benchmark_script/scripts
272-
instance_id=${{ needs.create-runners.outputs.gpu_instance_id_1 }}
299+
instance_id=${{ needs.create-runners.outputs.g6_instance_id_1 }}
273300
./stop_instance.sh $instance_id
274-
instance_id=${{ needs.create-runners.outputs.gpu_instance_id_2 }}
301+
instance_id=${{ needs.create-runners.outputs.g6_instance_id_2 }}
275302
./stop_instance.sh $instance_id
276-
instance_id=${{ needs.create-runners.outputs.gpu_instance_id_3 }}
303+
instance_id=${{ needs.create-runners.outputs.g6_instance_id_3 }}
277304
./stop_instance.sh $instance_id
278-
instance_id=${{ needs.create-runners.outputs.gpu_instance_id_4 }}
305+
instance_id=${{ needs.create-runners.outputs.g6_instance_id_4 }}
279306
./stop_instance.sh $instance_id
280307
instance_id=${{ needs.create-runners.outputs.aarch64_instance_id }}
281308
./stop_instance.sh $instance_id
282-
283309
instance_id=${{ needs.create-runners.outputs.cpu_instance_id }}
284310
./stop_instance.sh $instance_id
311+
instance_id=${{ needs.create-runners.outputs.p4d_instance_id_1 }}
312+
./stop_instance.sh $instance_id
313+
# instance_id=${{ needs.create-runners.outputs.p4de_instance_id_1 }}
314+
# ./stop_instance.sh $instance_id

tests/integration/llm/client.py

Lines changed: 14 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -229,6 +229,16 @@ def get_model_name():
229229
"seq_length": [25],
230230
"tokenizer": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
231231
},
232+
"llama-4-scout-17b-16e-instruct": {
233+
"batch_size": [1, 2],
234+
"seq_length": [256],
235+
"tokenizer": "unsloth/Llama-4-Scout-17B-16E-Instruct",
236+
},
237+
"minimax-m2": {
238+
"batch_size": [1, 2],
239+
"seq_length": [256],
240+
"tokenizer": "MiniMaxAI/MiniMax-M2",
241+
},
232242
}
233243

234244
vllm_neo_model_spec = {
@@ -522,6 +532,10 @@ def get_model_name():
522532
"llama32-11b-multimodal": {
523533
"batch_size": [1],
524534
},
535+
"qwen3-vl-32b-instruct": {
536+
"batch_size": [1, 2],
537+
"tokenizer": "Qwen/Qwen2-VL-72B-Instruct"
538+
},
525539
}
526540

527541
text_embedding_model_spec = {

tests/integration/llm/prepare.py

Lines changed: 26 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -442,6 +442,32 @@
442442
"option.max_rolling_batch_size": "1",
443443
"option.enforce_eager": True,
444444
},
445+
"qwen3-vl-32b-instruct": {
446+
"option.model_id": "s3://djl-llm/Qwen3-VL-32B-Instruct/",
447+
"option.task": "text-generation",
448+
"option.tensor_parallel_degree": 8,
449+
"option.max_rolling_batch_size": 4,
450+
"option.trust_remote_code": True,
451+
"option.limit_mm_per_prompt": '{"image": 4, "video": 0}',
452+
},
453+
"minimax-m2": {
454+
"option.model_id": "s3://djl-llm/MiniMax-M2/",
455+
"option.task": "text-generation",
456+
"option.tensor_parallel_degree": 8,
457+
"option.max_rolling_batch_size": 4,
458+
"option.trust_remote_code": True,
459+
"option.max_model_len": 16384,
460+
"option.gpu_memory_utilization": "0.9",
461+
},
462+
"llama-4-scout-17b-16e-instruct": {
463+
"option.model_id": "s3://djl-llm/Llama-4-Scout-17B-16E-Instruct/",
464+
"option.task": "text-generation",
465+
"option.tensor_parallel_degree": 8,
466+
"option.max_rolling_batch_size": 4,
467+
"option.trust_remote_code": True,
468+
"option.max_model_len": 16384,
469+
"option.gpu_memory_utilization": "0.9",
470+
},
445471
}
446472

447473
vllm_neo_model_list = {

tests/integration/pytest.ini

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -3,7 +3,8 @@ log_cli = true
33
log_cli_level = INFO
44
markers =
55
gpu: Runs on any gpu machine
6-
gpu_4: Runs on a machine with at least 4 gpus (includes gpu mark)
6+
gpu_4: Runs on a machine with at least 4 gpus
7+
gpu_8: Runs on a machine with 8 gpus
78

89
aarch64: Runs on aarch64
910
cpu: Tests cpu

0 commit comments

Comments (0)