Skip to content

Commit 0944ce3

Browse files
committed
[ci] Separate vllm async tests and remove async model options
1 parent: 89403fe · commit: 0944ce3

File tree

3 files changed

+9
-13
lines changed

3 files changed

+9
-13
lines changed

.github/workflows/integration.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,9 @@ jobs:
157157
- test: TestVllm1
158158
instance: g6
159159
failure-prefix: lmi
160+
- test: TestVllm2
161+
instance: g6
162+
failure-prefix: lmi
160163
- test: TestVllmCustomHandlers
161164
instance: g6
162165
failure-prefix: lmi

tests/integration/llm/prepare.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1507,12 +1507,9 @@
15071507
handler_performance_model_list = {
15081508
"tiny-llama-vllm": {
15091509
"engine": "Python",
1510-
"option.rolling_batch": "disable",
1511-
"option.async_mode": True,
15121510
"option.model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
15131511
"option.gpu_memory_utilization": "0.9",
15141512
"option.max_rolling_batch_size": 512,
1515-
"option.entryPoint": "djl_python.lmi_vllm.vllm_async_service",
15161513
},
15171514
"tiny-llama-trtllm": {
15181515
"engine": "Python",
@@ -1717,9 +1714,6 @@ def build_vllm_async_model_with_custom_handler(model, handler_type="success"):
17171714
)
17181715
options = vllm_model_list[model]
17191716
options["engine"] = "Python"
1720-
options["option.rolling_batch"] = "disable"
1721-
options["option.async_mode"] = "true"
1722-
options["option.entryPoint"] = "djl_python.lmi_vllm.vllm_async_service"
17231717
write_model_artifacts(options)
17241718

17251719
# Copy custom handler from examples
@@ -1736,9 +1730,6 @@ def build_vllm_async_model_custom_formatters(model, error_type=None):
17361730
)
17371731
options = vllm_model_list[model]
17381732
options["engine"] = "Python"
1739-
options["option.rolling_batch"] = "disable"
1740-
options["option.async_mode"] = "true"
1741-
options["option.entryPoint"] = "djl_python.lmi_vllm.vllm_async_service"
17421733
write_model_artifacts(options)
17431734

17441735
# Create custom formatter files based on error_type
@@ -1883,9 +1874,6 @@ def build_stateful_model(model):
18831874
)
18841875
options = stateful_model_list[model]
18851876
options["engine"] = "Python"
1886-
options["option.rolling_batch"] = "disable"
1887-
options["option.async_mode"] = "true"
1888-
options["option.entryPoint"] = "djl_python.lmi_vllm.vllm_async_service"
18891877
options["option.enable_stateful_sessions"] = "true"
18901878
options["option.sessions_path"] = "/tmp/djl_sessions"
18911879
write_model_artifacts(options)

tests/integration/tests.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ def test_gemma_2b(self):
555555

556556
def test_llama2_7b_chat(self):
557557
with Runner('lmi', 'llama2-7b-chat') as r:
558-
prepare.build_vllm_async_model("llama2-7b-chat")
558+
prepare.build_vllm_model("llama2-7b-chat")
559559
r.launch()
560560
client.run("vllm_chat llama2-7b-chat".split())
561561

@@ -587,6 +587,11 @@ def test_llama_68m_speculative_medusa(self):
587587
r.launch()
588588
client.run("vllm llama-68m-speculative-medusa".split())
589589

590+
591+
@pytest.mark.vllm
592+
@pytest.mark.gpu_4
593+
class TestVllm2:
594+
590595
def test_llama_68m_speculative_eagle(self):
591596
with Runner('lmi', 'llama-68m-speculative-eagle') as r:
592597
prepare.build_vllm_async_model("llama-68m-speculative-eagle")

0 commit comments

Comments (0)