Skip to content

Commit 23e8fe8

Browse files
author
Suma Kasa
committed
Make vllm_async_service the default handler for vllm and add default rolling_batch and async mode options
1 parent 1ef58cd commit 23e8fe8

File tree

3 files changed

+45
-4
lines changed

3 files changed

+45
-4
lines changed

engines/python/src/main/java/ai/djl/python/engine/PyModel.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,12 @@ public void load(Path modelPath, String prefix, Map<String, ?> options) throws I
187187
recommendedEntryPoint = "djl_python.transformers_neuronx";
188188
} else if ("trtllm".equals(features)) {
189189
recommendedEntryPoint = "djl_python.tensorrt_llm";
190+
} else if ("vllm".equals(features)) {
191+
recommendedEntryPoint = "djl_python.lmi_vllm.vllm_async_service";
192+
pyEnv.setAsyncMode(true);
193+
if (!properties.containsKey("rolling_batch")) {
194+
setProperty("rolling_batch", "disable");
195+
}
190196
} else if (pyEnv.getInitParameters().containsKey("model_id")
191197
|| Files.exists(modelPath.resolve("config.json"))) {
192198
recommendedEntryPoint = "djl_python.huggingface";

engines/python/src/test/java/ai/djl/python/engine/PyEngineTest.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,4 +599,43 @@ public void testRestartProcess() throws IOException, ModelException, Interrupted
599599
Assert.assertEquals(output.getCode(), 200);
600600
}
601601
}
602+
603+
@Test
604+
public void testVllmFeaturesRollingBatch() throws IOException, ModelException {
605+
System.setProperty("SERVING_FEATURES", "vllm");
606+
try {
607+
Criteria<Input, Output> criteria =
608+
Criteria.builder()
609+
.setTypes(Input.class, Output.class)
610+
.optModelPath(Paths.get("src/test/resources/echo"))
611+
.optEngine("Python")
612+
.build();
613+
try (ZooModel<Input, Output> model = criteria.loadModel()) {
614+
// With SERVING_FEATURES=vllm and no rolling_batch option on the criteria, the loaded model must report rolling_batch="disable"
615+
Assert.assertEquals(model.getProperty("rolling_batch"), "disable");
616+
}
617+
} finally {
618+
System.clearProperty("SERVING_FEATURES");
619+
}
620+
}
621+
622+
@Test
623+
public void testVllmFeaturesRollingBatchOverride() throws IOException, ModelException {
624+
System.setProperty("SERVING_FEATURES", "vllm");
625+
try {
626+
Criteria<Input, Output> criteria =
627+
Criteria.builder()
628+
.setTypes(Input.class, Output.class)
629+
.optModelPath(Paths.get("src/test/resources/echo"))
630+
.optOption("rolling_batch", "vllm")
631+
.optEngine("Python")
632+
.build();
633+
try (ZooModel<Input, Output> model = criteria.loadModel()) {
634+
// With SERVING_FEATURES=vllm, an explicitly supplied rolling_batch option ("vllm") must be kept, not replaced by "disable"
635+
Assert.assertEquals(model.getProperty("rolling_batch"), "vllm");
636+
}
637+
} finally {
638+
System.clearProperty("SERVING_FEATURES");
639+
}
640+
}
602641
}

tests/integration/llm/prepare.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,9 +1701,6 @@ def build_vllm_async_model(model):
17011701
)
17021702
options = vllm_model_list[model]
17031703
options["engine"] = "Python"
1704-
options["option.rolling_batch"] = "disable"
1705-
options["option.async_mode"] = "true"
1706-
options["option.entryPoint"] = "djl_python.lmi_vllm.vllm_async_service"
17071704

17081705
adapter_ids = options.pop("adapter_ids", [])
17091706
adapter_names = options.pop("adapter_names", [])
@@ -1780,7 +1777,6 @@ def build_vllm_model(model):
17801777
)
17811778
options = vllm_model_list[model]
17821779
options["engine"] = "Python"
1783-
options["option.rolling_batch"] = "vllm"
17841780

17851781
adapter_ids = options.pop("adapter_ids", [])
17861782
adapter_names = options.pop("adapter_names", [])

0 commit comments

Comments
 (0)