15071507handler_performance_model_list = {
15081508 "tiny-llama-vllm" : {
15091509 "engine" : "Python" ,
1510- "option.rolling_batch" : "disable" ,
1511- "option.async_mode" : True ,
15121510 "option.model_id" : "TinyLlama/TinyLlama-1.1B-Chat-v1.0" ,
15131511 "option.gpu_memory_utilization" : "0.9" ,
15141512 "option.max_rolling_batch_size" : 512 ,
1515- "option.entryPoint" : "djl_python.lmi_vllm.vllm_async_service" ,
15161513 },
15171514 "tiny-llama-trtllm" : {
15181515 "engine" : "Python" ,
@@ -1717,9 +1714,6 @@ def build_vllm_async_model_with_custom_handler(model, handler_type="success"):
17171714 )
17181715 options = vllm_model_list [model ]
17191716 options ["engine" ] = "Python"
1720- options ["option.rolling_batch" ] = "disable"
1721- options ["option.async_mode" ] = "true"
1722- options ["option.entryPoint" ] = "djl_python.lmi_vllm.vllm_async_service"
17231717 write_model_artifacts (options )
17241718
17251719 # Copy custom handler from examples
@@ -1736,9 +1730,6 @@ def build_vllm_async_model_custom_formatters(model, error_type=None):
17361730 )
17371731 options = vllm_model_list [model ]
17381732 options ["engine" ] = "Python"
1739- options ["option.rolling_batch" ] = "disable"
1740- options ["option.async_mode" ] = "true"
1741- options ["option.entryPoint" ] = "djl_python.lmi_vllm.vllm_async_service"
17421733 write_model_artifacts (options )
17431734
17441735 # Create custom formatter files based on error_type
@@ -1883,9 +1874,6 @@ def build_stateful_model(model):
18831874 )
18841875 options = stateful_model_list [model ]
18851876 options ["engine" ] = "Python"
1886- options ["option.rolling_batch" ] = "disable"
1887- options ["option.async_mode" ] = "true"
1888- options ["option.entryPoint" ] = "djl_python.lmi_vllm.vllm_async_service"
18891877 options ["option.enable_stateful_sessions" ] = "true"
18901878 options ["option.sessions_path" ] = "/tmp/djl_sessions"
18911879 write_model_artifacts (options )
0 commit comments