2 files changed: +8 -3 lines changed
@@ -29,14 +29,21 @@ def __init__(self, model_path) -> None:
         backend = backend_map[os.getenv("backend")]
         enable_prefix_caching = bool(os.getenv("enable_prefix_caching", False))
         max_model_len = os.getenv("max_model_len", None)
+        dtype = os.getenv("dtype", "auto")
         logger.info(f"Backend: {backend}")
         if backend == "pytorch":
-            backend_config = PytorchEngineConfig(tp=int(os.getenv("num_gpus", "1")))
+            backend_config = PytorchEngineConfig(
+                tp=int(os.getenv("num_gpus", "1")),
+                dtype=dtype,
+                session_len=int(max_model_len) if max_model_len else None,
+                enable_prefix_caching=enable_prefix_caching,
+            )
         if backend == "turbomind":
             backend_config = TurbomindEngineConfig(
                 tp=int(os.getenv("num_gpus", "1")),
                 enable_prefix_caching=enable_prefix_caching,
                 session_len=int(max_model_len) if max_model_len else None,
+                dtype=dtype,
             )
         pipeline_type, pipeline_class = get_task(model_path)
         logger.info(f"Model architecture: {pipeline_type}")
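For context on this hunk: the new `dtype` environment variable is read once and passed to whichever LMDeploy engine config gets built, and the PyTorch branch now receives the same `session_len` and `enable_prefix_caching` settings as the TurboMind branch. The snippet below is a minimal sketch of that wiring, not code from this PR; `build_backend_config` and `_env_flag` are hypothetical helpers, and it assumes an LMDeploy release whose `PytorchEngineConfig`/`TurbomindEngineConfig` accept `dtype`, `session_len`, and `enable_prefix_caching`. It also parses the boolean env var explicitly, since `bool(os.getenv("enable_prefix_caching", False))` in the original is truthy for any non-empty string, including "0" or "false".

```python
# Sketch only -- illustrative helpers, not part of this PR.
import os

from lmdeploy import PytorchEngineConfig, TurbomindEngineConfig


def _env_flag(name: str, default: bool = False) -> bool:
    # bool(os.getenv(name, False)) is truthy for any non-empty string,
    # so parse the common "true"/"false" spellings explicitly.
    value = os.getenv(name)
    if value is None:
        return default
    return value.strip().lower() in ("1", "true", "yes", "on")


def build_backend_config(backend: str):
    # Assumes an LMDeploy version where both engine configs accept `dtype`.
    config_cls = PytorchEngineConfig if backend == "pytorch" else TurbomindEngineConfig
    max_model_len = os.getenv("max_model_len")
    return config_cls(
        tp=int(os.getenv("num_gpus", "1")),
        dtype=os.getenv("dtype", "auto"),
        session_len=int(max_model_len) if max_model_len else None,
        enable_prefix_caching=_env_flag("enable_prefix_caching"),
    )
```

With this shape, something like `dtype=float16 num_gpus=2` in the environment yields the same precision and tensor-parallel settings regardless of which backend is selected.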
@@ -27,9 +27,7 @@ class VllmBackend(ModelBackend):
     def __init__(self, model_path) -> None:
         lora = os.getenv("lora", None)
         enable_prefix_caching = bool(os.getenv("enable_prefix_caching", False))
-
         max_model_len = os.getenv("max_model_len", None)
-
         tensor_parallel_size = int(os.getenv("num_gpus", "1"))
         dtype = os.getenv("dtype", "auto")
         max_loras = 1
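In the vLLM backend these environment-derived values are ultimately forwarded to the engine. The sketch below shows one plausible mapping onto standard vLLM engine arguments (`tensor_parallel_size`, `dtype`, `max_model_len`, `enable_prefix_caching`, `enable_lora`, `max_loras`); `build_vllm_engine` is a hypothetical helper, not the repository's actual constructor.

```python
# Sketch only -- not the repository's VllmBackend implementation.
import os

from vllm import LLM


def build_vllm_engine(model_path: str) -> LLM:
    lora = os.getenv("lora", None)
    max_model_len = os.getenv("max_model_len", None)
    return LLM(
        model=model_path,
        tensor_parallel_size=int(os.getenv("num_gpus", "1")),
        dtype=os.getenv("dtype", "auto"),
        max_model_len=int(max_model_len) if max_model_len else None,
        # Explicit parse instead of bool(os.getenv(...)), which is truthy
        # for any non-empty string.
        enable_prefix_caching=os.getenv("enable_prefix_caching", "").lower()
        in ("1", "true", "yes"),
        # LoRA support is enabled only when a `lora` path is provided.
        enable_lora=lora is not None,
        max_loras=1,
    )
```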