@@ -14,34 +14,25 @@ def start_vllm_server(
     vllm_args,
     model_id,
     target,
-    server_wait_time,
-    gpu_count,
+    server_wait_time,
 ):
     task = Task.current_task()

-    print("Inside start vllm server")
-
     executable_path = os.path.dirname(sys.executable)
     vllm_path = os.path.join(executable_path, "vllm")

-    available_gpus = list(range(torch.cuda.device_count()))
-    selected_gpus = available_gpus[:gpu_count]
-
-    subprocess_env = os.environ.copy()
-    subprocess_env["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in selected_gpus)
+    num_gpus = torch.cuda.device_count()

     parsed_target = urlparse(target)
-    print(f"vllm path is: {vllm_path}")

     server_command = [
         f"{vllm_path}", "serve",
         model_id,
         "--host", parsed_target.hostname,
         "--port", str(parsed_target.port),
-        "--tensor-parallel-size", str(gpu_count),
+        "--tensor-parallel-size", str(num_gpus)
     ]

-    print(server_command)
     subprocess_env = os.environ.copy()

     for k, v in vllm_args.items():
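The first hunk drops the manual CUDA_VISIBLE_DEVICES masking and the early debug prints: instead of slicing the visible GPUs down to gpu_count, tensor parallelism is now sized to every GPU PyTorch reports. A minimal sketch of the resulting launch-command construction, with model_id and target filled in as placeholder values rather than anything taken from the repository:

import os
import sys
from urllib.parse import urlparse

import torch

model_id = "org/model-name"             # placeholder model identifier
target = "http://127.0.0.1:8000/v1"     # placeholder OpenAI-compatible endpoint URL

# Size tensor parallelism to every GPU PyTorch can see.
num_gpus = torch.cuda.device_count()

# The vllm executable sits next to the current Python interpreter.
executable_path = os.path.dirname(sys.executable)
vllm_path = os.path.join(executable_path, "vllm")

parsed_target = urlparse(target)
server_command = [
    f"{vllm_path}", "serve",
    model_id,
    "--host", parsed_target.hostname,
    "--port", str(parsed_target.port),
    "--tensor-parallel-size", str(num_gpus),
]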
@@ -52,20 +43,17 @@ def start_vllm_server(
             server_command.append(f"--{k}")
         else:
             server_command.extend([f"--{k}", str(v)])
-
+

     server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt"
     server_log_file = open(server_log_file_name, "w")
-    print("Server command:", " ".join(server_command))
-    print(f"VLLM logs are located at: {server_log_file} in {os.getcwd()}")
     server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env)

     delay = 5
     server_initialized = False
     for _ in range(server_wait_time // delay):
         try:
             response = requests.get(target + "/models")
-            print(f"response: {response}")
             if response.status_code == 200:
                 print("Server initialized")
                 server_initialized = True
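The second hunk removes the remaining debug prints around the process launch and the readiness poll. The polling pattern itself is sketched below under stated assumptions: the loop tail that sleeps between attempts and breaks on success lies outside the hunk, so it is reconstructed here, and the log-file name, target, command, and timeout values are placeholders.

import os
import subprocess
import time

import requests

target = "http://127.0.0.1:8000/v1"                   # placeholder, same shape as the target argument
server_command = ["vllm", "serve", "org/model-name"]  # placeholder, assembled as in the previous sketch
server_wait_time = 300                                # placeholder readiness budget in seconds
delay = 5                                             # seconds between readiness probes

# Send vLLM's stdout/stderr to a log file rather than the console.
server_log_file = open("vllm_server_log.txt", "w")
server_process = subprocess.Popen(
    server_command,
    stdout=server_log_file,
    stderr=server_log_file,
    shell=False,
    env=os.environ.copy(),
)

server_initialized = False
for _ in range(server_wait_time // delay):
    try:
        # The OpenAI-compatible /models route answers 200 once the engine is ready.
        response = requests.get(target + "/models")
        if response.status_code == 200:
            print("Server initialized")
            server_initialized = True
            break
    except requests.exceptions.ConnectionError:
        pass  # server is not accepting connections yet
    time.sleep(delay)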