Skip to content

Commit 81a4fe5

Browse files
author
chibu
committed
revert to main vllm server script
1 parent 3d9b897 commit 81a4fe5

File tree

1 file changed

+4
-16
lines changed

1 file changed

+4
-16
lines changed

src/automation/vllm/server.py

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,34 +14,25 @@ def start_vllm_server(
1414
vllm_args,
1515
model_id,
1616
target,
17-
server_wait_time,
18-
gpu_count,
17+
server_wait_time,
1918
):
2019
task = Task.current_task()
2120

22-
print("Inside start vllm server")
23-
2421
executable_path = os.path.dirname(sys.executable)
2522
vllm_path = os.path.join(executable_path, "vllm")
2623

27-
available_gpus = list(range(torch.cuda.device_count()))
28-
selected_gpus = available_gpus[:gpu_count]
29-
30-
subprocess_env = os.environ.copy()
31-
subprocess_env["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in selected_gpus)
24+
num_gpus = torch.cuda.device_count()
3225

3326
parsed_target = urlparse(target)
34-
print(f"vllm path is: {vllm_path}")
3527

3628
server_command = [
3729
f"{vllm_path}", "serve",
3830
model_id,
3931
"--host", parsed_target.hostname,
4032
"--port", str(parsed_target.port),
41-
"--tensor-parallel-size", str(gpu_count),
33+
"--tensor-parallel-size", str(num_gpus)
4234
]
4335

44-
print(server_command)
4536
subprocess_env = os.environ.copy()
4637

4738
for k, v in vllm_args.items():
@@ -52,20 +43,17 @@ def start_vllm_server(
5243
server_command.append(f"--{k}")
5344
else:
5445
server_command.extend([f"--{k}", str(v)])
55-
46+
5647

5748
server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt"
5849
server_log_file = open(server_log_file_name, "w")
59-
print("Server command:", " ".join(server_command))
60-
print(f"VLLM logs are located at: {server_log_file} in {os.getcwd()}")
6150
server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env)
6251

6352
delay = 5
6453
server_initialized = False
6554
for _ in range(server_wait_time // delay):
6655
try:
6756
response = requests.get(target + "/models")
68-
print(f"response: {response}")
6957
if response.status_code == 200:
7058
print("Server initialized")
7159
server_initialized = True

0 commit comments

Comments (0)