Commit 287e63b

Author: chibu
clean up debugs
1 parent 69638ea · commit 287e63b

File tree

2 files changed: +5 / -21 lines


src/automation/tasks/scripts/guidellm_script.py

Lines changed: 1 addition & 5 deletions
@@ -6,7 +6,7 @@
 from pyhocon import ConfigFactory
 from automation.configs import DEFAULT_GUIDELLM_SCENARIO
 
-def main():
+def main(configurations=None):
     task = Task.current_task()
 
     args = task.get_parameters_as_dict(cast=True)
@@ -53,15 +53,12 @@ def clean_hocon_value(v):
     # Resolve model_id
     model_id = resolve_model_id(args["Args"]["model"], clearml_model, force_download)
 
-    gpu_count = int(guidellm_args.get("gpu_count", 1))
-
     # Start vLLM server
     server_process, server_initialized, server_log = start_vllm_server(
         vllm_args,
         model_id,
         guidellm_args["target"],
         args["Args"]["server_wait_time"],
-        gpu_count,
     )
 
     if not server_initialized:
@@ -94,7 +91,6 @@ def clean_hocon_value(v):
     else:
         filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{DEFAULT_GUIDELLM_SCENARIO}.json"))
     current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args))
-    print(current_scenario.model_fields)
 
     # Ensure output_path is set and consistent
     output_path = Path(guidellm_args.get("output_path", "guidellm-output.json"))

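For context, a minimal sketch of the call shape after this commit, assuming the rest of main() is unchanged: the script no longer computes gpu_count, so start_vllm_server takes four arguments and GPU selection happens inside the helper (see the server.py diff below). The stub and the example values are illustrative assumptions, not code from the repository.

# Hypothetical stand-in for automation.vllm.server.start_vllm_server;
# the real helper launches `vllm serve` and polls the target until it responds.
def start_vllm_server(vllm_args, model_id, target, server_wait_time):
    return None, True, "vllm_server_log.txt"

vllm_args = {"max-model-len": 4096}          # example vLLM CLI overrides
model_id = "Qwen/Qwen2.5-0.5B-Instruct"      # example model id, not from the diff
target = "http://localhost:8000/v1"          # example guidellm target
server_wait_time = 600                       # example wait budget in seconds

# New four-argument call, matching the hunk above (gpu_count removed).
server_process, server_initialized, server_log = start_vllm_server(
    vllm_args,
    model_id,
    target,
    server_wait_time,
)
print(server_initialized)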
src/automation/vllm/server.py

Lines changed: 4 additions & 16 deletions
@@ -14,34 +14,25 @@ def start_vllm_server(
     vllm_args,
     model_id,
     target,
-    server_wait_time,
-    gpu_count,
+    server_wait_time,
 ):
     task = Task.current_task()
 
-    print("Inside start vllm server")
-
     executable_path = os.path.dirname(sys.executable)
     vllm_path = os.path.join(executable_path, "vllm")
 
-    available_gpus = list(range(torch.cuda.device_count()))
-    selected_gpus = available_gpus[:gpu_count]
-
-    subprocess_env = os.environ.copy()
-    subprocess_env["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in selected_gpus)
+    num_gpus = torch.cuda.device_count()
 
     parsed_target = urlparse(target)
-    print(f"vllm path is: {vllm_path}")
 
     server_command = [
         f"{vllm_path}", "serve",
         model_id,
         "--host", parsed_target.hostname,
         "--port", str(parsed_target.port),
-        "--tensor-parallel-size", str(gpu_count),
+        "--tensor-parallel-size", str(num_gpus)
     ]
 
-    print(server_command)
     subprocess_env = os.environ.copy()
 
     for k, v in vllm_args.items():
@@ -52,20 +43,17 @@ def start_vllm_server(
             server_command.append(f"--{k}")
         else:
             server_command.extend([f"--{k}", str(v)])
-
+
 
     server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt"
     server_log_file = open(server_log_file_name, "w")
-    print("Server command:", " ".join(server_command))
-    print(f"VLLM logs are located at: {server_log_file} in {os.getcwd()}")
     server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env)
 
     delay = 5
     server_initialized = False
     for _ in range(server_wait_time // delay):
         try:
             response = requests.get(target + "/models")
-            print(f"response: {response}")
            if response.status_code == 200:
                 print("Server initialized")
                 server_initialized = True

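A minimal sketch of the command construction after this commit: --tensor-parallel-size now follows torch.cuda.device_count() (every GPU visible to the process) instead of the removed gpu_count argument and its CUDA_VISIBLE_DEVICES masking. The target URL, model id, and extra flags below are example values, and the boolean-flag condition is an assumption, since the diff only shows the two branches of that if/else.

import os
import sys
from urllib.parse import urlparse

import torch

target = "http://localhost:8000/v1"             # example target, not from the diff
model_id = "facebook/opt-125m"                  # example model id
vllm_args = {"dtype": "auto", "enable-prefix-caching": True}  # example extra flags

vllm_path = os.path.join(os.path.dirname(sys.executable), "vllm")
num_gpus = torch.cuda.device_count()            # 0 on a CPU-only machine
parsed_target = urlparse(target)

server_command = [
    vllm_path, "serve",
    model_id,
    "--host", parsed_target.hostname,
    "--port", str(parsed_target.port),
    "--tensor-parallel-size", str(num_gpus),
]
for k, v in vllm_args.items():
    if isinstance(v, bool) and v:               # assumed condition for value-less flags
        server_command.append(f"--{k}")
    else:
        server_command.extend([f"--{k}", str(v)])

print(" ".join(server_command))
# Readiness is then checked by polling target + "/models" every few seconds
# until a 200 response arrives or server_wait_time elapses, as in the hunk above.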