Skip to content

Commit c4617f7

Browse files
committed
AI-Dynamo updates.
Signed-off-by: Kapil Arya <kapila@nvidia.com>
1 parent e74d423 commit c4617f7

File tree

16 files changed: +1098 additions, -373 deletions (lines changed)

conf/experimental/ai_dynamo/test/vllm.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
2-
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
# SPDX-License-Identifier: Apache-2.0
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -46,6 +46,10 @@ docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0"
4646
concurrency = 2
4747
extra-args = "--streaming -- -v --async"
4848

49+
[cmd_args.lmcache]
50+
51+
[cmd_args.lmbench]
52+
4953
[extra_env_vars]
5054
UCX_LOG_LEVEL = "warn"
5155
UCX_TLS = "cuda_copy,rc_x"

src/cloudai/systems/kubernetes/kubernetes_system.py

Lines changed: 62 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -298,25 +298,75 @@ def _run_genai_perf(self, job: KubernetesJob) -> None:
298298
raise TypeError("Test definition must be an instance of AIDynamoTestDefinition")
299299

300300
genai_perf_results_path = "/tmp/cloudai/genai-perf"
301+
frontend_pod = self._get_dynamo_pod_by_role(role="frontend")
301302

302-
genai_perf_cmd = ["genai-perf", "profile", f"--artifact-dir={genai_perf_results_path}"]
303-
for k, v in tdef.cmd_args.genai_perf.model_dump(
304-
exclude={"extra_args", "extra-args"}, exclude_none=True
305-
).items():
306-
genai_perf_cmd.append(f"--{k}={v}")
307-
if extra_args := tdef.cmd_args.genai_perf.extra_args:
308-
genai_perf_cmd.extend(extra_args.split())
309-
logging.debug(f"GenAI perf arguments: {genai_perf_cmd=}")
303+
# Copy wrapper script and calc_percentile_csv script to the pod
304+
wrapper_script_path = tdef.genai_perf_wrapper_script.installed_path
305+
calc_csv_script_path = tdef.calc_percentile_csv.installed_path
310306

311-
frontend_pod = self._get_dynamo_pod_by_role(role="frontend")
307+
pod_wrapper_path = "/tmp/genai_perf_wrapper.sh"
308+
pod_calc_csv_path = "/tmp/calc_percentile_csv.py"
309+
310+
logging.debug(f"Copying wrapper script {wrapper_script_path} to pod {frontend_pod}")
311+
cp_wrapper_cmd = f"kubectl cp {wrapper_script_path} {self.default_namespace}/{frontend_pod}:{pod_wrapper_path}"
312+
subprocess.run(cp_wrapper_cmd, shell=True, capture_output=True, text=True, check=True)
313+
314+
logging.debug(f"Copying calc_percentile_csv script {calc_csv_script_path} to pod {frontend_pod}")
315+
cp_calc_cmd = f"kubectl cp {calc_csv_script_path} {self.default_namespace}/{frontend_pod}:{pod_calc_csv_path}"
316+
subprocess.run(cp_calc_cmd, shell=True, capture_output=True, text=True, check=True)
312317

313-
logging.debug(f"Executing genai-perf in pod={frontend_pod} cmd={genai_perf_cmd}")
318+
# Make wrapper script executable
319+
chmod_cmd = ["chmod", "+x", pod_wrapper_path]
320+
logging.debug(f"Making wrapper script executable in pod {frontend_pod}")
321+
try:
322+
lazy.k8s.stream.stream(
323+
self.core_v1.connect_get_namespaced_pod_exec,
324+
name=frontend_pod,
325+
namespace=self.default_namespace,
326+
command=chmod_cmd,
327+
stderr=True,
328+
stdin=False,
329+
stdout=True,
330+
tty=False,
331+
)
332+
except lazy.k8s.client.ApiException as e:
333+
logging.error(f"Error making wrapper script executable in pod '{frontend_pod}': {e}")
334+
335+
# Build genai-perf command arguments
336+
genai_perf_cmd_parts = ["genai-perf", "profile", f"--artifact-dir={genai_perf_results_path}"]
337+
if tdef.cmd_args.genai_perf.args:
338+
for k, v in tdef.cmd_args.genai_perf.args.model_dump(exclude_none=True).items():
339+
genai_perf_cmd_parts.append(f"--{k}={v}")
340+
if extra_args := tdef.cmd_args.genai_perf.extra_args:
341+
if isinstance(extra_args, str):
342+
genai_perf_cmd_parts.extend(extra_args.split())
343+
else:
344+
genai_perf_cmd_parts.extend(extra_args)
345+
346+
# Build wrapper command with proper parameters
347+
report_file = "genai_perf_report.csv"
348+
wrapper_cmd = [
349+
"/bin/bash",
350+
pod_wrapper_path,
351+
"--result_dir",
352+
genai_perf_results_path,
353+
"--report_file",
354+
report_file,
355+
"--calc_percentile_csv_script",
356+
pod_calc_csv_path,
357+
"--gpus_per_node",
358+
str(self.gpus_per_node),
359+
"--",
360+
*genai_perf_cmd_parts,
361+
]
362+
363+
logging.debug(f"Executing genai-perf wrapper in pod={frontend_pod} cmd={wrapper_cmd}")
314364
try:
315365
genai_results = lazy.k8s.stream.stream(
316366
self.core_v1.connect_get_namespaced_pod_exec,
317367
name=frontend_pod,
318368
namespace=self.default_namespace,
319-
command=genai_perf_cmd,
369+
command=wrapper_cmd,
320370
stderr=True,
321371
stdin=False,
322372
stdout=True,
@@ -326,7 +376,7 @@ def _run_genai_perf(self, job: KubernetesJob) -> None:
326376
with (job.test_run.output_path / "genai_perf.log").open("w") as f:
327377
f.write(genai_results)
328378
except lazy.k8s.client.ApiException as e:
329-
logging.error(f"Error executing genai-perf command in pod '{frontend_pod}': {e}")
379+
logging.error(f"Error executing genai-perf wrapper command in pod '{frontend_pod}': {e}")
330380

331381
cp_logs_cmd = " ".join(
332382
[

src/cloudai/systems/slurm/slurm_command_gen_strategy.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ def __init__(self, system: System, test_run: TestRun) -> None:
4949
super().__init__(system, test_run)
5050
self.system = cast(SlurmSystem, system)
5151
self.test_run = test_run
52+
self.container_install_path = "/cloudai_install"
53+
self.container_results_path = "/cloudai_run_results"
5254

5355
self._node_spec_cache: dict[str, tuple[int, list[str]]] = {}
5456

@@ -79,8 +81,8 @@ def container_mounts(self) -> list[str]:
7981
repo_mounts.append(f"{path}:{repo.container_mount}")
8082

8183
mounts = [
82-
f"{self.test_run.output_path.absolute()}:/cloudai_run_results",
83-
f"{self.system.install_path.absolute()}:/cloudai_install",
84+
f"{self.test_run.output_path.absolute()}:{self.container_results_path}",
85+
f"{self.system.install_path.absolute()}:{self.container_install_path}",
8486
f"{self.test_run.output_path.absolute()}",
8587
*tdef.extra_container_mounts,
8688
*repo_mounts,
@@ -302,7 +304,7 @@ def _ranks_mapping_cmd(self) -> str:
302304
def _metadata_cmd(self) -> str:
303305
(self.test_run.output_path.absolute() / "metadata").mkdir(parents=True, exist_ok=True)
304306
num_nodes, _ = self.get_cached_nodes_spec()
305-
metadata_script_path = "/cloudai_install"
307+
metadata_script_path = self.container_install_path
306308
if not self.image_path():
307309
metadata_script_path = str(self.system.install_path.absolute())
308310
return " ".join(

src/cloudai/workloads/ai_dynamo/__init__.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
2-
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
# SPDX-License-Identifier: Apache-2.0
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -19,7 +19,10 @@
1919
AIDynamoCmdArgs,
2020
AIDynamoTestDefinition,
2121
DecodeWorkerArgs,
22-
GenAIPerfArgs,
22+
GenAIPerf,
23+
LMBench,
24+
LMCache,
25+
LMCacheArgs,
2326
PrefillWorkerArgs,
2427
)
2528
from .kubernetes_json_gen_strategy import AIDynamoKubernetesJsonGenStrategy
@@ -34,6 +37,9 @@
3437
"AIDynamoSlurmCommandGenStrategy",
3538
"AIDynamoTestDefinition",
3639
"DecodeWorkerArgs",
37-
"GenAIPerfArgs",
40+
"GenAIPerf",
41+
"LMBench",
42+
"LMCache",
43+
"LMCacheArgs",
3844
"PrefillWorkerArgs",
3945
]

0 commit comments

Comments
 (0)