NVIDIA
diff --git a/conf/experimental/ai_dynamo/test/vllm.toml b/conf/experimental/ai_dynamo/test/vllm.toml
Lines changed: 5 additions & 1 deletion
diff --git a/src/cloudai/systems/kubernetes/kubernetes_system.py b/src/cloudai/systems/kubernetes/kubernetes_system.py
Lines changed: 62 additions & 12 deletions
diff --git a/src/cloudai/systems/slurm/slurm_command_gen_strategy.py b/src/cloudai/systems/slurm/slurm_command_gen_strategy.py
Lines changed: 5 additions & 3 deletions
diff --git a/src/cloudai/workloads/ai_dynamo/__init__.py b/src/cloudai/workloads/ai_dynamo/__init__.py
Lines changed: 9 additions & 3 deletions
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -46,6 +46,10 @@ docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0"
   concurrency = 2
   extra-args = "--streaming -- -v --async"
 
+  [cmd_args.lmcache]
+
+  [cmd_args.lmbench]
+
 [extra_env_vars]
 UCX_LOG_LEVEL = "warn"
 UCX_TLS = "cuda_copy,rc_x"
 
@@ -298,25 +298,75 @@ def _run_genai_perf(self, job: KubernetesJob) -> None:
             raise TypeError("Test definition must be an instance of AIDynamoTestDefinition")
 
         genai_perf_results_path = "/tmp/cloudai/genai-perf"
+        frontend_pod = self._get_dynamo_pod_by_role(role="frontend")
 
-        genai_perf_cmd = ["genai-perf", "profile", f"--artifact-dir={genai_perf_results_path}"]
-        for k, v in tdef.cmd_args.genai_perf.model_dump(
-            exclude={"extra_args", "extra-args"}, exclude_none=True
-        ).items():
-            genai_perf_cmd.append(f"--{k}={v}")
-        if extra_args := tdef.cmd_args.genai_perf.extra_args:
-            genai_perf_cmd.extend(extra_args.split())
-        logging.debug(f"GenAI perf arguments: {genai_perf_cmd=}")
+        # Copy wrapper script and calc_percentile_csv script to the pod
+        wrapper_script_path = tdef.genai_perf_wrapper_script.installed_path
+        calc_csv_script_path = tdef.calc_percentile_csv.installed_path
 
-        frontend_pod = self._get_dynamo_pod_by_role(role="frontend")
+        pod_wrapper_path = "/tmp/genai_perf_wrapper.sh"
+        pod_calc_csv_path = "/tmp/calc_percentile_csv.py"
+
+        logging.debug(f"Copying wrapper script {wrapper_script_path} to pod {frontend_pod}")
+        cp_wrapper_cmd = f"kubectl cp {wrapper_script_path} {self.default_namespace}/{frontend_pod}:{pod_wrapper_path}"
+        subprocess.run(cp_wrapper_cmd, shell=True, capture_output=True, text=True, check=True)
+
+        logging.debug(f"Copying calc_percentile_csv script {calc_csv_script_path} to pod {frontend_pod}")
+        cp_calc_cmd = f"kubectl cp {calc_csv_script_path} {self.default_namespace}/{frontend_pod}:{pod_calc_csv_path}"
+        subprocess.run(cp_calc_cmd, shell=True, capture_output=True, text=True, check=True)
 
-        logging.debug(f"Executing genai-perf in pod={frontend_pod} cmd={genai_perf_cmd}")
+        # Make wrapper script executable
+        chmod_cmd = ["chmod", "+x", pod_wrapper_path]
+        logging.debug(f"Making wrapper script executable in pod {frontend_pod}")
+        try:
+            lazy.k8s.stream.stream(
+                self.core_v1.connect_get_namespaced_pod_exec,
+                name=frontend_pod,
+                namespace=self.default_namespace,
+                command=chmod_cmd,
+                stderr=True,
+                stdin=False,
+                stdout=True,
+                tty=False,
+            )
+        except lazy.k8s.client.ApiException as e:
+            logging.error(f"Error making wrapper script executable in pod '{frontend_pod}': {e}")
+
+        # Build genai-perf command arguments
+        genai_perf_cmd_parts = ["genai-perf", "profile", f"--artifact-dir={genai_perf_results_path}"]
+        if tdef.cmd_args.genai_perf.args:
+            for k, v in tdef.cmd_args.genai_perf.args.model_dump(exclude_none=True).items():
+                genai_perf_cmd_parts.append(f"--{k}={v}")
+            if extra_args := tdef.cmd_args.genai_perf.extra_args:
+                if isinstance(extra_args, str):
+                    genai_perf_cmd_parts.extend(extra_args.split())
+                else:
+                    genai_perf_cmd_parts.extend(extra_args)
+
+        # Build wrapper command with proper parameters
+        report_file = "genai_perf_report.csv"
+        wrapper_cmd = [
+            "/bin/bash",
+            pod_wrapper_path,
+            "--result_dir",
+            genai_perf_results_path,
+            "--report_file",
+            report_file,
+            "--calc_percentile_csv_script",
+            pod_calc_csv_path,
+            "--gpus_per_node",
+            str(self.gpus_per_node),
+            "--",
+            *genai_perf_cmd_parts,
+        ]
+
+        logging.debug(f"Executing genai-perf wrapper in pod={frontend_pod} cmd={wrapper_cmd}")
         try:
             genai_results = lazy.k8s.stream.stream(
                 self.core_v1.connect_get_namespaced_pod_exec,
                 name=frontend_pod,
                 namespace=self.default_namespace,
-                command=genai_perf_cmd,
+                command=wrapper_cmd,
                 stderr=True,
                 stdin=False,
                 stdout=True,
@@ -326,7 +376,7 @@ def _run_genai_perf(self, job: KubernetesJob) -> None:
             with (job.test_run.output_path / "genai_perf.log").open("w") as f:
                 f.write(genai_results)
         except lazy.k8s.client.ApiException as e:
-            logging.error(f"Error executing genai-perf command in pod '{frontend_pod}': {e}")
+            logging.error(f"Error executing genai-perf wrapper command in pod '{frontend_pod}': {e}")
 
         cp_logs_cmd = " ".join(
             [
 
@@ -49,6 +49,8 @@ def __init__(self, system: System, test_run: TestRun) -> None:
         super().__init__(system, test_run)
         self.system = cast(SlurmSystem, system)
         self.test_run = test_run
+        self.container_install_path = "/cloudai_install"
+        self.container_results_path = "/cloudai_run_results"
 
         self._node_spec_cache: dict[str, tuple[int, list[str]]] = {}
 
@@ -79,8 +81,8 @@ def container_mounts(self) -> list[str]:
             repo_mounts.append(f"{path}:{repo.container_mount}")
 
         mounts = [
-            f"{self.test_run.output_path.absolute()}:/cloudai_run_results",
-            f"{self.system.install_path.absolute()}:/cloudai_install",
+            f"{self.test_run.output_path.absolute()}:{self.container_results_path}",
+            f"{self.system.install_path.absolute()}:{self.container_install_path}",
             f"{self.test_run.output_path.absolute()}",
             *tdef.extra_container_mounts,
             *repo_mounts,
@@ -302,7 +304,7 @@ def _ranks_mapping_cmd(self) -> str:
     def _metadata_cmd(self) -> str:
         (self.test_run.output_path.absolute() / "metadata").mkdir(parents=True, exist_ok=True)
         num_nodes, _ = self.get_cached_nodes_spec()
-        metadata_script_path = "/cloudai_install"
+        metadata_script_path = self.container_install_path
         if not self.image_path():
             metadata_script_path = str(self.system.install_path.absolute())
         return " ".join(
 
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -19,7 +19,10 @@
     AIDynamoCmdArgs,
     AIDynamoTestDefinition,
     DecodeWorkerArgs,
-    GenAIPerfArgs,
+    GenAIPerf,
+    LMBench,
+    LMCache,
+    LMCacheArgs,
     PrefillWorkerArgs,
 )
 from .kubernetes_json_gen_strategy import AIDynamoKubernetesJsonGenStrategy
@@ -34,6 +37,9 @@
     "AIDynamoSlurmCommandGenStrategy",
     "AIDynamoTestDefinition",
     "DecodeWorkerArgs",
-    "GenAIPerfArgs",
+    "GenAIPerf",
+    "LMBench",
+    "LMCache",
+    "LMCacheArgs",
     "PrefillWorkerArgs",
 ]