26 | 26 | if TYPE_CHECKING: |
27 | 27 | import kubernetes as k8s |
28 | 28 |
29 | | - |
30 | 29 | from cloudai.core import BaseJob, System |
31 | 30 | from cloudai.util.lazy_imports import lazy |
32 | 31 |
@@ -325,54 +324,63 @@ def _check_model_server(self) -> bool: |
325 | 324 | logging.warning("Invalid JSON response from model server") |
326 | 325 | return False |
327 | 326 |
| 327 | + def _get_frontend_pod_name(self) -> str: |
| 328 | + for pod in self.core_v1.list_namespaced_pod(namespace=self.default_namespace).items: |
| 329 | + labels = pod.metadata.labels |
| 330 | + logging.debug(f"Found pod: {pod.metadata.name} with labels: {labels}") |
| 331 | + if labels and str(labels.get("nvidia.com/dynamo-component", "")).lower() == "frontend": |
| 332 | + return pod.metadata.name |
| 333 | + raise RuntimeError("No frontend pod found for the job") |
| 334 | + |
328 | 335 | def _run_genai_perf(self, job: KubernetesJob) -> None: |
329 | 336 | from cloudai.workloads.ai_dynamo.ai_dynamo import AIDynamoTestDefinition |
330 | 337 |
331 | | - test_definition = job.test_run.test |
332 | | - if not isinstance(test_definition, AIDynamoTestDefinition): |
| 338 | + tdef = job.test_run.test |
| 339 | + if not isinstance(tdef, AIDynamoTestDefinition): |
333 | 340 | raise TypeError("Test definition must be an instance of AIDynamoTestDefinition") |
334 | 341 |
335 | | - python_exec = test_definition.python_executable |
336 | | - if not python_exec or not python_exec.venv_path: |
337 | | - raise ValueError("Python executable path not set - executable may not be installed") |
338 | | - |
339 | | - genai_perf_args_obj = test_definition.cmd_args.genai_perf |
340 | | - if not genai_perf_args_obj: |
341 | | - raise ValueError("GenAI perf args not set") |
| 342 | + genai_perf_results_path = "/tmp/cloudai/genai-perf" |
342 | 343 |
343 | | - output_path = job.test_run.output_path |
344 | | - if not output_path: |
345 | | - raise ValueError("Output path not set") |
346 | | - |
347 | | - genai_perf_args = genai_perf_args_obj.model_dump() |
348 | | - args = [f"--artifact-dir={output_path.absolute()}"] |
349 | | - extra_args = None |
350 | | - |
351 | | - for k, v in genai_perf_args.items(): |
352 | | - if k == "extra-args": |
353 | | - extra_args = str(v) |
354 | | - else: |
355 | | - args.append(f"--{k}={v}") |
| 344 | + genai_perf_cmd = ["genai-perf", "profile", f"--artifact-dir={genai_perf_results_path}"] |
| 345 | + for k, v in tdef.cmd_args.genai_perf.model_dump( |
| 346 | + exclude={"extra_args", "extra-args"}, exclude_none=True |
| 347 | + ).items(): |
| 348 | + genai_perf_cmd.append(f"--{k}={v}") |
| 349 | + if extra_args := tdef.cmd_args.genai_perf.extra_args: |
| 350 | + genai_perf_cmd.extend(extra_args.split()) |
| 351 | + logging.debug(f"GenAI perf arguments: {genai_perf_cmd=}") |
356 | 352 |
357 | | - if extra_args: |
358 | | - args.append(extra_args) |
359 | | - args_str = " ".join(args) |
| 353 | + frontend_pod = self._get_frontend_pod_name() |
360 | 354 |
361 | | - venv_path = python_exec.venv_path.absolute() |
362 | | - cmd = f"{venv_path}/bin/genai-perf profile {args_str}" |
363 | | - logging.debug(f"Running GenAI performance test: {cmd}") |
364 | | - result: subprocess.CompletedProcess | None = None |
| 355 | + logging.debug(f"Executing genai-perf in pod={frontend_pod} cmd={genai_perf_cmd}") |
365 | 356 | try: |
366 | | - result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True) |
367 | | - logging.debug("GenAI performance test completed successfully") |
368 | | - except subprocess.CalledProcessError as e: |
369 | | - logging.error(f"GenAI performance test failed: {e.stderr}") |
370 | | - |
371 | | - if result: |
372 | | - with (job.test_run.output_path / "stdout.txt").open("w") as f: |
373 | | - f.write(result.stdout) |
374 | | - with (job.test_run.output_path / "stderr.txt").open("w") as f: |
375 | | - f.write(result.stderr) |
| 357 | + genai_results = lazy.k8s.stream.stream( |
| 358 | + self.core_v1.connect_get_namespaced_pod_exec, |
| 359 | + name=frontend_pod, |
| 360 | + namespace=self.default_namespace, |
| 361 | + command=genai_perf_cmd, |
| 362 | + stderr=True, |
| 363 | + stdin=False, |
| 364 | + stdout=True, |
| 365 | + tty=False, |
| 366 | + _request_timeout=60 * 10, |
| 367 | + ) |
| 368 | + with (job.test_run.output_path / "genai_perf.log").open("w") as f: |
| 369 | + f.write(genai_results) |
| 370 | + except lazy.k8s.client.ApiException as e: |
| 371 | + logging.error(f"Error executing genai-perf command in pod '{frontend_pod}': {e}") |
| 372 | + |
| 373 | + cp_logs_cmd = " ".join( |
| 374 | + [ |
| 375 | + "kubectl", |
| 376 | + "cp", |
| 377 | + f"{self.default_namespace}/{frontend_pod}:{genai_perf_results_path}", |
| 378 | + str(job.test_run.output_path / "genai-perf"), |
| 379 | + ] |
| 380 | + ) |
| 381 | + logging.debug(f"Copying genai-perf results with command: {cp_logs_cmd}") |
| 382 | + p = subprocess.run(cp_logs_cmd, shell=True, capture_output=True, text=True) |
| 383 | + logging.debug(f"Returned code {p.returncode}, stdout: {p.stdout}, stderr: {p.stderr}") |
376 | 384 |
377 | 385 | def _check_deployment_conditions(self, conditions: list) -> bool: |
378 | 386 | logging.debug(f"Checking deployment conditions: {conditions}") |
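
The genai-perf run itself is executed inside that pod through the kubernetes stream helper, which wraps connect_get_namespaced_pod_exec and, with the default _preload_content=True, returns the remote command's combined stdout/stderr as a single string. A minimal sketch of that pattern follows; the pod name, namespace, and command are placeholders, and only the ten-minute timeout mirrors the diff.

from kubernetes import client, config
from kubernetes.stream import stream

config.load_kube_config()
core_v1 = client.CoreV1Api()

# Runs the remote command to completion and returns its output as one string.
output = stream(
    core_v1.connect_get_namespaced_pod_exec,
    name="frontend-pod",                          # placeholder pod name
    namespace="default",                          # placeholder namespace
    command=["genai-perf", "profile", "--help"],  # placeholder command
    stderr=True,
    stdin=False,
    stdout=True,
    tty=False,
    _request_timeout=60 * 10,  # same ten-minute ceiling as in the diff
)
print(output)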
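
The result artifacts are then copied out of the pod with kubectl cp rather than through the API. A sketch of that step is below; the namespace, pod name, and local destination are placeholders, only the remote results directory matches the diff, and the argument list is passed without shell=True, a small deviation from the diff's shell invocation. Note that kubectl cp requires tar to be available inside the container.

import subprocess
from pathlib import Path

namespace, pod = "default", "frontend-pod"  # placeholder names
remote_dir = "/tmp/cloudai/genai-perf"      # results path used in the diff
local_dir = Path("output") / "genai-perf"   # placeholder destination
local_dir.parent.mkdir(parents=True, exist_ok=True)

# kubectl cp <namespace>/<pod>:<remote path> <local path>
cmd = ["kubectl", "cp", f"{namespace}/{pod}:{remote_dir}", str(local_dir)]
proc = subprocess.run(cmd, capture_output=True, text=True)
if proc.returncode != 0:
    print(f"kubectl cp failed (rc={proc.returncode}): {proc.stderr}")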