VectorInstitute · XkunW · Apr 10, 2025 · Apr 4, 2025 · Apr 4, 2025 · Apr 4, 2025
diff --git a/tests/vec_inf/cli/test_cli.py b/tests/vec_inf/cli/test_cli.py
@@ -226,13 +226,12 @@ def base_patches(test_paths, mock_truediv, debug_helper):
             "pathlib.Path.parent", return_value=debug_helper.config_file.parent.parent
         ),
         patch("pathlib.Path.__truediv__", side_effect=mock_truediv),
-        patch("pathlib.Path.iterdir", return_value=[]),  # Mock empty directory listing
+        patch("pathlib.Path.iterdir", return_value=[]),
         patch("json.dump"),
         patch("pathlib.Path.touch"),
         patch("vec_inf.client._utils.Path", return_value=test_paths["weights_dir"]),
-        patch(
-            "pathlib.Path.home", return_value=Path("/home/user")
-        ),  # Mock home directory
+        patch("pathlib.Path.home", return_value=Path("/home/user")),
+        patch("pathlib.Path.rename"),
     ]
 
 
@@ -246,25 +245,25 @@ def apply_base_patches(base_patches):
         yield
 
 
-def test_launch_command_success(runner, mock_launch_output, path_exists, debug_helper):
+def test_launch_command_success(
+    runner,
+    mock_launch_output,
+    path_exists,
+    debug_helper,
+    mock_truediv,
+    test_paths,
+    base_patches,
+):
     """Test successful model launch with minimal required arguments."""
-    test_log_dir = Path("/tmp/test_vec_inf_logs")
+    with ExitStack() as stack:
+        # Apply all base patches
+        for patch_obj in base_patches:
+            stack.enter_context(patch_obj)
+
+        # Apply specific patches for this test
+        mock_run = stack.enter_context(patch("vec_inf.client._utils.run_bash_command"))
+        stack.enter_context(patch("pathlib.Path.exists", new=path_exists))
 
-    with (
-        patch("vec_inf.client._utils.run_bash_command") as mock_run,
-        patch("pathlib.Path.mkdir"),
-        patch("builtins.open", debug_helper.tracked_mock_open),
-        patch("pathlib.Path.open", debug_helper.tracked_mock_open),
-        patch("pathlib.Path.exists", new=path_exists),
-        patch("pathlib.Path.expanduser", return_value=test_log_dir),
-        patch("pathlib.Path.resolve", return_value=debug_helper.config_file.parent),
-        patch(
-            "pathlib.Path.parent", return_value=debug_helper.config_file.parent.parent
-        ),
-        patch("json.dump"),
-        patch("pathlib.Path.touch"),
-        patch("pathlib.Path.__truediv__", return_value=test_log_dir),
-    ):
         expected_job_id = "14933053"
         mock_run.return_value = mock_launch_output(expected_job_id)
 
@@ -277,25 +276,24 @@ def test_launch_command_success(runner, mock_launch_output, path_exists, debug_h
 
 
 def test_launch_command_with_json_output(
-    runner, mock_launch_output, path_exists, debug_helper
+    runner,
+    mock_launch_output,
+    path_exists,
+    debug_helper,
+    mock_truediv,
+    test_paths,
+    base_patches,
 ):
     """Test JSON output format for launch command."""
-    test_log_dir = Path("/tmp/test_vec_inf_logs")
-    with (
-        patch("vec_inf.client._utils.run_bash_command") as mock_run,
-        patch("pathlib.Path.mkdir"),
-        patch("builtins.open", debug_helper.tracked_mock_open),
-        patch("pathlib.Path.open", debug_helper.tracked_mock_open),
-        patch("pathlib.Path.exists", new=path_exists),
-        patch("pathlib.Path.expanduser", return_value=test_log_dir),
-        patch("pathlib.Path.resolve", return_value=debug_helper.config_file.parent),
-        patch(
-            "pathlib.Path.parent", return_value=debug_helper.config_file.parent.parent
-        ),
-        patch("json.dump"),
-        patch("pathlib.Path.touch"),
-        patch("pathlib.Path.__truediv__", return_value=test_log_dir),
-    ):
+    with ExitStack() as stack:
+        # Apply all base patches
+        for patch_obj in base_patches:
+            stack.enter_context(patch_obj)
+
+        # Apply specific patches for this test
+        mock_run = stack.enter_context(patch("vec_inf.client._utils.run_bash_command"))
+        stack.enter_context(patch("pathlib.Path.exists", new=path_exists))
+
         expected_job_id = "14933051"
         mock_run.return_value = mock_launch_output(expected_job_id)
 
@@ -319,7 +317,7 @@ def test_launch_command_with_json_output(
         assert output.get("slurm_job_id") == expected_job_id
         assert output.get("model_name") == "Meta-Llama-3.1-8B"
         assert output.get("model_type") == "LLM"
-        assert str(test_log_dir) in output.get("log_dir", "")
+        assert str(test_paths["log_dir"]) in output.get("log_dir", "")
 
 
 def test_launch_command_no_model_weights_parent_dir(runner, debug_helper, base_patches):

diff --git a/vec_inf/cli/_helper.py b/vec_inf/cli/_helper.py
@@ -1,6 +1,6 @@
 """Helper classes for the CLI."""
 
-import os
+from pathlib import Path
 from typing import Any, Union
 
 import click
@@ -59,9 +59,10 @@ def format_table_output(self) -> Table:
             )
         if self.params.get("enforce_eager"):
             table.add_row("Enforce Eager", self.params["enforce_eager"])
-
-        # Add path details
-        table.add_row("Model Weights Directory", os.environ.get("MODEL_WEIGHTS"))
+        table.add_row(
+            "Model Weights Directory",
+            str(Path(self.params["model_weights_parent_dir"], self.model_name)),
+        )
         table.add_row("Log Directory", self.params["log_dir"])
 
         return table

diff --git a/vec_inf/client/_helper.py b/vec_inf/client/_helper.py
@@ -25,12 +25,14 @@
     ModelType,
     StatusResponse,
 )
+from vec_inf.client._slurm_script_generator import SlurmScriptGenerator
 from vec_inf.client._vars import (
     BOOLEAN_FIELDS,
     LD_LIBRARY_PATH,
     REQUIRED_FIELDS,
+    SINGULARITY_IMAGE,
     SRC_DIR,
-    VLLM_TASK_MAP,
+    VLLM_NCCL_SO_PATH,
 )
 
 
@@ -50,6 +52,7 @@ def __init__(self, model_name: str, kwargs: Optional[dict[str, Any]]):
         self.model_name = model_name
         self.kwargs = kwargs or {}
         self.slurm_job_id = ""
+        self.slurm_script_path = Path("")
         self.model_config = self._get_model_configuration()
         self.params = self._get_launch_params()
 
@@ -137,31 +140,9 @@ def _get_launch_params(self) -> dict[str, Any]:
 
     def _set_env_vars(self) -> None:
         """Set environment variables for the launch command."""
-        os.environ["MODEL_NAME"] = self.model_name
-        os.environ["MAX_MODEL_LEN"] = self.params["max_model_len"]
-        os.environ["MAX_LOGPROBS"] = self.params["vocab_size"]
-        os.environ["DATA_TYPE"] = self.params["data_type"]
-        os.environ["MAX_NUM_SEQS"] = self.params["max_num_seqs"]
-        os.environ["GPU_MEMORY_UTILIZATION"] = self.params["gpu_memory_utilization"]
-        os.environ["TASK"] = VLLM_TASK_MAP[self.params["model_type"]]
-        os.environ["PIPELINE_PARALLELISM"] = self.params["pipeline_parallelism"]
-        os.environ["COMPILATION_CONFIG"] = self.params["compilation_config"]
-        os.environ["SRC_DIR"] = SRC_DIR
-        os.environ["MODEL_WEIGHTS"] = str(
-            Path(self.params["model_weights_parent_dir"], self.model_name)
-        )
         os.environ["LD_LIBRARY_PATH"] = LD_LIBRARY_PATH
-        os.environ["VENV_BASE"] = self.params["venv"]
-        os.environ["LOG_DIR"] = self.params["log_dir"]
-
-        if self.params.get("enable_prefix_caching"):
-            os.environ["ENABLE_PREFIX_CACHING"] = self.params["enable_prefix_caching"]
-        if self.params.get("enable_chunked_prefill"):
-            os.environ["ENABLE_CHUNKED_PREFILL"] = self.params["enable_chunked_prefill"]
-        if self.params.get("max_num_batched_tokens"):
-            os.environ["MAX_NUM_BATCHED_TOKENS"] = self.params["max_num_batched_tokens"]
-        if self.params.get("enforce_eager"):
-            os.environ["ENFORCE_EAGER"] = self.params["enforce_eager"]
+        os.environ["VLLM_NCCL_SO_PATH"] = VLLM_NCCL_SO_PATH
+        os.environ["SINGULARITY_IMAGE"] = SINGULARITY_IMAGE
 
     def _build_launch_command(self) -> str:
         """Construct the full launch command with parameters."""
@@ -187,10 +168,10 @@ def _build_launch_command(self) -> str:
             ]
         )
         # Add slurm script
-        slurm_script = "vllm.slurm"
-        if int(self.params["num_nodes"]) > 1:
-            slurm_script = "multinode_vllm.slurm"
-        command_list.append(f"{SRC_DIR}/{slurm_script}")
+        self.slurm_script_path = SlurmScriptGenerator(
+            self.params, SRC_DIR
+        ).write_to_log_dir()
+        command_list.append(str(self.slurm_script_path))
         return " ".join(command_list)
 
     def launch(self) -> LaunchResponse:
@@ -207,15 +188,22 @@ def launch(self) -> LaunchResponse:
         self.slurm_job_id = command_output.split(" ")[-1].strip().strip("\n")
         self.params["slurm_job_id"] = self.slurm_job_id
 
-        # Create log directory and job json file
+        # Create the job log directory and JSON file, then move the Slurm script into it
+        job_log_dir = Path(
+            self.params["log_dir"], f"{self.model_name}.{self.slurm_job_id}"
+        )
+        job_log_dir.mkdir(parents=True, exist_ok=True)
+
         job_json = Path(
-            self.params["log_dir"],
-            f"{self.model_name}.{self.slurm_job_id}",
+            job_log_dir,
             f"{self.model_name}.{self.slurm_job_id}.json",
         )
-        job_json.parent.mkdir(parents=True, exist_ok=True)
         job_json.touch(exist_ok=True)
 
+        self.slurm_script_path.rename(
+            job_log_dir / f"{self.model_name}.{self.slurm_job_id}.slurm"
+        )
+
         with job_json.open("w") as file:
             json.dump(self.params, file, indent=4)