Commit 5fe028c

Update testing for trtllm-bench dataset.
Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com>
1 parent dcbb3d4 commit 5fe028c
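
This commit moves the synthetic-dataset generation in the test suites off `benchmarks/cpp/prepare_dataset.py --stdout ... > dataset` and onto the `trtllm-bench ... dataset --output ...` subcommand, which writes the dataset file itself. A minimal sketch of the new flow, mirroring the flags used in the diffs below (the model path, output path, and request counts are illustrative placeholders):

```python
import subprocess
from pathlib import Path


def generate_synthetic_dataset(model_dir: str, dataset_path: Path) -> Path:
    """Sketch of the dataset-generation step used by the updated tests."""
    command = [
        "trtllm-bench",
        f"--model={model_dir}",          # tokenizer/model directory (placeholder)
        "dataset",
        "--output", str(dataset_path),   # trtllm-bench writes the file directly
        "token-norm-dist",
        "--num-requests", "10",
        "--input-mean", "128",
        "--output-mean", "128",
        "--input-stdev", "0",
        "--output-stdev", "0",
    ]
    # No stdout capture or redirection step is needed anymore.
    subprocess.run(command, check=True)
    return dataset_path
```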

6 files changed, +75 -82 lines changed

tests/integration/defs/perf/README_release_test.md

Lines changed: 9 additions & 11 deletions
@@ -24,27 +24,25 @@ For trtllm-bench, the test extracts the following key performance metrics from l
 
 #### Without LoRA
 ```python
-prepare_data_script = os.path.join(self._llm_root, "benchmarks", "cpp", "prepare_dataset.py")
 data_cmd += [
-    "python3", prepare_data_script, "--stdout",
-    f"--tokenizer={tokenizer_dir}", f"token-norm-dist",
-    f"--num-requests={self._config.num_reqs}",
-    f"--input-mean={input_len}", f"--output-mean={output_len}",
-    f"--input-stdev={istdev}", f"--output-stdev={ostdev}",
-    f" > {dataset_path}"
+    "trtllm-bench", f"--model={tokenizer_dir}",
+    "dataset", "--output", dataset_path, "token-norm-dist",
+    f"--num-requests={self._config.num_reqs}",
+    f"--input-mean={input_len}", f"--output-mean={output_len}",
+    f"--input-stdev={istdev}", f"--output-stdev={ostdev}"
 ]
 ```
 
 #### With LoRA
 ```python
-    "python3", prepare_data_script, f"--stdout",
+    "trtllm-bench", f"--model={tokenizer_dir}",
+    "dataset", "--output", dataset_path,
     f"--rand-task-id 0 {nloras-1}",
-    f"--tokenizer={tokenizer_dir}", f"--lora-dir={lora_dir}",
+    f"--lora-dir={lora_dir}",
     f"token-norm-dist",
     f"--num-requests={self._config.num_reqs}",
     f"--input-mean={input_len}", f"--output-mean={output_len}",
-    f"--input-stdev={istdev}", f"--output-stdev={ostdev}",
-    f" > {dataset_path}"
+    f"--input-stdev={istdev}", f"--output-stdev={ostdev}"
 ```
 
 ### 2.2 PyTorch Configuration Generation
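
For the LoRA path, the new command keeps the `--rand-task-id` and `--lora-dir` options ahead of the `token-norm-dist` subcommand. An illustrative expansion of the "With LoRA" snippet above, with placeholder values substituted for the f-string variables (the 16/24 stdev values follow the LoRA branch in test_perf.py; paths and counts are made up):

```python
tokenizer_dir = "/models/llama-7b-hf"      # placeholder model/tokenizer directory
lora_dir = "/tmp/loras"                    # placeholder LoRA adapter directory
dataset_path = "/tmp/synthetic_data.json"  # placeholder output location
nloras, num_reqs = 4, 10
input_len, output_len, istdev, ostdev = 128, 128, 16, 24

data_cmd = [
    "trtllm-bench", f"--model={tokenizer_dir}",
    "dataset", "--output", dataset_path,
    f"--rand-task-id 0 {nloras-1}",
    f"--lora-dir={lora_dir}",
    "token-norm-dist",
    f"--num-requests={num_reqs}",
    f"--input-mean={input_len}", f"--output-mean={output_len}",
    f"--input-stdev={istdev}", f"--output-stdev={ostdev}",
]
print(" ".join(data_cmd))
```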

tests/integration/defs/perf/test_perf.py

Lines changed: 13 additions & 9 deletions
@@ -1628,6 +1628,12 @@ def get_prepare_data_command(self, engine_dir, input_len,
                                          "llama-7b-hf")
         if not os.path.exists(engine_dir):
             os.makedirs(engine_dir, exist_ok=True)
+        if self._config.num_loras > 0:
+            istdev = 16
+            ostdev = 24
+            nloras = self._config.num_loras
+            dataset_path = os.path.join(engine_dir, "synthetic_data.json")
+
         if self._config.num_loras > 0:
             istdev = 16
             ostdev = 24
@@ -1653,14 +1659,13 @@ def get_prepare_data_command(self, engine_dir, input_len,
                 self.lora_dirs.append(f"{lora_dir}/{i}")
                 data_cmd += [f"ln -sf {lora_path} {lora_dir}/{i}", ";"]
             data_cmd += [
-                "python3", prepare_data_script, f"--stdout",
-                f"--rand-task-id 0 {nloras-1}",
-                f"--tokenizer={tokenizer_dir}", f"--lora-dir={lora_dir}",
+                "trtllm-bench", f"--model={tokenizer_dir}", "dataset",
+                "--output", f"{dataset_path}",
+                f"--rand-task-id 0 {nloras-1}", f"--lora-dir={lora_dir}",
                 f"token-norm-dist",
                 f"--num-requests={self._config.num_reqs}",
                 f"--input-mean={input_len}", f"--output-mean={output_len}",
-                f"--input-stdev={istdev}", f"--output-stdev={ostdev}",
-                f" > {dataset_path}"
+                f"--input-stdev={istdev}", f"--output-stdev={ostdev}"
             ]
 
         else:
@@ -1673,12 +1678,11 @@ def get_prepare_data_command(self, engine_dir, input_len,
             dataset_path = os.path.join(engine_dir, "synthetic_data.json")
             if self._build_script == 'trtllm-bench':
                 data_cmd += [
-                    "python3", prepare_data_script, "--stdout",
-                    f"--tokenizer={tokenizer_dir}", f"token-norm-dist",
+                    "trtllm-bench", f"--model={tokenizer_dir}", "dataset",
+                    "--output", f"{dataset_path}", "token-norm-dist",
                     f"--num-requests={self._config.num_reqs}",
                     f"--input-mean={input_len}", f"--output-mean={output_len}",
-                    f"--input-stdev={istdev}", f"--output-stdev={ostdev}",
-                    f" > {dataset_path}"
+                    f"--input-stdev={istdev}", f"--output-stdev={ostdev}"
                 ]
             else:
                 data_cmd += [

tests/integration/defs/perf/utils.py

Lines changed: 2 additions & 2 deletions
@@ -450,8 +450,8 @@ def run_ex(self,
         self._gpu_clock_lock = gpu_clock_lock
         tmpDir = temp_wd(self.get_working_dir())
 
-        is_prepare_dataset_cmd = 'prepare_dataset' in commands.get_cmd_str(
-            cmd_idx)
+        cmd_str = commands.get_cmd_str(cmd_idx)
+        is_prepare_dataset_cmd = 'prepare_dataset' in cmd_str or "dataset --output" in cmd_str
 
         # Start the timer.
         self._start_timestamp = datetime.utcnow()
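
The broader check above lets the perf harness keep treating dataset generation as a setup step for both command styles: the legacy `prepare_dataset.py` invocation and the new `trtllm-bench ... dataset --output ...` form. A quick sketch of the detection logic (the example command strings are hypothetical):

```python
# Hypothetical command strings; only the substrings being matched matter here.
old_style = "python3 benchmarks/cpp/prepare_dataset.py --stdout --tokenizer /models/llama ..."
new_style = "trtllm-bench --model /models/llama dataset --output /tmp/synthetic_data.json token-norm-dist ..."

for cmd_str in (old_style, new_style):
    # Same expression as in run_ex(): either style counts as dataset preparation.
    is_prepare_dataset_cmd = 'prepare_dataset' in cmd_str or "dataset --output" in cmd_str
    assert is_prepare_dataset_cmd
```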

tests/integration/defs/test_e2e.py

Lines changed: 24 additions & 37 deletions
@@ -489,16 +489,15 @@ def __call__(self):
         return self.run_bench()
 
     def prepare_dataset(self):
-        dataset_tool = Path(self.llm_root, "benchmarks", "cpp",
-                            "prepare_dataset.py")
-
         # Generate a small dataset to run a test.
         self.work_dir.mkdir(parents=True)
         command = [
-            f"{dataset_tool.resolve()}",
-            "--stdout",
-            "--tokenizer",
+            "trtllm-bench",
+            "--model",
             f"{self.model_path}",
+            "dataset",
+            "--output",
+            f"{self.dataset_path}",
             "token-norm-dist",
             "--input-mean",
             "128",
@@ -512,13 +511,6 @@ def prepare_dataset(self):
             str(self.num_requests),
         ]
         print(f"Running command: {' '.join(command)}")
-        dataset_output = self.llm_venv.run_cmd(
-            command,
-            caller=check_output,
-        )
-        # Grab the stdout and write it to a dataset file for passing to suite.
-        with open(self.dataset_path, "w") as dataset:
-            dataset.write(dataset_output)
 
     def build_engine(self):
         if self.skip_engine_build:
@@ -769,7 +761,6 @@ def trtllm_bench_prolog(
     stream_mode = "streaming" if streaming else "non-streaming"
     benchmark_name = f"trtllm-bench-sanity-{quant_name}-{stream_mode}"
     benchmark_name += "-pytorch-backend" if skip_engine_build else benchmark_name
-    dataset_tool = Path(llm_root, "benchmarks", "cpp", "prepare_dataset.py")
 
     work_dir = Path(tempfile.TemporaryDirectory().name
                     ) if skip_engine_build else Path(engine_dir)
@@ -778,29 +769,25 @@ def trtllm_bench_prolog(
     shutil.rmtree(work_dir, ignore_errors=True)
     # Generate a small dataset to run a test.
     work_dir.mkdir(parents=True)
-    dataset_output = llm_venv.run_cmd(
-        [
-            f"{dataset_tool.resolve()}",
-            "--stdout",
-            "--tokenizer",
-            f"{model_path}",
-            "token-norm-dist",
-            "--input-mean",
-            "128",
-            "--output-mean",
-            "128",
-            "--input-stdev",
-            "0",
-            "--output-stdev",
-            "0",
-            "--num-requests",
-            "10",
-        ],
-        caller=check_output,
-    )
-    # Grab the stdout and write it to a dataset file for passing to suite.
-    with open(dataset_path, "w") as dataset:
-        dataset.write(dataset_output)
+    llm_venv.run_cmd([
+        "trtllm-bench",
+        "--model",
+        f"{model_path}",
+        "dataset",
+        "--output",
+        f"{dataset_path}",
+        "token-norm-dist",
+        "--input-mean",
+        "128",
+        "--output-mean",
+        "128",
+        "--input-stdev",
+        "0",
+        "--output-stdev",
+        "0",
+        "--num-requests",
+        "10",
+    ])
 
     if not skip_engine_build:
         build_cmd = \

tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_trtllm_bench.py

Lines changed: 6 additions & 8 deletions
@@ -42,16 +42,16 @@ def run_benchmark(
 def prepare_dataset(root_dir: str, temp_dir: str, model_path_or_name: str):
     _DATASET_NAME = "synthetic_128_128.txt"
     dataset_path = Path(temp_dir, _DATASET_NAME)
-    dataset_tool = Path(root_dir, "benchmarks", "cpp", "prepare_dataset.py")
     script_dir = Path(root_dir, "benchmarks", "cpp")
 
     # Generate a small dataset to run a test - matching workload configuration
     command = [
-        "python3",
-        f"{dataset_tool}",
-        "--stdout",
-        "--tokenizer",
+        "trtllm-bench",
+        "--model",
         model_path_or_name,
+        "dataset",
+        "--output",
+        f"{dataset_path}",
         "token-norm-dist",
         "--input-mean",
         "128",
@@ -70,9 +70,7 @@ def prepare_dataset(root_dir: str, temp_dir: str, model_path_or_name: str):
     )
     if result.returncode != 0:
         raise RuntimeError(f"Failed to prepare dataset: {result.stderr}")
-    # Grab the stdout and write it to a dataset file for passing to suite.
-    with open(dataset_path, "w") as dataset:
-        dataset.write(result.stdout)
+
     return dataset_path
 
tests/unittest/tools/test_prepare_dataset.py

Lines changed: 21 additions & 15 deletions
@@ -48,32 +48,31 @@ def temp_lora_dir(self) -> str:
             task_dir.mkdir(parents=True, exist_ok=True)
         yield str(lora_dir)
 
-    def _build_base_command(self, llm_root: Path) -> List[str]:
+    def _build_base_command(self, output_path: Path) -> List[str]:
         """
         Build the base command for running prepare_dataset.py.
 
         Args:
-            llm_root: Path to the TensorRT LLM root directory
+            output_path: Path to the output dataset file
 
         Returns:
             List[str]: Base command components
 
         Raises:
             pytest.skip: If LLM_MODELS_ROOT is not available
         """
-        script_path = llm_root / _PREPARE_DATASET_SCRIPT_PATH
-        cmd = ["python3", str(script_path)]
+        cmd = ["trtllm-bench"]
 
         # Add required tokenizer argument
         model_cache = llm_models_root()
         if model_cache is None:
             pytest.skip("LLM_MODELS_ROOT not available")
 
         tokenizer_dir = model_cache / _TOKENIZER_SUBPATH
-        cmd.extend(["--tokenizer", str(tokenizer_dir)])
+        cmd.extend(["--model", str(tokenizer_dir)])
 
-        # Always add --stdout flag since we parse stdout output
-        cmd.extend(["--stdout"])
+        cmd.extend(["dataset", "--output", f"{output_path}"])
 
         return cmd
 
@@ -109,7 +108,7 @@ def _add_synthetic_data_arguments(self, cmd: List[str]) -> None:
             str(_DEFAULT_OUTPUT_STDEV)
         ])
 
-    def _run_prepare_dataset(self, llm_root: Path, **kwargs) -> str:
+    def _run_prepare_dataset(self, **kwargs) -> str:
         """
         Execute prepare_dataset.py with specified parameters and capture
         output.
@@ -124,13 +123,20 @@ def _run_prepare_dataset(self, llm_root: Path, **kwargs) -> str:
         Raises:
             subprocess.CalledProcessError: If the command execution fails
         """
-        cmd = self._build_base_command(llm_root)
-        self._add_lora_arguments(cmd, **kwargs)
-        self._add_synthetic_data_arguments(cmd)
+        with tempfile.TemporaryDirectory() as temp_dir:
+            output_path = Path(temp_dir) / "dataset.jsonl"
+            cmd = self._build_base_command(output_path)
+            self._add_lora_arguments(cmd, **kwargs)
+            self._add_synthetic_data_arguments(cmd)
+
+            # Execute command and capture output
+            subprocess.run(cmd, check=True, cwd=temp_dir)
+
+            data = ""
+            with open(output_path, "r") as f:
+                data = f.read()
 
-        # Execute command and capture output
-        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
-        return result.stdout
+        return data
 
@@ -198,7 +204,7 @@ def _validate_lora_request(self,
             },
             id="random_task_id")
     ])
-    def test_lora_metadata_generation(self, llm_root: Path, temp_lora_dir: str,
+    def test_lora_metadata_generation(self, temp_lora_dir: str,
                                       test_params: Dict) -> None:
         """Test LoRA metadata generation with various configurations."""
         # Extract test parameters
@@ -213,7 +219,7 @@ def test_lora_metadata_generation(self, llm_root: Path, temp_lora_dir: str,
         if rand_task_id is not None:
             kwargs["rand_task_id"] = rand_task_id
 
-        output = self._run_prepare_dataset(llm_root, **kwargs)
+        output = self._run_prepare_dataset(**kwargs)
         json_data = self._parse_json_output(output)
 
         assert len(json_data) > 0, f"No JSON data generated for {description}"
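
With the unit test now reading the generated file from `--output` instead of capturing stdout, the downstream `_parse_json_output` helper still receives a plain string. A sketch of how such a dataset file could be read back, assuming one JSON object per line as the `dataset.jsonl` name suggests (the helper name here is hypothetical):

```python
import json
from pathlib import Path
from typing import Any, Dict, List


def load_dataset_records(output_path: Path) -> List[Dict[str, Any]]:
    """Hypothetical helper: parse a dataset file written via `dataset --output`."""
    records: List[Dict[str, Any]] = []
    for line in output_path.read_text().splitlines():
        line = line.strip()
        if line:  # skip blank lines; assume a JSON Lines layout
            records.append(json.loads(line))
    return records
```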
