Commit a106dd9

fix test
Signed-off-by: Jennifer Chen <[email protected]>
1 parent 291cfa3 · commit a106dd9

File tree

3 files changed: +12 −8 lines changed

- examples/nemo_run/qat/README.md
- tests/_test_utils/torch_dist/plugins/megatron_common.py
- tests/gpu/torch/quantization/plugins/test_apex.py


examples/nemo_run/qat/README.md

Lines changed: 1 addition & 2 deletions
@@ -59,13 +59,12 @@ You can run the example either locally or on a [Slurm cluster](ADVANCED.md).
 To run the example locally, launch a [NeMo container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo) with version 25.09 or higher. Clone the `TensorRT-Model-Optimizer` repository and `NeMo` repository (checkout a specific commit for NeMo), then mount it onto your docker container.

 - `git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git`
-- `git clone https://github.com/NVIDIA-NeMo/NeMo.git`
 - `git clone https://github.com/NVIDIA/Megatron-LM.git`

 Example docker command:

 ```bash
-docker run -v /home/user/:/home/user/ -v /home/user/NeMo:/opt/NeMo -v /home/user/TensorRT-Model-Optimizer/modelopt/:/usr/local/lib/python3.12/dist-packages/modelopt -v /home/user/Megatron-LM:/opt/megatron-lm --gpus all -it --shm-size 20g --rm nvcr.io/nvidia/nemo:25.09 bash
+docker run -v /home/user/:/home/user/ -v /home/user/TensorRT-Model-Optimizer/modelopt/:/usr/local/lib/python3.12/dist-packages/modelopt -v /home/user/Megatron-LM:/opt/megatron-lm --gpus all -it --shm-size 20g --rm nvcr.io/nvidia/nemo:25.09 bash
 ```

 You will also need to set your Huggingface token with `export HF_TOKEN=<your-token>`. You may also need to enable write access to the docker container to the `examples/nemo_run` folder by doing `chmod 777 nemo_run` so that logs can be written.

tests/_test_utils/torch_dist/plugins/megatron_common.py

Lines changed: 0 additions & 2 deletions
@@ -379,9 +379,7 @@ def run_mcore_inference(
     )

     # Note: This is returned in all TP ranks or last PP stage in PP models
-    print("inference_input size", inference_input["tokens"].shape)
     logits = wrapped_model.run_one_forward_step(inference_input)
-    print("logits size", logits.shape)
     logits = broadcast_from_last_pipeline_stage(
         [batch_size, model.max_sequence_length, model.vocab_size],
         dtype=torch.bfloat16 if model.config.bf16 else torch.float32,

tests/gpu/torch/quantization/plugins/test_apex.py

Lines changed: 11 additions & 4 deletions
@@ -23,7 +23,7 @@
 from _test_utils.torch_quantization.models import RegularQuantModelForTP
 from _test_utils.torch_quantization.quantize_common import (
     auto_quantize_helper,
-    tensor_parallel_test_helper,
+    data_tensor_context_parallel_test_helper,
 )

 import modelopt.torch.quantization as mtq
@@ -58,7 +58,11 @@ def forward(self, x):
         x = x[0]
         return x

-    def get_dummy_input(self):
+    def get_dummy_input(self, seed: int | None = None):
+        if seed is not None:
+            gen = torch.Generator()
+            gen.manual_seed(seed)
+            return torch.randn(1, 4, 32, generator=gen)
         return torch.randn(1, 4, 32)

@@ -106,8 +110,11 @@ def _test_tensor_parallel_helper(config, rank, size):
     model_parallel_cuda_manual_seed(SEED)
     model = ApexModel().cuda()

-    tensor_parallel_test_helper(
-        model, config, get_tensor_model_parallel_group(), get_data_parallel_group()
+    data_tensor_context_parallel_test_helper(
+        model,
+        config,
+        tp_group=get_tensor_model_parallel_group(),
+        dp_group=get_data_parallel_group(),
     )

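For context, the seeded `get_dummy_input` introduced in this commit makes the dummy activations reproducible whenever the same seed is passed. Below is a minimal standalone sketch of that pattern; the free function and seed value are illustrative only, not part of the commit:

```python
import torch

def get_dummy_input(seed=None):
    # Same pattern as the method added above: a seeded torch.Generator gives
    # reproducible inputs; without a seed the call stays non-deterministic.
    if seed is not None:
        gen = torch.Generator()
        gen.manual_seed(seed)
        return torch.randn(1, 4, 32, generator=gen)
    return torch.randn(1, 4, 32)

a = get_dummy_input(seed=1234)
b = get_dummy_input(seed=1234)
assert torch.equal(a, b)  # identical tensors for the same seed
```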
