import modelopt.torch.opt as mto
import modelopt.torch.quantization as mtq
from modelopt.torch.quantization.backends.gemm_registry import enable_real_quant_gemm
+from modelopt.torch.quantization.nn.modules.tensor_quantizer import SequentialQuantizer
from modelopt.torch.quantization.utils import is_quantized_linear
from modelopt.torch.utils import torch_to
@@ -150,56 +151,35 @@ def forward_loop(model):
    dist.destroy_process_group()


-def data_parallel_test_helper(model, config, dp_group):
+def dp_cp_parallel_test_helper(model, config, group):
    calib_data = model.get_dummy_input().cuda()

    def forward_loop(model):
        model(calib_data)

    model = mtq.quantize(model, config, forward_loop)

-    # Input quantizer amax
-    if config not in [mtq.INT4_BLOCKWISE_WEIGHT_ONLY_CFG, mtq.INT4_AWQ_CFG]:
-        fc1_amax = model.fc1.input_quantizer.amax.clone()
-        dist.all_reduce(fc1_amax, op=dist.ReduceOp.MAX, group=dp_group)
-        assert torch.allclose(fc1_amax, model.fc1.input_quantizer.amax)
-        fc2_amax = model.fc2.input_quantizer.amax.clone()
-        dist.all_reduce(fc2_amax, op=dist.ReduceOp.MAX, group=dp_group)
-        assert torch.allclose(fc2_amax, model.fc2.input_quantizer.amax)
-
-    # Weight quantizer amax
-    fc1_amax = model.fc1.weight_quantizer.amax.clone()
-    dist.all_reduce(fc1_amax, op=dist.ReduceOp.MAX, group=dp_group)
-    assert torch.allclose(fc1_amax, model.fc1.weight_quantizer.amax)
-    fc2_amax = model.fc2.weight_quantizer.amax.clone()
-    dist.all_reduce(fc2_amax, op=dist.ReduceOp.MAX, group=dp_group)
-    assert torch.allclose(fc2_amax, model.fc2.weight_quantizer.amax)
-
-
-def context_parallel_test_helper(model, config, cp_group):
-    calib_data = model.get_dummy_input().cuda()
-
-    def forward_loop(model):
-        model(calib_data)
-
-    model = mtq.quantize(model, config, forward_loop)
+    def reduce_amax(quantizer):
+        amax = quantizer.amax.clone()
+        dist.all_reduce(amax, op=dist.ReduceOp.MAX, group=group)
+        assert torch.allclose(amax, quantizer.amax)

    # Input quantizer amax
    if config not in [mtq.INT4_BLOCKWISE_WEIGHT_ONLY_CFG, mtq.INT4_AWQ_CFG]:
-        fc1_amax = model.fc1.input_quantizer.amax.clone()
-        dist.all_reduce(fc1_amax, op=dist.ReduceOp.MAX, group=cp_group)
-        assert torch.allclose(fc1_amax, model.fc1.input_quantizer.amax)
-        fc2_amax = model.fc2.input_quantizer.amax.clone()
-        dist.all_reduce(fc2_amax, op=dist.ReduceOp.MAX, group=cp_group)
-        assert torch.allclose(fc2_amax, model.fc2.input_quantizer.amax)
+        reduce_amax(model.fc1.input_quantizer)
+        reduce_amax(model.fc2.input_quantizer)

    # Weight quantizer amax
-    fc1_weight_amax = model.fc1.weight_quantizer.amax.clone()
-    dist.all_reduce(fc1_weight_amax, op=dist.ReduceOp.MAX, group=cp_group)
-    assert torch.allclose(fc1_weight_amax, model.fc1.weight_quantizer.amax)
-    fc2_weight_amax = model.fc2.weight_quantizer.amax.clone()
-    dist.all_reduce(fc2_weight_amax, op=dist.ReduceOp.MAX, group=cp_group)
-    assert torch.allclose(fc2_weight_amax, model.fc2.weight_quantizer.amax)
+    if isinstance(model.fc1.weight_quantizer, SequentialQuantizer):
+        for quantizer in model.fc1.weight_quantizer:
+            reduce_amax(quantizer)
+    else:
+        reduce_amax(model.fc1.weight_quantizer)
+    if isinstance(model.fc2.weight_quantizer, SequentialQuantizer):
+        for quantizer in model.fc2.weight_quantizer:
+            reduce_amax(quantizer)
+    else:
+        reduce_amax(model.fc2.weight_quantizer)


def data_tensor_context_parallel_test_helper(model, config, dp_group, tp_group, cp_group):
@@ -212,29 +192,29 @@ def forward_loop(model):

    model = mtq.quantize(model, config, forward_loop)

+    def reduce_amax(quantizer):
+        amax = quantizer.amax.clone()
+        dist.all_reduce(amax, op=dist.ReduceOp.MAX, group=tp_group)
+        dist.all_reduce(amax, op=dist.ReduceOp.MAX, group=cp_group)
+        dist.all_reduce(amax, op=dist.ReduceOp.MAX, group=dp_group)
+        assert torch.allclose(amax, quantizer.amax)
+
    # Input quantizer amax
    if config not in [mtq.INT4_BLOCKWISE_WEIGHT_ONLY_CFG, mtq.INT4_AWQ_CFG]:
-        fc1_amax = model.fc1.input_quantizer.amax.clone()
-        dist.all_reduce(fc1_amax, op=dist.ReduceOp.MAX, group=tp_group)
-        dist.all_reduce(fc1_amax, op=dist.ReduceOp.MAX, group=cp_group)
-        dist.all_reduce(fc1_amax, op=dist.ReduceOp.MAX, group=dp_group)
-        assert torch.allclose(fc1_amax, model.fc1.input_quantizer.amax)
-        fc2_amax = model.fc2.input_quantizer.amax.clone()
-        dist.all_reduce(fc2_amax, op=dist.ReduceOp.MAX, group=tp_group)
-        dist.all_reduce(fc2_amax, op=dist.ReduceOp.MAX, group=cp_group)
-        dist.all_reduce(fc2_amax, op=dist.ReduceOp.MAX, group=dp_group)
-        assert torch.allclose(fc2_amax, model.fc2.input_quantizer.amax)
-
-    fc1_amax = model.fc1.weight_quantizer.amax.clone()
-    dist.all_reduce(fc1_amax, op=dist.ReduceOp.MAX, group=tp_group)
-    dist.all_reduce(fc1_amax, op=dist.ReduceOp.MAX, group=cp_group)
-    dist.all_reduce(fc1_amax, op=dist.ReduceOp.MAX, group=dp_group)
-    assert torch.allclose(fc1_amax, model.fc1.weight_quantizer.amax)
-    fc2_amax = model.fc2.weight_quantizer.amax.clone()
-    dist.all_reduce(fc2_amax, op=dist.ReduceOp.MAX, group=tp_group)
-    dist.all_reduce(fc2_amax, op=dist.ReduceOp.MAX, group=cp_group)
-    dist.all_reduce(fc2_amax, op=dist.ReduceOp.MAX, group=dp_group)
-    assert torch.allclose(fc2_amax, model.fc2.weight_quantizer.amax)
+        reduce_amax(model.fc1.input_quantizer)
+        reduce_amax(model.fc2.input_quantizer)
+
+    if isinstance(model.fc1.weight_quantizer, SequentialQuantizer):
+        for quantizer in model.fc1.weight_quantizer:
+            reduce_amax(quantizer)
+    else:
+        reduce_amax(model.fc1.weight_quantizer)
+
+    if isinstance(model.fc2.weight_quantizer, SequentialQuantizer):
+        for quantizer in model.fc2.weight_quantizer:
+            reduce_amax(quantizer)
+    else:
+        reduce_amax(model.fc2.weight_quantizer)


def auto_quantize_helper(model):
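For reference, a minimal sketch of how the reworked `dp_cp_parallel_test_helper` might be driven from a spawned multi-rank test is below, assuming the helper above is in scope. The toy model, port, and config choice are illustrative assumptions rather than the repo's actual test harness, and the amax assertion only holds when calibration synchronizes amax across the process group the way the repo's test models are wired to do.

```python
# Hypothetical launcher sketch -- not the repo's test harness.
import os

import torch
import torch.distributed as dist
import torch.multiprocessing as mp
import torch.nn as nn

import modelopt.torch.quantization as mtq


class ToyModel(nn.Module):
    """Stand-in for the test model: two quantizable linears plus a dummy-input helper."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(16, 32)
        self.fc2 = nn.Linear(32, 16)

    def forward(self, x):
        return self.fc2(self.fc1(x))

    def get_dummy_input(self):
        return torch.randn(4, 16)


def _worker(rank, world_size):
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
    torch.cuda.set_device(rank)

    model = ToyModel().cuda()
    # Each rank calibrates on its own random batch; the helper then all-reduces the
    # collected amax values and asserts they already match the group-wide maximum.
    dp_cp_parallel_test_helper(model, mtq.INT8_DEFAULT_CFG, dist.group.WORLD)

    dist.destroy_process_group()


if __name__ == "__main__":
    mp.spawn(_worker, args=(2,), nprocs=2)  # requires 2 CUDA devices
```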