Catch input.numel() <= 1 in _RealQuantMegatronParallelLinear and skip the real-quant GEMM path

yeyu-nvidia · yeyu-nvidia · commit 760c583a3048 · 2025-09-08T14:47:59.000-07:00
Signed-off-by: Ye Yu <yeyu@nvidia.com>
diff --git a/modelopt/torch/quantization/plugins/megatron.py b/modelopt/torch/quantization/plugins/megatron.py
@@ -418,8 +418,10 @@ class forward(). This is not desired since _forward_impl introduces much more ar
             while the original forward only takes 1 positional argument. We must avoid the fallback path
             in RealQuantLinear.forward().
         """
-        if self._should_run_real_quant_gemm and self.get_real_quant_gemm_impl(
-            input, *args, **kwargs
+        if (
+            self._should_run_real_quant_gemm
+            and self.get_real_quant_gemm_impl(input, *args, **kwargs)
+            and input.numel() > 1
         ):
             allreduce_dgrad = kwargs.get("allreduce_dgrad", False)
             tp_group = kwargs.get("tp_group")