modelopt/torch/quantization (2 files changed, +4 -4)

@@ -369,13 +369,13 @@ def postprocess(module):
     for name, module in model.named_modules():
         if is_quantized_linear(module):
             if not hasattr(module.input_quantizer, "_amax"):
-                print_rank_0(f"Warning: {name} is not calibrated, skip smoothing")
+                warnings.warn(f"{name} is not calibrated, skip smoothing")
                 continue
             if module.input_quantizer.num_bits != 8 or module.weight_quantizer.num_bits != 8:
-                print_rank_0(f"Warning: only int8 smoothing is supported, skip {name}")
+                warnings.warn(f"Only int8 smoothing is supported, skip {name}")
                 continue
             if module.input_quantizer.axis != -1:
-                print_rank_0(f"Warning: only per-channel smoothing is supported, skip {name}")
+                warnings.warn(f"Only per-channel smoothing is supported, skip {name}")
                 continue

             assert module.input_quantizer._amax.numel() > 1, (
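Note: the new `warnings.warn` calls rely on the standard-library `warnings` module being imported at the top of the file; the import itself is not visible in this hunk. A minimal sketch of the behavior the change depends on (the layer name below is made up for illustration):

```python
import warnings

# warnings.warn emits a UserWarning through Python's warning machinery.
# The default filter de-duplicates repeated warnings issued from the same
# source line, unlike an unconditional print on rank 0.
warnings.warn("decoder.layers.0.linear_qkv is not calibrated, skip smoothing")
```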
@@ -108,7 +108,7 @@ def forward_loop(model):
     for name, module in model.named_modules():
         if isinstance(module, TensorQuantizer):
             for attr_name in ["_amax", "_bias", "_pre_quant_scale"]:
-                module.validate_attr(attr_name=attr_name, raise_error=True, name=name)
+                module.validate_attr(attr_name=attr_name, warn_error=True, name=name)

     # TODO: Re-enable when the CUDA error: unspecified launch failure is fixed.
     # clear_cuda_cache()
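The second change swaps `raise_error=True` for `warn_error=True` in `TensorQuantizer.validate_attr`, presumably downgrading a hard failure into a warning. The implementation of `validate_attr` is not part of this diff; the sketch below is purely hypothetical and only illustrates the kind of raise-versus-warn switch the new keyword suggests:

```python
import warnings

import torch


def validate_attr(self, attr_name, warn_error=False, name=""):
    """Hypothetical validator: check a quantizer attribute and warn or raise."""
    value = getattr(self, attr_name, None)
    if value is None:
        return
    if not torch.isfinite(value).all():
        msg = f"{name}.{attr_name} contains NaN or Inf values"
        if warn_error:
            warnings.warn(msg)  # assumed behavior of warn_error=True: report and continue
        else:
            raise ValueError(msg)
```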