Skip to content

Commit 43cd4b0

Browse files
committed
AWQ support
Signed-off-by: weimingc <[email protected]>
1 parent 900f276 commit 43cd4b0

File tree

1 file changed: +12 additions, -2 deletions
  • modelopt/torch/quantization/plugins/vllm.py

modelopt/torch/quantization/plugins/vllm.py

Lines changed: 12 additions & 2 deletions
```diff
@@ -61,9 +61,19 @@ def apply(
         x = layer.input_quantizer(x)
         if layer.weight_quantizer.is_enabled:
             original_weight = layer.weight
-            layer.weight = layer.weight_quantizer(layer.weight)
+            _data = None
+            # for parameter, we keep the original data, otherwise we modify the weight
+            quantized_tensor = layer.weight_quantizer(layer.weight)
+            if isinstance(original_weight, torch.nn.Parameter):
+                _data = original_weight.data
+                layer.weight.data = quantized_tensor
+            else:
+                layer.weight = quantized_tensor
             output = self.quant_method.apply(layer, x, bias)
-            layer.weight = original_weight
+            if _data is not None:
+                layer.weight.data = _data
+            else:
+                layer.weight = original_weight
         else:
             output = self.quant_method.apply(layer, x, bias)
         output = layer.output_quantizer(output)
```

0 commit comments

Comments (0)