Skip to content

Commit 43cd4b0

Browse files
committed
AWQ support
Signed-off-by: weimingc <[email protected]>
1 parent 900f276 commit 43cd4b0

File tree

1 file changed: +12 additions, -2 deletions
  • modelopt/torch/quantization/plugins/vllm.py

modelopt/torch/quantization/plugins/vllm.py

Lines changed: 12 additions & 2 deletions
```diff
@@ -61,9 +61,19 @@ def apply(
         x = layer.input_quantizer(x)
         if layer.weight_quantizer.is_enabled:
             original_weight = layer.weight
-            layer.weight = layer.weight_quantizer(layer.weight)
+            _data = None
+            # for parameter, we keep the original data, otherwise we modify the weight
+            quantized_tensor = layer.weight_quantizer(layer.weight)
+            if isinstance(original_weight, torch.nn.Parameter):
+                _data = original_weight.data
+                layer.weight.data = quantized_tensor
+            else:
+                layer.weight = quantized_tensor
             output = self.quant_method.apply(layer, x, bias)
-            layer.weight = original_weight
+            if _data is not None:
+                layer.weight.data = _data
+            else:
+                layer.weight = original_weight
         else:
             output = self.quant_method.apply(layer, x, bias)
         output = layer.output_quantizer(output)
```

0 commit comments

Comments (0)