We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent bc54694 commit 345e4be — Copy full SHA for 345e4be
modelopt/torch/quantization/plugins/vllm.py
@@ -61,7 +61,13 @@ def apply(
61
x = layer.input_quantizer(x)
62
if layer.weight_quantizer.is_enabled:
63
original_weight = layer.weight
64
- layer.weight = layer.weight_quantizer(layer.weight)
+ quantized_tensor = layer.weight_quantizer(layer.weight)
65
+ # parameterize the quantized weight
66
+ if isinstance(original_weight, torch.nn.Parameter):
67
+ quantized_tensor = torch.nn.Parameter(
68
+ quantized_tensor, requires_grad=original_weight.requires_grad
69
+ )
70
+ layer.weight = quantized_tensor
71
output = self.quant_method.apply(layer, x, bias)
72
layer.weight = original_weight
73
else:
0 commit comments