[Bug] Fix AttributeError: 'QKVParallelLinear' object has no attribute 'orig_dtype' (#25958)

yewentao256 · web-flow · commit e6a226efba6f · 2025-09-30T11:13:03.000-07:00
Signed-off-by: yewentao256 <zhyanwentao@126.com>
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
@@ -66,6 +66,7 @@ def create_weights(self, layer: torch.nn.Module,
         output_size_per_partition = sum(output_partition_sizes)
         layer.logical_widths = output_partition_sizes
         layer.weight_block_size = None
+        layer.orig_dtype = params_dtype
 
         if self.strategy == QuantizationStrategy.BLOCK:
             assert self.weight_block_size is not None