Skip to content

Commit b6ba595

Browse files
authored
Default to dequantize if cpu in device_map for mxfp4 (#39993)
* default to dq if cpu * another check * style * revert some changes
1 parent a5fac1c commit b6ba595

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

src/transformers/quantizers/quantizer_mxfp4.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,14 @@ def validate_environment(self, *args, **kwargs):
6161
return
6262

6363
if not torch.cuda.is_available():
64-
raise RuntimeError("Using MXFP4 quantized models requires a GPU")
64+
if self.pre_quantized:
65+
logger.warning_once(
66+
"Using MXFP4 quantized models requires a GPU, we will default to dequantizing the model to bf16"
67+
)
68+
self.quantization_config.dequantize = True
69+
return
70+
else:
71+
raise RuntimeError("Quantizing a model using MXFP4 requires a GPU")
6572

6673
if not is_accelerate_available():
6774
raise ImportError("Using mxfp4 requires Accelerate: `pip install accelerate`")

0 commit comments

Comments
 (0)