Commit c4b6e24

update
1 parent 79901e4 commit c4b6e24

6 files changed (+181 -48 lines)

docs/source/en/quantization/quanto.md

Lines changed: 15 additions & 4 deletions
@@ -91,19 +91,30 @@ model = FluxTransformer2DModel.from_pretrained("<your quantized model save path>

 ## Using `torch.compile` with Quanto

-Currently the Quanto backend only supports `torch.compile` for `int8` weights and activations.
+Currently the Quanto backend supports `torch.compile` for the following quantization types:
+
+- `int8` weights

 ```python
 import torch
-from diffusers import FluxTransformer2DModel, QuantoConfig
+from diffusers import FluxPipeline, FluxTransformer2DModel, QuantoConfig

 model_id = "black-forest-labs/FLUX.1-dev"
 quantization_config = QuantoConfig(weights="int8")
-transformer = FluxTransformer2DModel.from_pretrained(model_id, quantization_config=quantization_config, torch_dtype=torch.bfloat16)
+transformer = FluxTransformer2DModel.from_pretrained(
+    model_id,
+    subfolder="transformer",
+    quantization_config=quantization_config,
+    torch_dtype=torch.bfloat16,
+)
 transformer = torch.compile(transformer, mode="max-autotune", fullgraph=True)

-pipe = FluxPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch_dtype)
+pipe = FluxPipeline.from_pretrained(
+    model_id, transformer=transformer, torch_dtype=torch_dtype
+)
 pipe.to("cuda")
+images = pipe("A cat holding a sign that says hello").images[0]
+images.save("flux-quanto.png")
 ```

 ## Supported Quantization Types
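Note that the snippet in the hunk above still passes `weights="int8"` and references `torch_dtype` without defining it, while this same commit renames the `QuantoConfig` arguments to `weights_dtype`/`activations_dtype` (see `quantization_config.py` below). A self-contained version consistent with the renamed keyword might look like the sketch below; treat the keyword name and the explicit `torch_dtype` as assumptions rather than the documented example.

```python
# Minimal sketch, assuming the renamed QuantoConfig keyword (weights_dtype) from
# this commit and an explicitly defined torch_dtype; not the exact docs snippet.
import torch
from diffusers import FluxPipeline, FluxTransformer2DModel, QuantoConfig

model_id = "black-forest-labs/FLUX.1-dev"
torch_dtype = torch.bfloat16  # the docs snippet uses this name without defining it

quantization_config = QuantoConfig(weights_dtype="int8")
transformer = FluxTransformer2DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=quantization_config,
    torch_dtype=torch_dtype,
)
transformer = torch.compile(transformer, mode="max-autotune", fullgraph=True)

pipe = FluxPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch_dtype)
pipe.to("cuda")
image = pipe("A cat holding a sign that says hello").images[0]
image.save("flux-quanto.png")
```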

setup.py

Lines changed: 5 additions & 0 deletions
@@ -236,6 +236,11 @@ def run(self):
 )
 extras["torch"] = deps_list("torch", "accelerate")

+extras["bitsandbytes"] = deps_list("bitsandbytes", "accelerate")
+extras["gguf"] = deps_list("gguf", "accelerate")
+extras["quanto"] = deps_list("quanto", "accelerate")
+extras["torchao"] = deps_list("torchao", "accelerate")
+
 if os.name == "nt":  # windows
     extras["flax"] = []  # jax is not supported on windows
 else:
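With these extras declared, each quantization backend can presumably be installed together with `accelerate` in one step, e.g. `pip install diffusers[quanto]` (and likewise `[bitsandbytes]`, `[gguf]`, `[torchao]`).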

src/diffusers/quantizers/auto.py

Lines changed: 2 additions & 2 deletions
@@ -37,16 +37,16 @@
     "bitsandbytes_4bit": BnB4BitDiffusersQuantizer,
     "bitsandbytes_8bit": BnB8BitDiffusersQuantizer,
     "gguf": GGUFQuantizer,
-    "torchao": TorchAoHfQuantizer,
     "quanto": QuantoQuantizer,
+    "torchao": TorchAoHfQuantizer,
 }

 AUTO_QUANTIZATION_CONFIG_MAPPING = {
     "bitsandbytes_4bit": BitsAndBytesConfig,
     "bitsandbytes_8bit": BitsAndBytesConfig,
     "gguf": GGUFQuantizationConfig,
-    "torchao": TorchAoConfig,
     "quanto": QuantoConfig,
+    "torchao": TorchAoConfig,
 }
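The reorder is purely alphabetical; both mappings keep the same entries. They are what the auto-quantization machinery consults to turn a `quant_method` string into a config class and quantizer class. A rough sketch of that lookup (the name `AUTO_QUANTIZER_MAPPING` for the first dictionary is assumed, since it is not visible in the hunk, and the real dispatch lives in `DiffusersAutoQuantizer`):

```python
# Illustrative only: how the two mappings above are typically used together.
method = "quanto"
config_cls = AUTO_QUANTIZATION_CONFIG_MAPPING[method]  # -> QuantoConfig
quantizer_cls = AUTO_QUANTIZER_MAPPING[method]  # -> QuantoQuantizer (dict name assumed)

quantizer = quantizer_cls(config_cls(weights_dtype="int8"))
```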

src/diffusers/quantizers/quantization_config.py

Lines changed: 8 additions & 8 deletions
@@ -695,14 +695,14 @@ class QuantoConfig(QuantizationConfigMixin):

     def __init__(
         self,
-        weights="int8",
-        activations=None,
+        weights_dtype="int8",
+        activations_dtype=None,
         modules_to_not_convert: Optional[List] = None,
         **kwargs,
     ):
         self.quant_method = QuantizationMethod.QUANTO
-        self.weights = weights
-        self.activations = activations
+        self.weights_dtype = weights_dtype
+        self.activations_dtype = activations_dtype
         self.modules_to_not_convert = modules_to_not_convert

         self.post_init()
@@ -713,8 +713,8 @@ def post_init(self):
         """
         accepted_weights = ["float8", "int8", "int4", "int2"]
         accepted_activations = [None, "int8", "float8"]
-        if self.weights not in accepted_weights:
-            raise ValueError(f"Only support weights in {accepted_weights} but found {self.weights}")
+        if self.weights_dtype not in accepted_weights:
+            raise ValueError(f"Only support weights in {accepted_weights} but found {self.weights_dtype}")

-        if self.activations not in accepted_activations:
-            raise ValueError(f"Only support weights in {accepted_activations} but found {self.activations}")
+        if self.activations_dtype not in accepted_activations:
+            raise ValueError(f"Only support weights in {accepted_activations} but found {self.activations_dtype}")

src/diffusers/quantizers/quanto/utils.py

Lines changed: 2 additions & 2 deletions
@@ -39,8 +39,8 @@ def _replace_layers(model, quantization_config, modules_to_not_convert):
                 out_features=module.out_features,
                 bias=module.bias is not None,
                 dtype=module.weight.dtype,
-                weights=_get_weight_type(quantization_config.weights),
-                activations=_get_activation_type(quantization_config.activations),
+                weights=_get_weight_type(quantization_config.weights_dtype),
+                activations=_get_activation_type(quantization_config.activations_dtype),
             )
             model._modules[name] = qlinear
             model._modules[name].source_cls = type(module)
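`_get_weight_type` and `_get_activation_type` are not shown in this hunk; a plausible sketch of helpers with that contract, assuming they map the config strings to optimum-quanto qtypes (the dictionary names below are hypothetical):

```python
# Hypothetical sketch of the helpers consumed above; the real implementations
# live elsewhere in src/diffusers/quantizers/quanto/utils.py and may differ.
from optimum.quanto import qfloat8, qint2, qint4, qint8

_WEIGHT_TYPES = {"float8": qfloat8, "int8": qint8, "int4": qint4, "int2": qint2}
_ACTIVATION_TYPES = {"float8": qfloat8, "int8": qint8}


def _get_weight_type(dtype: str):
    # Maps QuantoConfig.weights_dtype strings ("int8", "float8", ...) to qtypes.
    return _WEIGHT_TYPES[dtype]


def _get_activation_type(dtype):
    # activations_dtype defaults to None, i.e. no activation quantization.
    return _ACTIVATION_TYPES[dtype] if dtype is not None else None
```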
