From 909e7152a0ebc808b72e502c36d998e8a2f87504 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Aug 2025 14:53:15 +0200 Subject: [PATCH 1/5] allow non list components_to_quantize. --- src/diffusers/quantizers/pipe_quant_config.py | 5 ++++- .../test_pipeline_level_quantization.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/diffusers/quantizers/pipe_quant_config.py b/src/diffusers/quantizers/pipe_quant_config.py index 5d02de16fd1c..f75a337341a9 100644 --- a/src/diffusers/quantizers/pipe_quant_config.py +++ b/src/diffusers/quantizers/pipe_quant_config.py @@ -48,12 +48,15 @@ def __init__( self, quant_backend: str = None, quant_kwargs: Dict[str, Union[str, float, int, dict]] = None, - components_to_quantize: Optional[List[str]] = None, + components_to_quantize: Optional[Union[List[str], str]] = None, quant_mapping: Dict[str, Union[DiffQuantConfigMixin, "TransformersQuantConfigMixin"]] = None, ): self.quant_backend = quant_backend # Initialize kwargs to be {} to set to the defaults. 
self.quant_kwargs = quant_kwargs or {} + if components_to_quantize: + if isinstance(components_to_quantize, str): + components_to_quantize = [components_to_quantize] self.components_to_quantize = components_to_quantize self.quant_mapping = quant_mapping self.config_mapping = {} # book-keeping Example: `{module_name: quant_config}` diff --git a/tests/quantization/test_pipeline_level_quantization.py b/tests/quantization/test_pipeline_level_quantization.py index e91fe6d4cbab..31b2bc1ff759 100644 --- a/tests/quantization/test_pipeline_level_quantization.py +++ b/tests/quantization/test_pipeline_level_quantization.py @@ -298,3 +298,19 @@ def _parse_config_string(self, config_string: str) -> tuple[str, dict]: data = json.loads(json_part) return data + + def test_single_component_to_quantize(self): + component_to_quantize = "transformer" + quant_config = PipelineQuantizationConfig( + quant_backend="bitsandbytes_8bit", + quant_kwargs={"load_in_8bit": True}, + components_to_quantize=component_to_quantize, + ) + pipe = DiffusionPipeline.from_pretrained( + self.model_name, + quantization_config=quant_config, + torch_dtype=torch.bfloat16, + ) + for name, component in pipe.components.items(): + if name == component_to_quantize: + self.assertTrue(hasattr(component.config, "quantization_config")) From dea27450f243e1084dcebbffa895b7b73f442113 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 25 Aug 2025 14:54:58 +0200 Subject: [PATCH 2/5] document non-list components_to_quantize in the quantization overview --- docs/source/en/quantization/overview.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/en/quantization/overview.md b/docs/source/en/quantization/overview.md index 12c39f52e4f3..3370cb1d036c 100644 --- a/docs/source/en/quantization/overview.md +++ b/docs/source/en/quantization/overview.md @@ -34,7 +34,7 @@ Initialize [`~quantizers.PipelineQuantizationConfig`] with the following paramet > [!TIP] > These `quant_kwargs` arguments are different for each backend.
Refer to the [Quantization API](../api/quantization) docs to view the arguments for each backend. -- `components_to_quantize` specifies which components of the pipeline to quantize. Typically, you should quantize the most compute intensive components like the transformer. The text encoder is another component to consider quantizing if a pipeline has more than one such as [`FluxPipeline`]. The example below quantizes the T5 text encoder in [`FluxPipeline`] while keeping the CLIP model intact. +- `components_to_quantize` specifies which component(s) of the pipeline to quantize. Typically, you should quantize the most compute intensive components like the transformer. The text encoder is another component to consider quantizing if a pipeline has more than one such as [`FluxPipeline`]. The example below quantizes the T5 text encoder in [`FluxPipeline`] while keeping the CLIP model intact. The example below loads the bitsandbytes backend with the following arguments from [`~quantizers.quantization_config.BitsAndBytesConfig`], `load_in_4bit`, `bnb_4bit_quant_type`, and `bnb_4bit_compute_dtype`. @@ -62,6 +62,8 @@ pipe = DiffusionPipeline.from_pretrained( image = pipe("photo of a cute dog").images[0] ``` +`components_to_quantize` doesn't have to be a list. You can also pass: `components_to_quantize="transformers"`. + ### Advanced quantization The `quant_mapping` argument provides more options for how to quantize each individual component in a pipeline, like combining different quantization backends. 
From b6f37a0932d4a539fb8200d7637f9c9fe9970e46 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 25 Aug 2025 18:14:44 +0200 Subject: [PATCH 3/5] Apply suggestions from code review --- docs/source/en/quantization/overview.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/quantization/overview.md b/docs/source/en/quantization/overview.md index 3370cb1d036c..cab7cba06929 100644 --- a/docs/source/en/quantization/overview.md +++ b/docs/source/en/quantization/overview.md @@ -62,7 +62,7 @@ pipe = DiffusionPipeline.from_pretrained( image = pipe("photo of a cute dog").images[0] ``` -`components_to_quantize` doesn't have to be a list. You can also pass: `components_to_quantize="transformers"`. +`components_to_quantize` doesn't have to be a list. You can also pass: `components_to_quantize="transformer"`. ### Advanced quantization From 15619e7dac7b1e33d737030f69f0309105f87a7a Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Fri, 5 Sep 2025 06:46:18 +0530 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/quantization/overview.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/en/quantization/overview.md b/docs/source/en/quantization/overview.md index cab7cba06929..38abeeac6d4d 100644 --- a/docs/source/en/quantization/overview.md +++ b/docs/source/en/quantization/overview.md @@ -36,6 +36,8 @@ Initialize [`~quantizers.PipelineQuantizationConfig`] with the following paramet - `components_to_quantize` specifies which component(s) of the pipeline to quantize. Typically, you should quantize the most compute intensive components like the transformer. The text encoder is another component to consider quantizing if a pipeline has more than one such as [`FluxPipeline`]. The example below quantizes the T5 text encoder in [`FluxPipeline`] while keeping the CLIP model intact. 
+ `components_to_quantize` accepts either a list for multiple models or a string for a single model. + The example below loads the bitsandbytes backend with the following arguments from [`~quantizers.quantization_config.BitsAndBytesConfig`], `load_in_4bit`, `bnb_4bit_quant_type`, and `bnb_4bit_compute_dtype`. ```py @@ -62,7 +64,6 @@ pipe = DiffusionPipeline.from_pretrained( image = pipe("photo of a cute dog").images[0] ``` -`components_to_quantize` doesn't have to be a list. You can also pass: `components_to_quantize="transformer"`. ### Advanced quantization From 1d8ad5d08e04e1dd583ca8272d823f33a534ecad Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Thu, 4 Sep 2025 18:29:55 -0700 Subject: [PATCH 5/5] [docs] components_to_quantize (#12287) init Co-authored-by: Sayak Paul --- docs/source/en/api/pipelines/cogvideox.md | 2 +- docs/source/en/api/pipelines/hunyuan_video.md | 6 +++--- docs/source/en/using-diffusers/text-img2vid.md | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/en/api/pipelines/cogvideox.md b/docs/source/en/api/pipelines/cogvideox.md index 157e987efdb0..ec673e0763c5 100644 --- a/docs/source/en/api/pipelines/cogvideox.md +++ b/docs/source/en/api/pipelines/cogvideox.md @@ -50,7 +50,7 @@ from diffusers.utils import export_to_video pipeline_quant_config = PipelineQuantizationConfig( quant_backend="torchao", quant_kwargs={"quant_type": "int8wo"}, - components_to_quantize=["transformer"] + components_to_quantize="transformer" ) # fp8 layerwise weight-casting diff --git a/docs/source/en/api/pipelines/hunyuan_video.md b/docs/source/en/api/pipelines/hunyuan_video.md index df52c49b3694..cdd81495b621 100644 --- a/docs/source/en/api/pipelines/hunyuan_video.md +++ b/docs/source/en/api/pipelines/hunyuan_video.md @@ -54,7 +54,7 @@ pipeline_quant_config = PipelineQuantizationConfig( "bnb_4bit_quant_type": "nf4", "bnb_4bit_compute_dtype": torch.bfloat16 }, - 
components_to_quantize=["transformer"] + components_to_quantize="transformer" ) pipeline = HunyuanVideoPipeline.from_pretrained( @@ -91,7 +91,7 @@ pipeline_quant_config = PipelineQuantizationConfig( "bnb_4bit_quant_type": "nf4", "bnb_4bit_compute_dtype": torch.bfloat16 }, - components_to_quantize=["transformer"] + components_to_quantize="transformer" ) pipeline = HunyuanVideoPipeline.from_pretrained( @@ -139,7 +139,7 @@ export_to_video(video, "output.mp4", fps=15) "bnb_4bit_quant_type": "nf4", "bnb_4bit_compute_dtype": torch.bfloat16 }, - components_to_quantize=["transformer"] + components_to_quantize="transformer" ) pipeline = HunyuanVideoPipeline.from_pretrained( diff --git a/docs/source/en/using-diffusers/text-img2vid.md b/docs/source/en/using-diffusers/text-img2vid.md index ade3e0de329f..9b69a2fded5c 100644 --- a/docs/source/en/using-diffusers/text-img2vid.md +++ b/docs/source/en/using-diffusers/text-img2vid.md @@ -98,7 +98,7 @@ pipeline_quant_config = PipelineQuantizationConfig( "bnb_4bit_quant_type": "nf4", "bnb_4bit_compute_dtype": torch.bfloat16 }, - components_to_quantize=["transformer"] + components_to_quantize="transformer" ) pipeline = HunyuanVideoPipeline.from_pretrained(