From 022df035c083f677d8b8175d78eff6f8923ce07c Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Wed, 24 Sep 2025 22:13:11 -0400 Subject: [PATCH 1/9] support flux Signed-off-by: Mengni Wang --- auto_round/__init__.py | 2 +- auto_round/__main__.py | 80 +++- auto_round/autoround.py | 88 ++++- auto_round/compressors/__init__.py | 2 + auto_round/compressors/base.py | 103 +++-- auto_round/compressors/config.py | 35 ++ auto_round/compressors/diffusion/__init__.py | 17 + .../compressors/diffusion/compressor.py | 368 ++++++++++++++++++ auto_round/compressors/diffusion/dataset.py | 92 +++++ auto_round/compressors/diffusion/eval.py | 103 +++++ auto_round/utils.py | 85 +++- test/test_cuda/test_diffusion.py | 77 ++++ 12 files changed, 997 insertions(+), 55 deletions(-) create mode 100644 auto_round/compressors/diffusion/__init__.py create mode 100644 auto_round/compressors/diffusion/compressor.py create mode 100644 auto_round/compressors/diffusion/dataset.py create mode 100644 auto_round/compressors/diffusion/eval.py create mode 100644 test/test_cuda/test_diffusion.py diff --git a/auto_round/__init__.py b/auto_round/__init__.py index 15bbc373d..a3a3b24bc 100644 --- a/auto_round/__init__.py +++ b/auto_round/__init__.py @@ -14,7 +14,7 @@ from auto_round.autoround import AutoRound # support for old api -from auto_round.autoround import AutoRoundLLM, AutoRoundMLLM, AutoRoundAdam +from auto_round.autoround import AutoRoundLLM, AutoRoundMLLM, AutoRoundAdam, AutoRoundDiffusion from auto_round.utils import LazyImport diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 1671d157d..aa84373df 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -230,6 +230,25 @@ def __init__(self, *args, **kwargs): help="the template for building training dataset. It can be a custom one.", ) + ## ===================== diffusion model ================== + self.add_argument( + "--guidance_scale", + default=7.5, + type=float, + ) + + self.add_argument( + "--num_inference_steps", + default=50, + type=int, + ) + + self.add_argument( + "--generator_seed", + default=None, + type=int, + ) + ## ======================= eval ======================= self.add_argument( "--tasks", @@ -258,6 +277,21 @@ def __init__(self, *args, **kwargs): "--eval_model_dtype", default=None, type=str, help="the torch_dytpe to load the model for evaluation." 
)

+    ## ======================= diffusion model eval =======================
+    self.add_argument("--prompt_file", default=None, type=str, help="the prompt file to load prompts from.")
+
+    self.add_argument("--prompt", default=None, type=str, help="the prompt used for a quick generation test.")
+
+    self.add_argument(
+        "--metrics",
+        "--metric",
+        default="clip",
+        help="supported metrics: clip, clip-iqa, imagereward",
+    )
+
+    self.add_argument(
+        "--image_save_dir", default="./tmp_image_save", type=str, help="path to save generated images"
+    )


def setup_parser():
    parser = BasicArgumentParser()
@@ -427,6 +461,7 @@ def tune(args):
    )

    from auto_round.compressors import (
+        DiffusionExtraConfig,
        ExtraConfig,
        MLLMExtraConfig,
        SchemeExtraConfig,
@@ -463,9 +498,13 @@ def tune(args):
    mllm_config = MLLMExtraConfig(
        quant_nontext_module=args.quant_nontext_module, extra_data_dir=args.extra_data_dir, template=args.template
    )
+    diffusion_config = DiffusionExtraConfig(
+        guidance_scale=args.guidance_scale, num_inference_steps=args.num_inference_steps, generator_seed=args.generator_seed
+    )
    extra_config.tuning_config = tuning_config
    extra_config.scheme_config = scheme_config
    extra_config.mllm_config = mllm_config
+    extra_config.diffusion_config = diffusion_config

    autoround: BaseCompressor = AutoRound(
        model=model_name,
@@ -522,6 +561,45 @@ def tune(args):
    model.eval()
    clear_memory()

+    eval_model_dtype = get_model_dtype(args.eval_model_dtype, "auto")
+
+    # diffusion models take a different evaluation path
+    if getattr(autoround, "diffusion", False):
+        pipe = autoround.pipe
+        pipe.to(model.dtype)
+        pipe.transformer = model
+        device_str = detect_device(device_str)
+        pipe = pipe.to(device_str)
+        if pipe.dtype != eval_model_dtype and eval_model_dtype != "auto":
+            pipe.to(getattr(torch, eval_model_dtype))
+
+        gen_kwargs = {
+            "guidance_scale": args.guidance_scale,
+            "output_type": "pil",
+            "num_inference_steps": args.num_inference_steps,
+            "generator": (
+                None
+                if args.generator_seed is None
+                else torch.Generator(device=pipe.device).manual_seed(args.generator_seed)
+            ),
+        }
+        if not os.path.exists(args.image_save_dir):
+            os.makedirs(args.image_save_dir)
+
+        if args.prompt is not None:
+            outputs = pipe(prompt=args.prompt, **gen_kwargs)
+            outputs.images[0].save(os.path.join(args.image_save_dir, "img.png"))
+            logger.info(
+                f"Image generated with prompt {args.prompt} is saved as {os.path.join(args.image_save_dir, 'img.png')}"
+            )
+
+        if args.prompt_file is not None:
+            from auto_round.compressors.diffusion import diffusion_eval
+
+            metrics = args.metrics.split(",")
+            diffusion_eval(pipe, args.prompt_file, metrics, args.image_save_dir, 1, gen_kwargs)
+        return
+
    lm_eval_version = get_library_version("lm-eval")

    eval_folder = folders[-1]
@@ -543,8 +621,6 @@ def tune(args):

    import time

-    eval_model_dtype = get_model_dtype(args.eval_model_dtype, "auto")
-
    if autoround.act_bits <= 8 or eval_gguf_model:
        if eval_gguf_model:
            # for file in os.listdir(eval_folder):
diff --git a/auto_round/autoround.py b/auto_round/autoround.py
index 4074213a9..1b7f76380 100644
--- a/auto_round/autoround.py
+++ b/auto_round/autoround.py
@@ -20,13 +20,14 @@
 from auto_round.compressors import (
     AdamCompressor,
     BaseCompressor,
+    DiffusionCompressor,
     ExtraConfig,
     LLMCompressor,
     MLLMCompressor,
 )
 from auto_round.logger import deprecated, logger
 from auto_round.schemes import QuantizationScheme
-from auto_round.utils import is_mllm_model
+from auto_round.utils import is_mllm_model, is_diffusion_model


 class AutoRound:
@@ -145,6 +146,11 @@ def __new__(
        if (extra_config and not
extra_config.mllm_config.is_default()) or is_mllm_model(model):
            logger.info("using MLLM mode for multimodal model.")
            model_cls.append(MLLMCompressor)
+            extra_config.diffusion_config = None
+        elif (extra_config and not extra_config.diffusion_config.is_default()) or is_diffusion_model(model):
+            logger.info("using Diffusion mode for diffusion model.")
+            model_cls.append(DiffusionCompressor)
+            extra_config.mllm_config = None
        else:
            if extra_config:
                extra_config.mllm_config = None
@@ -540,3 +546,83 @@ def __init__(
            seed=seed,
            **kwargs,
        )
+
+
+@deprecated("AutoRound")
+class AutoRoundDiffusion(DiffusionCompressor):
+    """Class for automatic rounding-based quantization with Diffusion models.
+
+    Args:
+        model: The PyTorch model to be quantized.
+        tokenizer: An optional tokenizer for processing input data; it is not used for diffusion models.
+        guidance_scale (float): Controls how much the image generation process follows the text prompt.
+            The higher it is, the more closely the output follows the prompt (default is 7.5).
+        num_inference_steps (int): The reference number of denoising steps (default is 50).
+        generator_seed (int): A seed that controls the initial noise from which an image is generated (default is None).
+        scheme (str | dict | QuantizationScheme): A preset scheme that defines the quantization configurations.
+        layer_config (dict): Configuration for weight quantization (default is None).
+        dataset: The path or name of the calibration dataset.
+        iters (int): Number of tuning iterations (default is 200).
+        seqlen (int): Length of the sequence.
+        nsamples (int): Number of samples (default is 128).
+        batch_size (int): Batch size for training (default is 8).
+        gradient_accumulate_steps (int): Number of gradient accumulation steps (default is 1).
+        low_gpu_mem_usage (bool): Whether to use low GPU memory (default is False).
+        device_map (str | dict | int | torch.device, optional): Device placement map. Defaults to 0.
+        enable_torch_compile (bool): Whether to enable torch compile to optimize quant_block/layer (default is False).
+        **kwargs: Additional keyword arguments.
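+
+    Example (a minimal usage sketch; the checkpoint id and scheme below are illustrative,
+        not values verified here):
+        >>> from auto_round import AutoRoundDiffusion
+        >>> ar = AutoRoundDiffusion("black-forest-labs/FLUX.1-dev", scheme="W4A16", nsamples=128)
+        >>> model, layer_config = ar.quantize()
+        >>> ar.save_quantized("./qmodel", format="auto_round")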
+ """ + + bits: int | None + group_size: int | None + sym: bool | None + data_type: str | None + act_bits: int | None + act_group_size: int | None + act_sym: bool | None + act_data_type: str | None + act_dynamic: bool | None + super_bits: int | None + super_group_size: int | None + + def __init__( + self, + model: Union[object, str], + tokenizer=None, + guidance_scale: float = 7.5, + num_inference_steps: int = 50, + generator_seed: int = None, + scheme: Union[str, dict, QuantizationScheme] = "W8A16", + layer_config: dict[str, Union[str, dict, QuantizationScheme]] = None, + dataset: Union[str, list, tuple, torch.utils.data.DataLoader] = "coco2014", + iters: int = 200, + seqlen: int = 2048, + nsamples: int = 128, + batch_size: int = 8, + gradient_accumulate_steps: int = 1, + low_gpu_mem_usage: bool = False, + device_map: Union[str, torch.device, int, dict] = 0, + enable_torch_compile: bool = False, + seed: int = 42, + **kwargs, + ): + super().__init__( + model=model, + tokenizer=None, + guidance_scale=guidance_scale, + num_inference_steps=num_inference_steps, + generator_seed=generator_seed, + scheme=scheme, + layer_config=layer_config, + dataset=dataset, + iters=iters, + seqlen=seqlen, + nsamples=nsamples, + batch_size=batch_size, + gradient_accumulate_steps=gradient_accumulate_steps, + low_gpu_mem_usage=low_gpu_mem_usage, + device_map=device_map, + enable_torch_compile=enable_torch_compile, + seed=seed, + **kwargs, + ) \ No newline at end of file diff --git a/auto_round/compressors/__init__.py b/auto_round/compressors/__init__.py index 35bbc5666..dbf47b9c2 100644 --- a/auto_round/compressors/__init__.py +++ b/auto_round/compressors/__init__.py @@ -14,7 +14,9 @@ from auto_round.compressors.base import * from auto_round.compressors.mllm.compressor import MLLMCompressor +from auto_round.compressors.diffusion.compressor import DiffusionCompressor from auto_round.compressors.config import ( + DiffusionExtraConfig, ExtraConfig, MLLMExtraConfig, SchemeExtraConfig, diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index 970b5d359..fd8371c57 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -18,6 +18,7 @@ import sys import time import traceback +from collections import defaultdict from dataclasses import asdict, fields from enum import Enum from typing import Any, Callable, Union @@ -229,6 +230,7 @@ def __init__( device = kwargs.pop("device", None) self.quant_lm_head = kwargs.pop("quant_lm_head", False) self.mllm = kwargs.pop("mllm") if "mllm" in kwargs else False + self.diffusion = kwargs.pop("diffusion") if "diffusion" in kwargs else False # Scale factor for RAM usage per parameter. 
self.mem_per_param_scale = kwargs.pop("mem_per_param_scale", None) fp_layers = kwargs.pop("fp_layers", None) @@ -276,7 +278,7 @@ def __init__( device="cpu", low_cpu_mem_mode=low_cpu_mem_usage, # always load cpu first ) - elif tokenizer is None and iters > 0: + elif tokenizer is None and not self.diffusion and iters > 0: raise ValueError("A tokenizer must be set for non-str model input") self.low_cpu_mem_usage = bool(low_cpu_mem_usage) if unsupport_meta_device(model): @@ -342,7 +344,7 @@ def __init__( model, tokenizer, low_cpu_mem_usage = llm_load_model( model, device=device, low_cpu_mem_mode=low_cpu_mem_usage ) - elif tokenizer is None and iters > 0: + elif tokenizer is None and not self.diffusion and iters > 0: raise ValueError("A tokenizer must be set for non-str model input") self.low_cpu_mem_usage = bool(low_cpu_mem_usage) if unsupport_meta_device(model): @@ -1703,6 +1705,19 @@ def _quantize_via_rtn_blockwise(self, all_to_quantized_module_names: list[str]) cnt = 1 cnt += 1 + def _update_inputs(self, inputs, q_inputs): + keys = inputs.keys() + input_id_str = [key for key in keys if key.startswith("hidden_state")] + if len(input_id_str) != 1: + raise RuntimeError( + "hidden_states arg mismatch error," + "please raise an issue in https://github.com/intel/auto-round/issues" + ) + inputs["input_ids"] = inputs.pop(input_id_str[0], None) + if q_inputs is not None: + q_inputs["input_ids"] = q_inputs.pop(input_id_str[0], None) + return inputs, q_inputs["input_ids"] + def quantize(self) -> tuple[torch.nn.Module, dict[str, Any]]: """Quantize the model and return the quantized model along with layer configurations.The entry of AutoRound. Returns: @@ -1790,7 +1805,7 @@ def quantize(self) -> tuple[torch.nn.Module, dict[str, Any]]: if len(all_blocks) > 1: pbar = tqdm(range(0, sum([len(i) for i in all_blocks]), self.nblocks)) else: - pbar = None # move the alg warning outside pbar + pbar = tqdm(range(0, len(all_blocks[0]), self.nblocks)) # move the alg warning outside pbar for block_names in all_blocks: inputs = all_inputs[block_names[0]] @@ -1799,16 +1814,8 @@ def quantize(self) -> tuple[torch.nn.Module, dict[str, Any]]: if all_q_inputs is not None: q_inputs = all_q_inputs[block_names[0]] all_q_inputs.pop(block_names[0]) - keys = inputs.keys() - input_id_str = [key for key in keys if key.startswith("hidden_state")] - if len(input_id_str) != 1: - raise RuntimeError( - "hidden_states arg mismatch error," - "please raise an issue in https://github.com/intel/auto-round/issues" - ) - inputs["input_ids"] = inputs.pop(input_id_str[0], None) - if q_inputs is not None: - q_inputs["input_ids"] = q_inputs.pop(input_id_str[0], None) + + inputs, q_inputs = self._update_inputs(inputs, q_inputs) clear_memory(self.inputs) @@ -1822,7 +1829,7 @@ def quantize(self) -> tuple[torch.nn.Module, dict[str, Any]]: self.model, inputs, block_names, - q_input=q_inputs["input_ids"] if q_inputs is not None else None, + q_input=q_inputs if q_inputs is not None else None, nblocks=self.nblocks, device=self.device, pbar=pbar, @@ -1832,6 +1839,8 @@ def quantize(self) -> tuple[torch.nn.Module, dict[str, Any]]: f"Expected exactly one packing format when 'is_packing_immediate' is True, " f"but got {len(self.formats)} formats." 
) + pbar.set_description("Quantizing done") + pbar.close() self._quantize_layers(layer_names, all_inputs) ##TODO pack layer immediately @@ -2692,6 +2701,25 @@ def get_act_max_hook(module, input, output): continue return hook_handles + def _get_current_output(self, output, indices): + current_output = [output[x] for x in indices] + current_output = torch.cat(current_output, dim=self.batch_dim) + return current_output + + def _get_current_q_output(self, block, input_ids, input_others, indices, device): + current_input_ids, current_input_others = self._sampling_inputs( + input_ids, + input_others, + indices, + seqlen=self.seqlen, + batch_dim=self.batch_dim, + share_cache_keys=self.shared_cache_keys, + ) + output_q = block_forward( + block, current_input_ids, current_input_others, self.amp, self.amp_dtype, device + ) + return output_q + def _quantize_block( self, block: torch.nn.Module, @@ -2837,22 +2865,12 @@ def _quantize_block( for tmp_step in range(self.gradient_accumulate_steps): indices = whole_indices[tmp_step * self.batch_size : (tmp_step + 1) * self.batch_size] - current_input_ids, current_input_others = self._sampling_inputs( - input_ids, - input_others, - indices, - seqlen=self.seqlen, - batch_dim=self.batch_dim, - share_cache_keys=self.shared_cache_keys, - ) - current_output = [output[x] for x in indices] - current_output = torch.cat(current_output, dim=self.batch_dim) + current_output = self._get_current_output(output, indices) + current_output = to_device(current_output, device) - output_q = block_forward( - block, current_input_ids, current_input_others, self.amp, self.amp_dtype, device - ) + output_q = self._get_current_q_output(block, input_ids, input_others, indices, device) if self.amp: with autocast(device_type=device.split(":")[0], dtype=self.amp_dtype): loss = mse_loss(output_q, current_output) # pylint: disable=not-callable @@ -2931,6 +2949,12 @@ def _quantize_block( clear_memory(input_ids) return None, output + def _split_inputs(self, inputs): + input_ids = inputs["input_ids"] + inputs.pop("input_ids", None) + input_others = inputs + return input_ids, input_others + def _quantize_blocks( self, model: torch.nn.Module, @@ -2956,16 +2980,14 @@ def _quantize_blocks( clear_memory() for n, m in model.named_parameters(): m.requires_grad_(False) - input_ids = inputs["input_ids"] - inputs.pop("input_ids", None) - input_others = inputs + + input_ids, input_others = self._split_inputs(inputs) clear_memory() input_ids = to_device(input_ids, self.cache_device) input_others = to_device(input_others, self.cache_device) # As in calibration phase, we may use bf16 for calibration due to low_gpu_memory usage tmp_dtype = self.amp_dtype if self.amp else torch.float32 - for i in range(len(input_ids)): - input_ids[i] = input_ids[i].to(tmp_dtype) + input_ids = to_dtype(input_ids, tmp_dtype) for key in input_others.keys(): if isinstance(input_others[key], torch.Tensor) and ( @@ -3063,9 +3085,9 @@ def _quantize_blocks( PACKING_LAYER_WITH_FORMAT[target_backend]( tmp_m.tmp_name, self.model, self.formats[0], device=self.device ) - pbar.set_description("Quantizing done") - pbar.update(1) - pbar.close() + if pbar is not None: + pbar.update(1) + self.model = mv_module_from_gpu(self.model, self.low_cpu_mem_usage) for n, m in self.model.named_modules(): if hasattr(m, "name"): @@ -3324,9 +3346,14 @@ def _sampling_inputs( current_input_ids: The sampled input IDs. current_input_others: The sampled other input data. 
""" - current_input_ids = [input_ids[i] for i in indices] - - current_input_ids = torch.cat(current_input_ids, dim=batch_dim) + if isinstance(input_ids, list): + current_input_ids = [input_ids[i] for i in indices] + current_input_ids = torch.cat(current_input_ids, dim=batch_dim) + elif isinstance(input_ids, dict): + current_input_ids = defaultdict(list) + for k in input_ids.keys(): + current_input_ids[k].extend([input_ids[k][i] for i in indices]) + current_input_ids[k] = torch.cat(current_input_ids[k], dim=batch_dim) current_input_others = {"positional_inputs": input_others["positional_inputs"]} for key in input_others.keys(): diff --git a/auto_round/compressors/config.py b/auto_round/compressors/config.py index cc9081ce9..f0fd0205d 100644 --- a/auto_round/compressors/config.py +++ b/auto_round/compressors/config.py @@ -26,6 +26,7 @@ class ExtraConfig: _scheme_config = None _tuning_config = None _mllm_config = None + _diffusion_config = None def __init__( self, @@ -64,6 +65,10 @@ def __init__( quant_nontext_module: bool = False, extra_data_dir: str = None, template: str = None, + # diffusion + guidance_scale: float = 7.5, + num_inference_steps: int = 50, + generator_seed: int = None, ): """Initialize @@ -102,6 +107,10 @@ def __init__( quant_nontext_module: Whether to quantize nontext module. extra_data_dir: The path of extra data such as images, audio and videos. template: The path or name of template used to specify process for different MLLMs. + guidance_scale (float): Control how much the image generation process follows the text prompt. + The more it is, the more closely it follows the prompt (default is 7.5). + num_inference_steps (int): The reference number of denoising steps (default is 50). + generator_seed (int): A sees that controls the initial noise from which an image is generated (default is None). 
""" self.tuning_config = TuningExtraConfig( amp=amp, @@ -141,6 +150,11 @@ def __init__( extra_data_dir=extra_data_dir, template=template, ) + self.diffusion_config = DiffusionExtraConfig( + guidance_scale=guidance_scale, + num_inference_steps=num_inference_steps, + generator_seed=generator_seed, + ) @property def tuning_config(self): @@ -178,6 +192,20 @@ def mllm_config(self, config: MLLMExtraConfig): ), f"mllm_config should be MLLMExtraConfig, but got {config.__class__.__name__}" self._mllm_config = config + @property + def diffusion_config(self): + return self._diffusion_config + + @diffusion_config.setter + def diffusion_config(self, config: DiffusionExtraConfig): + if config is None: + self._diffusion_config = None + else: + assert isinstance( + config, DiffusionExtraConfig + ), f"diffusion_config should be DiffusionExtraConfig, but got {config.__class__.__name__}" + self._diffusion_config = config + def to_dict(self): output_dict = {} for config in self.__dict__.values(): @@ -260,3 +288,10 @@ class MLLMExtraConfig(BaseExtraConfig): quant_nontext_module: bool = False extra_data_dir: str = None template: str = None + + +@dataclass +class DiffusionExtraConfig(BaseExtraConfig): + guidance_scale: float = 7.5 + num_inference_steps: int = 50 + generator_seed: int = None \ No newline at end of file diff --git a/auto_round/compressors/diffusion/__init__.py b/auto_round/compressors/diffusion/__init__.py new file mode 100644 index 000000000..b084e94f2 --- /dev/null +++ b/auto_round/compressors/diffusion/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from auto_round.compressors.diffusion.dataset import get_diffusion_dataloader +from auto_round.compressors.diffusion.compressor import DiffusionCompressor +from auto_round.compressors.diffusion.eval import diffusion_eval diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py new file mode 100644 index 000000000..70f3f697c --- /dev/null +++ b/auto_round/compressors/diffusion/compressor.py @@ -0,0 +1,368 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from collections import defaultdict
+from copy import deepcopy
+from typing import Union
+
+import torch
+from tqdm import tqdm
+
+from auto_round.compressors.base import BaseCompressor
+from auto_round.compressors.diffusion.dataset import get_diffusion_dataloader
+from auto_round.logger import logger
+from auto_round.low_cpu_mem.utils import get_layers_before_block
+from auto_round.schemes import QuantizationScheme
+from auto_round.utils import (
+    block_forward,
+    clear_memory,
+    extract_block_names_to_str,
+    find_matching_blocks,
+    get_block_names,
+    diffusion_load_model,
+    LazyImport
+)
+
+pipeline_utils = LazyImport("diffusers.pipelines.pipeline_utils")
+
+# Maps each supported transformer block class to the ordered names of its outputs.
+output_configs = {
+    "FluxTransformerBlock": ["encoder_hidden_states", "hidden_states"],
+    "FluxSingleTransformerBlock": ["encoder_hidden_states", "hidden_states"],
+}
+
+class DiffusionCompressor(BaseCompressor):
+    """Class for automatic rounding-based quantization with Diffusion models.
+
+    Args:
+        model: The PyTorch model to be quantized.
+        tokenizer: An optional tokenizer for processing input data; it is not used for diffusion models.
+        guidance_scale (float): Controls how much the image generation process follows the text prompt.
+            The higher it is, the more closely the output follows the prompt (default is 7.5).
+        num_inference_steps (int): The reference number of denoising steps (default is 50).
+        generator_seed (int): A seed that controls the initial noise from which an image is generated (default is None).
+        scheme (str | dict | QuantizationScheme): A preset scheme that defines the quantization configurations.
+        layer_config (dict): Configuration for weight quantization (default is None).
+        dataset: The path or name of the calibration dataset.
+        iters (int): Number of tuning iterations (default is 200).
+        seqlen (int): Length of the sequence.
+        nsamples (int): Number of samples (default is 128).
+        batch_size (int): Batch size for training (default is 8).
+        gradient_accumulate_steps (int): Number of gradient accumulation steps (default is 1).
+        low_gpu_mem_usage (bool): Whether to use low GPU memory (default is False).
+        device_map (str | dict | int | torch.device, optional): Device placement map. Defaults to 0.
+        enable_torch_compile (bool): Whether to enable torch compile to optimize quant_block/layer (default is False).
+        **kwargs: Additional keyword arguments.
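+
+    Example (a minimal sketch quantizing the transformer of an already-loaded pipeline;
+        the checkpoint id is illustrative):
+        >>> from diffusers import AutoPipelineForText2Image
+        >>> from auto_round.compressors import DiffusionCompressor
+        >>> pipe = AutoPipelineForText2Image.from_pretrained("black-forest-labs/FLUX.1-dev")
+        >>> compressor = DiffusionCompressor(pipe, scheme="W4A16", iters=200, nsamples=128)
+        >>> model, layer_config = compressor.quantize()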
+ """ + + bits: int | None + group_size: int | None + sym: bool | None + data_type: str | None + act_bits: int | None + act_group_size: int | None + act_sym: bool | None + act_data_type: str | None + act_dynamic: bool | None + super_bits: int | None + super_group_size: int | None + + def __init__( + self, + model: Union[object, str], + tokenizer=None, + guidance_scale: float = 7.5, + num_inference_steps: int = 50, + generator_seed: int = None, + scheme: Union[str, dict, QuantizationScheme] = "W8A16", + layer_config: dict[str, Union[str, dict, QuantizationScheme]] = None, + dataset: Union[str, list, tuple, torch.utils.data.DataLoader] = "coco2014", + iters: int = 200, + seqlen: int = 2048, + nsamples: int = 128, + batch_size: int = 8, + gradient_accumulate_steps: int = 1, + low_gpu_mem_usage: bool = False, + device_map: Union[str, torch.device, int, dict] = 0, + enable_torch_compile: bool = False, + seed: int = 42, + **kwargs, + ): + self.guidance_scale = guidance_scale + self.num_inference_steps = num_inference_steps + self.generator_seed = generator_seed + + to_quant_block_names: Union[str, list, None] = kwargs.pop("to_quant_block_names", None) + if device_map is None: + device_map = 0 + self._set_device(device_map) + + if isinstance(model, str): + pipe, model = diffusion_load_model(model, device=self.device) + elif isinstance(model, pipeline_utils.DiffusionPipeline): + pipe = model + model = pipe.transformer + else: + raise ValueError(f"Only support str or DiffusionPipeline class for model, but get {type(model)}") + + self.model = model + self.pipe = pipe + + all_blocks = get_block_names(model) + self.quant_block_list = find_matching_blocks(model, all_blocks, to_quant_block_names) + if to_quant_block_names is None: + to_quant_block_names = extract_block_names_to_str(self.quant_block_list) + + if iters > 0 and batch_size != 1: + logger.warning( + f"reset batch_size({batch_size}) to 1 and " + f"gradient_accumulate_steps({gradient_accumulate_steps}) " + f"to {batch_size * gradient_accumulate_steps}, " + f"because batch_size={batch_size} cannot be used for calibrating non-text modules." 
+ ) + gradient_accumulate_steps = batch_size * gradient_accumulate_steps + batch_size = 1 + + seqlen = 2048 if seqlen is None else seqlen + + if nsamples % batch_size != 0: + nsamples = (nsamples // batch_size + 1) * batch_size + logger.warning(f"'nsamples' is not divisible by 'batch_size', will adjusted to {nsamples}") + + kwargs["diffusion"] = True + super(DiffusionCompressor, self).__init__( + model=model, + tokenizer=None, + scheme=scheme, + layer_config=layer_config, + dataset=dataset, + iters=iters, + seqlen=seqlen, + nsamples=nsamples, + batch_size=batch_size, + gradient_accumulate_steps=gradient_accumulate_steps, + low_gpu_mem_usage=low_gpu_mem_usage, + device_map=device_map, + enable_torch_compile=enable_torch_compile, + seed=seed, + to_quant_block_names=to_quant_block_names, + **kwargs, + ) + + def _update_inputs(self, inputs, q_inputs): + # flux transformer model's blocks will update hidden_states and encoder_hidden_states + input_id_str = [key for key in inputs.keys() if "hidden_state" in key] + if q_inputs is not None: + q_inputs = {k: q_inputs.pop(k, None) for k in input_id_str} + return inputs, q_inputs + + def _split_inputs(self, inputs): + input_id_str = [key for key in inputs.keys() if "hidden_state" in key] + input_ids = {k: inputs.pop(k, None) for k in input_id_str} + input_others = inputs + return input_ids, input_others + + def _get_current_output(self, output, indices): + if isinstance(output, list): + current_output = [output[x] for x in indices] + current_output = torch.cat(current_output, dim=self.batch_dim) + + elif isinstance(output, dict): + assert "hidden_states" in output + current_output = [output["hidden_states"][x] for x in indices] + current_output = torch.cat(current_output, dim=self.batch_dim) + return current_output + + def _get_current_q_output(self, block, input_ids, input_others, indices, device): + output_config = output_configs.get(block.__class__.__name__, []) + idx = None if "hidden_states" not in output_config else output_config.index("hidden_states") + current_input_ids, current_input_others = self._sampling_inputs( + input_ids, + input_others, + indices, + seqlen=self.seqlen, + batch_dim=self.batch_dim, + share_cache_keys=self.shared_cache_keys, + ) + if isinstance(current_input_ids, dict): + hidden_states = current_input_ids.pop("hidden_states") + current_input_others.update(current_input_ids) + current_input_ids = hidden_states + output_q = block_forward( + block, current_input_ids, current_input_others, self.amp, self.amp_dtype, device, idx + ) + return output_q + + @torch.no_grad() + def _get_block_outputs( + self, + block: torch.nn.Module, + input_ids: torch.Tensor, + input_others: torch.Tensor, + bs: int, + device: Union[str, torch.device], + cache_device: Union[str, torch.device], + save_output: bool = True, + ): + """Compute the output of a given block of the model for a given input. + + Args: + block: The block of the model. + input_ids: The input tensor containing tokenized input ids. + input_others: A dictionary containing additional input data. + bs: The batch size for computing the output. + device: The device for computation. + cache_device: The device for storing the output. + batch_dim: The batch dimension of the output tensor. + + Returns: + The output tensor of the block. 
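+            For diffusion transformer blocks this is a dict of cached outputs keyed by the
+            entries of ``output_configs`` (e.g. "encoder_hidden_states" and "hidden_states").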
+ """ + + output = defaultdict(list) + nsamples = len(input_ids) + output_config = output_configs.get(block.__class__.__name__, []) + + for i in range(0, nsamples, bs): + end_index = min(nsamples, i + bs) + indices = torch.arange(i, end_index).to(torch.long) + tmp_input_ids, tmp_input_others = self._sampling_inputs( + input_ids, input_others, indices, self.seqlen, self.batch_dim, share_cache_keys=self.shared_cache_keys + ) + if isinstance(tmp_input_ids, dict): + hidden_states = tmp_input_ids.pop("hidden_states") + tmp_input_others.update(tmp_input_ids) + tmp_input_ids = hidden_states + + tmp_output = block_forward(block, tmp_input_ids, tmp_input_others, self.amp, self.amp_dtype, device, None) + assert len(output_config) == len(tmp_output) + tmp_output = dict(zip(output_config, tmp_output)) + + if save_output: + for name, out in tmp_output.items(): + if self.batch_size == 1: + output[name].append(out.to(cache_device)) + else: + output[name].extend( + list(torch.split(out.to(cache_device), 1, dim=self.batch_dim)) + ) + if self.low_gpu_mem_usage: + clear_memory() + + return output + + def calib(self, nsamples, bs): + """Perform calibration for quantization. + + This method calibrates the model for quantization by processing a specified + number of samples from the calibration dataset. It ensures that the data is + properly formatted and feeds it to the model. If the number of samples processed + is less than the specified number, it logs a warning. If no samples are processed, + it logs an error and exits. + Args: + nsamples (int): The number of samples to use for calibration. + bs (int): The number of samples to use for calibration + """ + logger.warning("Diffusion model will catch nsamples * num_inference_steps inputs, " + "you can reduce nsamples or num_inference_steps if OOM or take too much time.") + if isinstance(self.dataset, str): + dataset = self.dataset.replace(" ", "") + self.dataloader, self.batch_size, self.gradient_accumulate_steps = get_diffusion_dataloader( + dataset=dataset, + bs=self.batch_size, + seed=self.seed, + nsamples=self.nsamples, + gradient_accumulate_steps=self.gradient_accumulate_steps, + ) + else: + self.dataloader = self.dataset + total_cnt = 0 + + if self.low_cpu_mem_usage: + embed_layers = get_layers_before_block(self.model) + for n, m in embed_layers: + m = m.to(self.device) + + total = nsamples if not hasattr(self.dataloader, "len") else min(nsamples, len(self.dataloader)) + if self.pipe.dtype != self.model.dtype: + self.pipe.to(self.model.dtype) + if self.pipe.device != self.model.device: + self.pipe.to(self.model.device) + with tqdm(range(1, total + 1), desc="cache block inputs") as pbar: + for ids, prompts in self.dataloader: + if isinstance(prompts, tuple): + prompts = list(prompts) + try: + self.pipe( + prompt=prompts, + guidance_scale=self.guidance_scale, + num_inference_steps=self.num_inference_steps, + generator=( + None + if self.generator_seed is None + else torch.Generator(device=self.pipe.device).manual_seed(self.generator_seed) + ), + ) + except NotImplementedError: + pass + except Exception as error: + raise error + step = len(prompts) + total_cnt += step + pbar.update(step) + if total_cnt >= nsamples: + break + if total_cnt == 0: + logger.error( + f"no data has been cached, please provide more data with sequence length >={self.seqlen} in the " + f"dataset or decease the sequence length" + ) + exit(-1) + elif total_cnt < nsamples: + logger.warning( + f"Insufficient number of samples collected may affect the quantization. 
" + f"target samples count is {nsamples}, while valid samples count is {total_cnt}" + ) + if total_cnt < self.batch_size: + raise ValueError( + f"valid samples is less than batch_size({self.batch_size})," + " please adjust self.batch_size or seqlen." + ) + max_len = (total_cnt // self.batch_size) * self.batch_size + for k, v in self.inputs.items(): + for key in v: + if isinstance(v[key], list) and len(v[key]) == total_cnt: + self.inputs[k][key] = v[key][:max_len] + + # clean embed weight to save memory + if self.low_cpu_mem_usage: + for n, m in embed_layers: + m = m.to("meta") + # torch.cuda.empty_cache() + + def save_quantized(self, output_dir=None, format="auto_round", inplace=True, **kwargs): + """Save the quantized model to the specified output directory in the specified format. + + Args: + output_dir (str, optional): The directory to save the quantized model. Defaults to None. + format (str, optional): The format in which to save the model. Defaults to "auto_round". + inplace (bool, optional): Whether to modify the model in place. Defaults to True. + **kwargs: Additional keyword arguments specific to the export format. + + Returns: + object: The compressed model object. + """ + compressed_model = super().save_quantized( + output_dir=output_dir, format=format, inplace=inplace, **kwargs + ) + return compressed_model \ No newline at end of file diff --git a/auto_round/compressors/diffusion/dataset.py b/auto_round/compressors/diffusion/dataset.py new file mode 100644 index 000000000..957d0d886 --- /dev/null +++ b/auto_round/compressors/diffusion/dataset.py @@ -0,0 +1,92 @@ +# Copyright (c) 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+from typing import Dict
+
+import pandas as pd
+import torch
+from torch.utils.data import DataLoader, Dataset
+from transformers import set_seed
+
+from auto_round.utils import logger
+
+
+class DiffusionDataset(Dataset):
+    """Prompt dataset for diffusion model calibration."""
+
+    COCO_URL = {
+        "coco2014": "https://github.com/mlcommons/inference/raw/refs/heads/master/text_to_image/coco2014/captions/captions_source.tsv"
+    }
+
+    def __init__(
+        self,
+        dataset_path,
+        nsamples=128,
+    ) -> None:
+        super().__init__()
+        self.captions = []
+        self.caption_ids = []
+
+        if os.path.exists(dataset_path):
+            logger.info(f"use dataset {dataset_path}, loading from disk...")
+            df = pd.read_csv(dataset_path, sep="\t")
+        else:
+            from io import StringIO
+
+            import requests
+
+            # fall back to the bundled coco2014 captions when the given path does not exist
+            dataset_path = "coco2014"
+
+            if dataset_path in self.COCO_URL:
+                logger.info(f"use dataset {dataset_path}, downloading ...")
+                text_data = requests.get(self.COCO_URL[dataset_path]).text
+                df = pd.read_csv(StringIO(text_data), sep="\t")
+            else:
+                raise KeyError(f"{dataset_path} is not supported, we support {self.COCO_URL.keys()}.")
+        for index, row in df.iterrows():
+            if nsamples > 0 and index + 1 > nsamples:
+                break
+            assert "id" in row and "caption" in row
+            caption_id = row["id"]
+            caption_text = row["caption"]
+            self.caption_ids.append(caption_id)
+            self.captions.append(caption_text)
+
+    def __len__(self):
+        return len(self.captions)
+
+    def __getitem__(self, i) -> tuple:
+        return self.caption_ids[i], self.captions[i]
+
+
+def get_diffusion_dataloader(
+    dataset="coco2014",
+    bs=1,
+    seed=42,
+    nsamples=128,
+    gradient_accumulate_steps=1,
+):
+    """Generate a DataLoader for calibration using specified parameters.
+    Args:
+        dataset (str): The name or path of the dataset.
+        bs (int, optional): The batch size. Defaults to 1.
+    Returns:
+        tuple: The DataLoader for the calibration dataset, plus the batch size and
+        gradient accumulation steps actually used.
+    """
+    dataset = DiffusionDataset(dataset, nsamples)
+    set_seed(seed)
+    dataloader_params = {"batch_size": bs, "shuffle": True}
+
+    return DataLoader(dataset, **dataloader_params), bs, gradient_accumulate_steps
\ No newline at end of file
diff --git a/auto_round/compressors/diffusion/eval.py b/auto_round/compressors/diffusion/eval.py
new file mode 100644
index 000000000..64f81bc85
--- /dev/null
+++ b/auto_round/compressors/diffusion/eval.py
@@ -0,0 +1,103 @@
+# Copyright (c) 2025 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
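+# Note: the metric backends (torchmetrics, ImageReward) are imported lazily below, so these
+# optional dependencies are only needed when diffusion evaluation actually runs.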
+ +import os + +import numpy as np +import torch +from PIL import Image +from tqdm import tqdm + +from auto_round.compressors.diffusion.dataset import get_diffusion_dataloader +from auto_round.utils import LazyImport + +metrics = LazyImport("torchmetrics.multimodal") +reward = LazyImport("ImageReward") + + +def compute_clip(prompts, images, device: str = "cuda"): + clip_model = metrics.CLIPScore(model_name_or_path="openai/clip-vit-large-patch14").to(device) + for prompt, img_path in tqdm(zip(prompts, images), desc="Computing CLIP score"): + image_data = Image.open(img_path).convert("RGB") + image_tensor = torch.from_numpy(np.array(image_data)).permute(2, 0, 1) + clip_model.update(image_tensor.to(torch.float32).to(device).unsqueeze(0), prompt) + result = clip_model.compute().mean().item() + return {"CLIP": result} + + +def compute_clip_iqa(prompts, images, device: str = "cuda"): + clip_model = metrics.CLIPImageQualityAssessment(model_name_or_path="openai/clip-vit-large-patch14").to(device) + for prompt, img_path in tqdm(zip(prompts, images), desc="Computing CLIP-IQA score"): + image_data = Image.open(img_path).convert("RGB") + image_tensor = torch.from_numpy(np.array(image_data)).permute(2, 0, 1) + clip_model.update(image_tensor.to(torch.float32).to(device).unsqueeze(0)) + result = clip_model.compute().mean().item() + return {"CLIP-IQA": result} + + +def compute_image_reward_metrics(prompts, images, device="cuda"): + image_reward_model = reward.load("ImageReward-v1.0", device=device) + scores = [] + for prompt, img_path in tqdm(zip(prompts, images), desc="Computing image reward metrics"): + score = image_reward_model.score(prompt, img_path) + scores.append(score) + return {"ImageReward": np.mean(scores)} + + +metric_map = { + "clip": compute_clip, + "clip-iqa": compute_clip_iqa, + "imagereward": compute_image_reward_metrics, +} + + +def diffusion_eval( + pipe, + prompt_file, + metrics, + image_save_dir, + batch_size, + gen_kwargs, +): + dataloader, _, _ = get_diffusion_dataloader(prompt_file, nsamples=-1, bs=batch_size) + prompt_list = [] + image_list = [] + for image_ids, prompts in dataloader: + prompt_list.extend(prompts) + + new_ids = [] + new_prompts = [] + for idx, image_id in enumerate(image_ids): + image_id = image_id.item() + image_list.append(os.path.join(image_save_dir, str(image_id) + ".png")) + + if os.path.exists(os.path.join(image_save_dir, str(image_id) + ".png")): + continue + new_ids.append(image_id) + new_prompts.append(prompts[idx]) + + if len(new_prompts) == 0: + continue + + output = pipe(prompt=new_prompts, **gen_kwargs) + for idx, image_id in enumerate(new_ids): + output.images[idx].save(os.path.join(image_save_dir, str(image_id) + ".png")) + + result = {} + for metric in metrics: + result.update(metric_map[metric](prompt_list, image_list, pipe.device)) + + import tabulate + + print(tabulate.tabulate(result.items(), tablefmt="grid")) \ No newline at end of file diff --git a/auto_round/utils.py b/auto_round/utils.py index b472e4d88..08e0ef322 100644 --- a/auto_round/utils.py +++ b/auto_round/utils.py @@ -388,13 +388,21 @@ def get_block_names(model, quant_vision=False): """ from auto_round.special_model_handler import SPECIAL_MULTIMODAL_BLOCK - def _get_llm_block_names(model): - block_names = [] + def _search_block(name, module): + if hasattr(type(module), "__name__") and "ModuleList" in type(module).__name__: + return [(name, module)] target_modules = [] - for n, m in model.named_modules(): + for n, m in module.named_children(): if hasattr(type(m), "__name__") and 
"ModuleList" in type(m).__name__: - target_modules.append((n, m)) - break ## only find the first modulelist, may be not robust + target_modules.append((".".join(filter(None, (name, n))), m)) + else: + target_modules.extend(_search_block(".".join(filter(None, (name, n))), m)) + return target_modules + + def _get_llm_block_names(model): + block_names = [] + target_modules = _search_block("", model) + for i, target_m in enumerate(target_modules): block_names.append([]) for n, m in target_m[1].named_children(): @@ -441,7 +449,15 @@ def collect_best_params(block): return params -def block_forward(block, input_ids, input_others, amp=False, amp_dtype=torch.float16, device=torch.device("cpu")): +def block_forward( + block, + input_ids, + input_others, + amp=False, + amp_dtype=torch.float16, + device=torch.device("cpu"), + output_return_id=0, + ): """Performs a forward pass through a block with the given inputs. Args: @@ -451,6 +467,7 @@ def block_forward(block, input_ids, input_others, amp=False, amp_dtype=torch.flo amp: A boolean indicating whether to use automatic mixed precision. amp_dtype: The data type for automatic mixed precision. device: The target device. + output_return_id: if the output has more than one tenor, return the specified idx tensor. Returns: output: The output of the forward pass. @@ -467,8 +484,8 @@ def block_forward(block, input_ids, input_others, amp=False, amp_dtype=torch.flo output = block(input_ids, *input_tuple, **input_others) else: output = block(input_ids, *input_tuple, **input_others) - if isinstance(output, list) or isinstance(output, tuple): - output = output[0] + if isinstance(output_return_id, int) and (isinstance(output, list) or isinstance(output, tuple)): + output = output[output_return_id] return output @@ -1597,6 +1614,30 @@ def mllm_load_model( return model, processor, tokenizer, image_processor +def diffusion_load_model( + pretrained_model_name_or_path, + device="cpu", + torch_dtype="auto", + use_auto_mapping=True, + trust_remote_code=True, + model_dtype=None, + **kwargs, +): + device_str, use_auto_mapping = get_device_and_parallelism(device) + torch_dtype = "auto" + if device_str is not None and "hpu" in device_str: + torch_dtype = torch.bfloat16 + + pipelines = LazyImport("diffusers.pipelines") + + pipe = pipelines.auto_pipeline.AutoPipelineForText2Image.from_pretrained( + pretrained_model_name_or_path, torch_dtype=torch_dtype + ) + pipe = _to_model_dtype(pipe, model_dtype) + model = pipe.transformer + return pipe, model.to(device) + + def is_pure_text_model(model): """verify on: phi-3.5, Mistral-Small-3.1, gemma-3, qwen2-vl,""" if hasattr(model, "config") and hasattr(model.config, "vision_config"): @@ -2723,11 +2764,12 @@ def is_mllm_model(model_or_path: Union[str, torch.nn.Module]): return True if os.path.exists(os.path.join(model_path, "processor_config.json")): return True - with open(os.path.join(model_path, "config.json")) as f: - config = json.load(f) - for key in config.keys(): - if any([k in key for k in MM_KEYS]): - return True + if os.path.exists(os.path.join(model_path, "config.json")): + with open(os.path.join(model_path, "config.json")) as f: + config = json.load(f) + for key in config.keys(): + if any([k in key for k in MM_KEYS]): + return True if isinstance(model_or_path, torch.nn.Module): for name, module in model_or_path.named_modules(): @@ -2735,3 +2777,20 @@ def is_mllm_model(model_or_path: Union[str, torch.nn.Module]): return True return False + + +def is_diffusion_model(model_or_path: Union[str, object]): + if 
isinstance(model_or_path, str): + if not os.path.isdir(model_or_path): + try: + from huggingface_hub import hf_hub_download + index_file = hf_hub_download(model_or_path, "model_index.json") + except: + index_file = None + + elif os.path.exists(os.path.join(model_or_path, "model_index.json")): + index_file = os.path.join(model_or_path, "model_index.json") + return index_file is not None + else: + pipeline_utils = LazyImport("diffusers.pipelines.pipeline_utils") + return isinstance(model_or_path, pipeline_utils.DiffusionPipeline) diff --git a/test/test_cuda/test_diffusion.py b/test/test_cuda/test_diffusion.py new file mode 100644 index 000000000..bec281151 --- /dev/null +++ b/test/test_cuda/test_diffusion.py @@ -0,0 +1,77 @@ +import copy +import os +import re +import shutil +import sys +import unittest + +import requests + +sys.path.insert(0, "../..") + +from PIL import Image + +from auto_round import AutoRoundConfig +from auto_round.testing_utils import require_gptqmodel, require_optimum, require_vlm_env + + +class TestAutoRound(unittest.TestCase): + @classmethod + def setUpClass(self): + self.save_dir = "./saved" + self.model_name = "/dataset/FLUX.1-dev" + + @classmethod + def tearDownClass(self): + shutil.rmtree(self.save_dir, ignore_errors=True) + shutil.rmtree("runs", ignore_errors=True) + + @require_optimum + def test_diffusion_tune(self): + + from diffusers import AutoPipelineForText2Image + + from auto_round import AutoRoundDiffusion + + ## load the model + pipe = AutoPipelineForText2Image.from_pretrained(self.model_name) + model = pipe.transformer + + layer_config = {} + # skip some layers since it takes much time + for n, m in model.named_modules(): + if m.__class__.__name__ != "Linear": + continue + match = re.search(r"blocks\.(\d+)", n) + if match and int(match.group(1)) > 0: + layer_config[n] = {"bits": 16, "act_bits": 16, "data_type": "float", "act_data_type": "float"} + + ## quantize the model + autoround = AutoRoundDiffusion( + pipe, + tokenizer=None, + scheme="MXFP4", + iters=1, + nsamples=1, + layer_config=layer_config, + dataset="/dataset/captions_source.tsv", + ) + # skip model saving since it taks much time + autoround.quantize() + shutil.rmtree(self.save_dir, ignore_errors=True) + + def test_block_name(self): + from diffusers import AutoPipelineForText2Image + + from auto_round.utils import get_block_names + + pipe = AutoPipelineForText2Image.from_pretrained(self.model_name) + model = pipe.transformer + + block_name = get_block_names(model) + self.assertTrue(len(block_name) == 2) + self.assertTrue(any(["context_embedder" not in n for n in block_name])) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 128a4d9873e1479bccf626092df73a5f49399740 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Wed, 24 Sep 2025 22:42:56 -0400 Subject: [PATCH 2/9] add dependence Signed-off-by: Mengni Wang --- auto_round/compressors/diffusion/eval.py | 4 +++- test/test_cuda/requirements_diffusion.txt | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 test/test_cuda/requirements_diffusion.txt diff --git a/auto_round/compressors/diffusion/eval.py b/auto_round/compressors/diffusion/eval.py index 64f81bc85..d2b5d3082 100644 --- a/auto_round/compressors/diffusion/eval.py +++ b/auto_round/compressors/diffusion/eval.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import importlib import os - import numpy as np import torch from PIL import Image @@ -70,6 +70,8 @@ def diffusion_eval( batch_size, gen_kwargs, ): + if not importlib.util.find_spec("clip") or not importlib.util.find_spec("ImageReward") or not importlib.util.find_spec("torchmetrics"): + raise ImportError("Please make sure clip, image-reward and torchmetrics are installed for diffusion model evaluation.") dataloader, _, _ = get_diffusion_dataloader(prompt_file, nsamples=-1, bs=batch_size) prompt_list = [] image_list = [] diff --git a/test/test_cuda/requirements_diffusion.txt b/test/test_cuda/requirements_diffusion.txt new file mode 100644 index 000000000..55908f6ac --- /dev/null +++ b/test/test_cuda/requirements_diffusion.txt @@ -0,0 +1,3 @@ +diffusers +image-reward +clip \ No newline at end of file From 5e89c184304e582f3ac18150ae647061ed960e86 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 25 Sep 2025 02:46:42 +0000 Subject: [PATCH 3/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- auto_round/__main__.py | 5 +++- auto_round/autoround.py | 4 +-- auto_round/compressors/base.py | 4 +-- auto_round/compressors/config.py | 2 +- .../compressors/diffusion/compressor.py | 27 +++++++++---------- auto_round/compressors/diffusion/dataset.py | 2 +- auto_round/compressors/diffusion/eval.py | 13 ++++++--- auto_round/utils.py | 27 ++++++++++--------- test/test_cuda/test_diffusion.py | 2 +- 9 files changed, 46 insertions(+), 40 deletions(-) diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 0af4e926e..b806b583b 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -293,6 +293,7 @@ def __init__(self, *args, **kwargs): "--image_save_dir", default="./tmp_image_save", type=str, help="path to save generated images" ) + def setup_parser(): parser = BasicArgumentParser() @@ -502,7 +503,9 @@ def tune(args): quant_nontext_module=args.quant_nontext_module, extra_data_dir=args.extra_data_dir, template=args.template ) diffusion_config = DiffusionExtraConfig( - guidance_scale=args.guidance_scale, num_inference_steps=args.num_inference_steps, generator_seed=args.generator_seed + guidance_scale=args.guidance_scale, + num_inference_steps=args.num_inference_steps, + generator_seed=args.generator_seed, ) extra_config.tuning_config = tuning_config extra_config.scheme_config = scheme_config diff --git a/auto_round/autoround.py b/auto_round/autoround.py index 1b7f76380..e25e2125d 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -27,7 +27,7 @@ ) from auto_round.logger import deprecated, logger from auto_round.schemes import QuantizationScheme -from auto_round.utils import is_mllm_model, is_diffusion_model +from auto_round.utils import is_diffusion_model, is_mllm_model class AutoRound: @@ -625,4 +625,4 @@ def __init__( enable_torch_compile=enable_torch_compile, seed=seed, **kwargs, - ) \ No newline at end of file + ) diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index e622af4d0..54c4fc033 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -2709,9 +2709,7 @@ def _get_current_q_output(self, block, input_ids, input_others, indices, device) batch_dim=self.batch_dim, share_cache_keys=self.shared_cache_keys, ) - output_q = block_forward( - block, current_input_ids, current_input_others, self.amp, self.amp_dtype, device - ) + output_q = block_forward(block, current_input_ids, 
current_input_others, self.amp, self.amp_dtype, device)
         return output_q

     def _quantize_block(
diff --git a/auto_round/compressors/config.py b/auto_round/compressors/config.py
index c4f3e5486..27beee378 100644
--- a/auto_round/compressors/config.py
+++ b/auto_round/compressors/config.py
@@ -297,4 +297,4 @@ class MLLMExtraConfig(BaseExtraConfig):
 class DiffusionExtraConfig(BaseExtraConfig):
     guidance_scale: float = 7.5
     num_inference_steps: int = 50
-    generator_seed: int = None
\ No newline at end of file
+    generator_seed: int = None
diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py
index 70f3f697c..980820500 100644
--- a/auto_round/compressors/diffusion/compressor.py
+++ b/auto_round/compressors/diffusion/compressor.py
@@ -25,13 +25,13 @@
 from auto_round.low_cpu_mem.utils import get_layers_before_block
 from auto_round.schemes import QuantizationScheme
 from auto_round.utils import (
+    LazyImport,
     block_forward,
     clear_memory,
+    diffusion_load_model,
     extract_block_names_to_str,
     find_matching_blocks,
     get_block_names,
-    diffusion_load_model,
-    LazyImport
 )

 pipeline_utils = LazyImport("diffusers.pipelines.pipeline_utils")
@@ -41,6 +41,7 @@
     "FluxSingleTransformerBlock": ["encoder_hidden_states", "hidden_states"],
 }

+
 class DiffusionCompressor(BaseCompressor):
     """Class for automatic rounding-based quantization with Diffusion models.

@@ -122,7 +123,7 @@ def __init__(
         self.quant_block_list = find_matching_blocks(model, all_blocks, to_quant_block_names)
         if to_quant_block_names is None:
             to_quant_block_names = extract_block_names_to_str(self.quant_block_list)
-
+
         if iters > 0 and batch_size != 1:
             logger.warning(
                 f"reset batch_size({batch_size}) to 1 and "
@@ -198,9 +199,7 @@ def _get_current_q_output(self, block, input_ids, input_others, indices, device)
             hidden_states = current_input_ids.pop("hidden_states")
             current_input_others.update(current_input_ids)
             current_input_ids = hidden_states
-        output_q = block_forward(
-            block, current_input_ids, current_input_others, self.amp, self.amp_dtype, device, idx
-        )
+        output_q = block_forward(block, current_input_ids, current_input_others, self.amp, self.amp_dtype, device, idx)
         return output_q

     @torch.no_grad()
@@ -253,9 +252,7 @@
             if self.batch_size == 1:
                 output[name].append(out.to(cache_device))
             else:
-                output[name].extend(
-                    list(torch.split(out.to(cache_device), 1, dim=self.batch_dim))
-                )
+                output[name].extend(list(torch.split(out.to(cache_device), 1, dim=self.batch_dim)))
             if self.low_gpu_mem_usage:
                 clear_memory()

@@ -273,8 +270,10 @@ def calib(self, nsamples, bs):
             nsamples (int): The number of samples to use for calibration.
             bs (int): The batch size to use for calibration.
         """
-        logger.warning("Diffusion model will cache nsamples * num_inference_steps inputs, "
-                       "you can reduce nsamples or num_inference_steps if you hit OOM or calibration takes too long.")
+        logger.warning(
+            "Diffusion model will cache nsamples * num_inference_steps inputs, "
+            "you can reduce nsamples or num_inference_steps if you hit OOM or calibration takes too long."
+        )
         if isinstance(self.dataset, str):
             dataset = self.dataset.replace(" ", "")
             self.dataloader, self.batch_size, self.gradient_accumulate_steps = get_diffusion_dataloader(
@@ -362,7 +361,5 @@ def save_quantized(self, output_dir=None, format="auto_round", inplace=True, **k

         Returns:
             object: The compressed model object.
""" - compressed_model = super().save_quantized( - output_dir=output_dir, format=format, inplace=inplace, **kwargs - ) - return compressed_model \ No newline at end of file + compressed_model = super().save_quantized(output_dir=output_dir, format=format, inplace=inplace, **kwargs) + return compressed_model diff --git a/auto_round/compressors/diffusion/dataset.py b/auto_round/compressors/diffusion/dataset.py index 957d0d886..cb37d6b7f 100644 --- a/auto_round/compressors/diffusion/dataset.py +++ b/auto_round/compressors/diffusion/dataset.py @@ -89,4 +89,4 @@ def get_diffusion_dataloader( set_seed(seed) dataloader_params = {"batch_size": bs, "shuffle": True} - return DataLoader(dataset, **dataloader_params), bs, gradient_accumulate_steps \ No newline at end of file + return DataLoader(dataset, **dataloader_params), bs, gradient_accumulate_steps diff --git a/auto_round/compressors/diffusion/eval.py b/auto_round/compressors/diffusion/eval.py index d2b5d3082..5baed978b 100644 --- a/auto_round/compressors/diffusion/eval.py +++ b/auto_round/compressors/diffusion/eval.py @@ -14,6 +14,7 @@ import importlib import os + import numpy as np import torch from PIL import Image @@ -70,8 +71,14 @@ def diffusion_eval( batch_size, gen_kwargs, ): - if not importlib.util.find_spec("clip") or not importlib.util.find_spec("ImageReward") or not importlib.util.find_spec("torchmetrics"): - raise ImportError("Please make sure clip, image-reward and torchmetrics are installed for diffusion model evaluation.") + if ( + not importlib.util.find_spec("clip") + or not importlib.util.find_spec("ImageReward") + or not importlib.util.find_spec("torchmetrics") + ): + raise ImportError( + "Please make sure clip, image-reward and torchmetrics are installed for diffusion model evaluation." + ) dataloader, _, _ = get_diffusion_dataloader(prompt_file, nsamples=-1, bs=batch_size) prompt_list = [] image_list = [] @@ -102,4 +109,4 @@ def diffusion_eval( import tabulate - print(tabulate.tabulate(result.items(), tablefmt="grid")) \ No newline at end of file + print(tabulate.tabulate(result.items(), tablefmt="grid")) diff --git a/auto_round/utils.py b/auto_round/utils.py index 059428086..e7907d81d 100644 --- a/auto_round/utils.py +++ b/auto_round/utils.py @@ -468,14 +468,14 @@ def collect_best_params(block): def block_forward( - block, - input_ids, - input_others, - amp=False, - amp_dtype=torch.float16, - device=torch.device("cpu"), - output_return_id=0, - ): + block, + input_ids, + input_others, + amp=False, + amp_dtype=torch.float16, + device=torch.device("cpu"), + output_return_id=0, +): """Performs a forward pass through a block with the given inputs. 
Args: @@ -2802,11 +2802,12 @@ def is_mllm_model(model_or_path: Union[str, torch.nn.Module]): def is_diffusion_model(model_or_path: Union[str, object]): if isinstance(model_or_path, str): if not os.path.isdir(model_or_path): - try: - from huggingface_hub import hf_hub_download - index_file = hf_hub_download(model_or_path, "model_index.json") - except: - index_file = None + try: + from huggingface_hub import hf_hub_download + + index_file = hf_hub_download(model_or_path, "model_index.json") + except: + index_file = None elif os.path.exists(os.path.join(model_or_path, "model_index.json")): index_file = os.path.join(model_or_path, "model_index.json") diff --git a/test/test_cuda/test_diffusion.py b/test/test_cuda/test_diffusion.py index bec281151..a3a736324 100644 --- a/test/test_cuda/test_diffusion.py +++ b/test/test_cuda/test_diffusion.py @@ -74,4 +74,4 @@ def test_block_name(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From f0000245842e33769845f48a4a2722d83d30d86d Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Thu, 25 Sep 2025 10:47:51 +0800 Subject: [PATCH 4/9] Update autoround.py --- auto_round/autoround.py | 1 + 1 file changed, 1 insertion(+) diff --git a/auto_round/autoround.py b/auto_round/autoround.py index e25e2125d..fa71c3550 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -154,6 +154,7 @@ def __new__( else: if extra_config: extra_config.mllm_config = None + extra_config.diffusion_config = None model_cls.append(LLMCompressor) if enable_adam: From 41eb9ef67e42e6d33f56f4801422e998506e3a73 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 25 Sep 2025 07:29:27 -0400 Subject: [PATCH 5/9] refine code Signed-off-by: Mengni Wang --- .azure-pipelines/scripts/ut/run_ut_cuda.sh | 1 + auto_round/autoround.py | 4 +-- auto_round/compressors/base.py | 21 +++++++++----- auto_round/compressors/config.py | 2 +- .../compressors/diffusion/compressor.py | 28 ++++++++++--------- auto_round/compressors/diffusion/dataset.py | 5 +++- auto_round/utils.py | 9 ++++-- test/test_cuda/test_diffusion.py | 9 ++++++ 8 files changed, 52 insertions(+), 27 deletions(-) diff --git a/.azure-pipelines/scripts/ut/run_ut_cuda.sh b/.azure-pipelines/scripts/ut/run_ut_cuda.sh index 02073e958..8580f760d 100644 --- a/.azure-pipelines/scripts/ut/run_ut_cuda.sh +++ b/.azure-pipelines/scripts/ut/run_ut_cuda.sh @@ -46,6 +46,7 @@ function run_unit_test() { CMAKE_ARGS="-DGGML_CUDA=on -DLLAVA_BUILD=off" uv pip install llama-cpp-python uv pip install 'git+https://github.com/ggml-org/llama.cpp.git#subdirectory=gguf-py' uv pip install -r requirements.txt + uv pip install -r requirements_diffusion.txt uv pip list export COVERAGE_RCFILE=${REPO_PATH}/.azure-pipelines/scripts/ut/.coverage diff --git a/auto_round/autoround.py b/auto_round/autoround.py index fa71c3550..87f9987e5 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -78,7 +78,7 @@ def __new__( seed: int = 42, # for adam enable_adam: bool = False, - # for MLLM + # for MLLM and Diffusion extra_config: ExtraConfig = None, **kwargs, ) -> BaseCompressor: @@ -593,7 +593,7 @@ def __init__( guidance_scale: float = 7.5, num_inference_steps: int = 50, generator_seed: int = None, - scheme: Union[str, dict, QuantizationScheme] = "W8A16", + scheme: Union[str, dict, QuantizationScheme] = "W4A16", layer_config: dict[str, Union[str, dict, QuantizationScheme]] = None, dataset: Union[str, list, tuple, torch.utils.data.DataLoader] = "coco2014", iters: int = 200, diff --git 
a/auto_round/compressors/base.py b/auto_round/compressors/base.py index 54c4fc033..d325bdbf2 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -1699,7 +1699,7 @@ def _quantize_via_rtn_blockwise(self, all_to_quantized_module_names: list[str]) cnt = 1 cnt += 1 - def _update_inputs(self, inputs, q_inputs): + def _update_inputs(self, inputs: dict, q_inputs: dict) -> tuple[dict, torch.Tensor]: keys = inputs.keys() input_id_str = [key for key in keys if key.startswith("hidden_state")] if len(input_id_str) != 1: @@ -1709,8 +1709,8 @@ def _update_inputs(self, inputs, q_inputs): ) inputs["input_ids"] = inputs.pop(input_id_str[0], None) if q_inputs is not None: - q_inputs["input_ids"] = q_inputs.pop(input_id_str[0], None) - return inputs, q_inputs["input_ids"] + q_inputs = q_inputs.pop(input_id_str[0], None) + return inputs, q_inputs def quantize(self) -> tuple[torch.nn.Module, dict[str, Any]]: """Quantize the model and return the quantized model along with layer configurations.The entry of AutoRound. @@ -2695,12 +2695,19 @@ def get_act_max_hook(module, input, output): continue return hook_handles - def _get_current_output(self, output, indices): + def _get_current_output(self, output: list[torch.Tensor], indices: list[int]) -> torch.Tensor: current_output = [output[x] for x in indices] current_output = torch.cat(current_output, dim=self.batch_dim) return current_output - def _get_current_q_output(self, block, input_ids, input_others, indices, device): + def _get_current_q_output( + self, + block: torch.nn.Module, + input_ids: list[torch.Tensor], + input_others: dict, + indices: list[int], + device: str, + ) -> torch.Tensor: current_input_ids, current_input_others = self._sampling_inputs( input_ids, input_others, @@ -2941,7 +2948,7 @@ def _quantize_block( clear_memory(input_ids) return None, output - def _split_inputs(self, inputs): + def _split_inputs(self, inputs: dict) -> tuple[torch.Tensor, dict]: input_ids = inputs["input_ids"] inputs.pop("input_ids", None) input_others = inputs @@ -3319,7 +3326,7 @@ def _step(self, scaler: Any, optimizer: Any, lr_schedule: Any): @torch.no_grad() def _sampling_inputs( cls, - input_ids: list[torch.Tensor], + input_ids: Union[list[torch.Tensor], dict], input_others: dict, indices: list[int], seqlen: int, diff --git a/auto_round/compressors/config.py b/auto_round/compressors/config.py index 27beee378..d42e13427 100644 --- a/auto_round/compressors/config.py +++ b/auto_round/compressors/config.py @@ -111,7 +111,7 @@ def __init__( guidance_scale (float): Control how much the image generation process follows the text prompt. The more it is, the more closely it follows the prompt (default is 7.5). num_inference_steps (int): The reference number of denoising steps (default is 50). - generator_seed (int): A sees that controls the initial noise from which an image is generated (default is None). + generator_seed (int): A seed that controls the initial noise for image generation (default is None). 
""" self.tuning_config = TuningExtraConfig( amp=amp, diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py index 980820500..8c4acb9e4 100644 --- a/auto_round/compressors/diffusion/compressor.py +++ b/auto_round/compressors/diffusion/compressor.py @@ -85,7 +85,7 @@ def __init__( guidance_scale: float = 7.5, num_inference_steps: int = 50, generator_seed: int = None, - scheme: Union[str, dict, QuantizationScheme] = "W8A16", + scheme: Union[str, dict, QuantizationScheme] = "W4A16", layer_config: dict[str, Union[str, dict, QuantizationScheme]] = None, dataset: Union[str, list, tuple, torch.utils.data.DataLoader] = "coco2014", iters: int = 200, @@ -160,31 +160,33 @@ def __init__( **kwargs, ) - def _update_inputs(self, inputs, q_inputs): + def _update_inputs(self, inputs: dict, q_inputs: dict) -> tuple[dict, dict]: # flux transformer model's blocks will update hidden_states and encoder_hidden_states input_id_str = [key for key in inputs.keys() if "hidden_state" in key] if q_inputs is not None: q_inputs = {k: q_inputs.pop(k, None) for k in input_id_str} return inputs, q_inputs - def _split_inputs(self, inputs): + def _split_inputs(self, inputs: dict) -> tuple[dict, dict]: input_id_str = [key for key in inputs.keys() if "hidden_state" in key] input_ids = {k: inputs.pop(k, None) for k in input_id_str} input_others = inputs return input_ids, input_others - def _get_current_output(self, output, indices): - if isinstance(output, list): - current_output = [output[x] for x in indices] - current_output = torch.cat(current_output, dim=self.batch_dim) - - elif isinstance(output, dict): - assert "hidden_states" in output - current_output = [output["hidden_states"][x] for x in indices] - current_output = torch.cat(current_output, dim=self.batch_dim) + def _get_current_output(self, output: dict, indices: list[int]) -> torch.Tensor: + assert "hidden_states" in output + current_output = [output["hidden_states"][x] for x in indices] + current_output = torch.cat(current_output, dim=self.batch_dim) return current_output - def _get_current_q_output(self, block, input_ids, input_others, indices, device): + def _get_current_q_output( + self, + block: torch.nn.Module, + input_ids: dict, + input_others: dict, + indices: list[int], + device: str, + ) -> torch.Tensor: output_config = output_configs.get(block.__class__.__name__, []) idx = None if "hidden_states" not in output_config else output_config.index("hidden_states") current_input_ids, current_input_others = self._sampling_inputs( diff --git a/auto_round/compressors/diffusion/dataset.py b/auto_round/compressors/diffusion/dataset.py index cb37d6b7f..521faef7a 100644 --- a/auto_round/compressors/diffusion/dataset.py +++ b/auto_round/compressors/diffusion/dataset.py @@ -27,7 +27,10 @@ class DiffusionDataset(Dataset): """Dataset for supervised fine-tuning.""" COCO_URL = { - "coco2014": "https://github.com/mlcommons/inference/raw/refs/heads/master/text_to_image/coco2014/captions/captions_source.tsv" + "coco2014": ( + "https://github.com/mlcommons/inference/raw/refs/heads/master/text_to_image/" + "coco2014/captions/captions_source.tsv" + ) } def __init__( diff --git a/auto_round/utils.py b/auto_round/utils.py index e7907d81d..da0b3f975 100644 --- a/auto_round/utils.py +++ b/auto_round/utils.py @@ -1635,7 +1635,7 @@ def diffusion_load_model( pretrained_model_name_or_path, device="cpu", torch_dtype="auto", - use_auto_mapping=True, + use_auto_mapping=False, trust_remote_code=True, model_dtype=None, **kwargs, @@ -2801,6 
+2801,7 @@ def is_mllm_model(model_or_path: Union[str, torch.nn.Module]): def is_diffusion_model(model_or_path: Union[str, object]): if isinstance(model_or_path, str): + index_file = None if not os.path.isdir(model_or_path): try: from huggingface_hub import hf_hub_download @@ -2812,6 +2813,8 @@ def is_diffusion_model(model_or_path: Union[str, object]): elif os.path.exists(os.path.join(model_or_path, "model_index.json")): index_file = os.path.join(model_or_path, "model_index.json") return index_file is not None + elif not isinstance(model_or_path, torch.nn.Module): + from diffusers.pipelines.pipeline_utils import DiffusionPipeline + return isinstance(model_or_path, DiffusionPipeline) else: - pipeline_utils = LazyImport("diffusers.pipelines.pipeline_utils") - return isinstance(model_or_path, pipeline_utils.DiffusionPipeline) + return False diff --git a/test/test_cuda/test_diffusion.py b/test/test_cuda/test_diffusion.py index a3a736324..93eadcd39 100644 --- a/test/test_cuda/test_diffusion.py +++ b/test/test_cuda/test_diffusion.py @@ -53,6 +53,7 @@ def test_diffusion_tune(self): scheme="MXFP4", iters=1, nsamples=1, + num_inference_steps=2, layer_config=layer_config, dataset="/dataset/captions_source.tsv", ) @@ -73,5 +74,13 @@ def test_block_name(self): self.assertTrue(any(["context_embedder" not in n for n in block_name])) + def test_diffusion_model_checker(self): + from auto_round.utils import is_diffusion_model + self.assertTrue(is_diffusion_model("/dataset/FLUX.1-dev")) + self.assertTrue(is_diffusion_model("/models/stable-diffusion-2-1")) + self.assertTrue(is_diffusion_model("/models/stable-diffusion-xl-base-1.0")) + self.assertFalse(is_diffusion_model("/models/Qwen3-8B")) + + if __name__ == "__main__": unittest.main() From 82b1dcc8e9f5aaca72ad9e3304dd991ba824d05f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 25 Sep 2025 11:29:29 +0000 Subject: [PATCH 6/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- auto_round/compressors/base.py | 14 +++++++------- auto_round/compressors/diffusion/compressor.py | 14 +++++++------- auto_round/utils.py | 1 + test/test_cuda/test_diffusion.py | 2 +- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index d325bdbf2..39f1b6575 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -2701,13 +2701,13 @@ def _get_current_output(self, output: list[torch.Tensor], indices: list[int]) -> return current_output def _get_current_q_output( - self, - block: torch.nn.Module, - input_ids: list[torch.Tensor], - input_others: dict, - indices: list[int], - device: str, - ) -> torch.Tensor: + self, + block: torch.nn.Module, + input_ids: list[torch.Tensor], + input_others: dict, + indices: list[int], + device: str, + ) -> torch.Tensor: current_input_ids, current_input_others = self._sampling_inputs( input_ids, input_others, diff --git a/auto_round/compressors/diffusion/compressor.py b/auto_round/compressors/diffusion/compressor.py index 8c4acb9e4..4c4b9a00e 100644 --- a/auto_round/compressors/diffusion/compressor.py +++ b/auto_round/compressors/diffusion/compressor.py @@ -180,13 +180,13 @@ def _get_current_output(self, output: dict, indices: list[int]) -> torch.Tensor: return current_output def _get_current_q_output( - self, - block: torch.nn.Module, - input_ids: dict, - input_others: dict, - indices: list[int], - device: str, - ) -> 
torch.Tensor: + self, + block: torch.nn.Module, + input_ids: dict, + input_others: dict, + indices: list[int], + device: str, + ) -> torch.Tensor: output_config = output_configs.get(block.__class__.__name__, []) idx = None if "hidden_states" not in output_config else output_config.index("hidden_states") current_input_ids, current_input_others = self._sampling_inputs( diff --git a/auto_round/utils.py b/auto_round/utils.py index da0b3f975..8fd528a6f 100644 --- a/auto_round/utils.py +++ b/auto_round/utils.py @@ -2815,6 +2815,7 @@ def is_diffusion_model(model_or_path: Union[str, object]): return index_file is not None elif not isinstance(model_or_path, torch.nn.Module): from diffusers.pipelines.pipeline_utils import DiffusionPipeline + return isinstance(model_or_path, DiffusionPipeline) else: return False diff --git a/test/test_cuda/test_diffusion.py b/test/test_cuda/test_diffusion.py index 93eadcd39..e6db98def 100644 --- a/test/test_cuda/test_diffusion.py +++ b/test/test_cuda/test_diffusion.py @@ -73,9 +73,9 @@ def test_block_name(self): self.assertTrue(len(block_name) == 2) self.assertTrue(any(["context_embedder" not in n for n in block_name])) - def test_diffusion_model_checker(self): from auto_round.utils import is_diffusion_model + self.assertTrue(is_diffusion_model("/dataset/FLUX.1-dev")) self.assertTrue(is_diffusion_model("/models/stable-diffusion-2-1")) self.assertTrue(is_diffusion_model("/models/stable-diffusion-xl-base-1.0")) From 1397a01c5f72a2eb44cc4660e2752fbe5331725e Mon Sep 17 00:00:00 2001 From: "Wang, Mengni" Date: Thu, 25 Sep 2025 19:34:14 +0800 Subject: [PATCH 7/9] Update test_diffusion.py --- test/test_cuda/test_diffusion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_cuda/test_diffusion.py b/test/test_cuda/test_diffusion.py index e6db98def..c5fa63471 100644 --- a/test/test_cuda/test_diffusion.py +++ b/test/test_cuda/test_diffusion.py @@ -57,7 +57,7 @@ def test_diffusion_tune(self): layer_config=layer_config, dataset="/dataset/captions_source.tsv", ) - # skip model saving since it taks much time + # skip model saving since it takes much time autoround.quantize() shutil.rmtree(self.save_dir, ignore_errors=True) From 1ba3548f0eae5b02fef453d32f6fe16aa9788a04 Mon Sep 17 00:00:00 2001 From: Mengni Wang Date: Thu, 25 Sep 2025 10:32:23 -0400 Subject: [PATCH 8/9] fix issue Signed-off-by: Mengni Wang --- auto_round/autoround.py | 6 +- auto_round/compressors/diffusion/dataset.py | 70 ++++++++++++++------- auto_round/utils.py | 5 +- 3 files changed, 52 insertions(+), 29 deletions(-) diff --git a/auto_round/autoround.py b/auto_round/autoround.py index 87f9987e5..68420d65c 100644 --- a/auto_round/autoround.py +++ b/auto_round/autoround.py @@ -146,11 +146,13 @@ def __new__( if (extra_config and not extra_config.mllm_config.is_default()) or is_mllm_model(model): logger.info("using MLLM mode for multimodal model.") model_cls.append(MLLMCompressor) - extra_config.diffusion_config = None + if extra_config: + extra_config.diffusion_config = None elif (extra_config and not extra_config.diffusion_config.is_default()) or is_diffusion_model(model): logger.info("using Diffusion mode for diffusion model.") model_cls.append(DiffusionCompressor) - extra_config.mllm_config = None + if extra_config: + extra_config.mllm_config = None else: if extra_config: extra_config.mllm_config = None diff --git a/auto_round/compressors/diffusion/dataset.py b/auto_round/compressors/diffusion/dataset.py index 521faef7a..ee1da49a6 100644 --- a/auto_round/compressors/diffusion/dataset.py 
+++ b/auto_round/compressors/diffusion/dataset.py
@@ -23,15 +23,38 @@
 from auto_round.utils import logger
 
 
-class DiffusionDataset(Dataset):
-    """Dataset for supervised fine-tuning."""
+DIFFUSION_DATASET: Dict[str, Dataset] = {}
+
+
+COCO_URL = {
+    "coco2014": (
+        "https://github.com/mlcommons/inference/raw/refs/heads/master/text_to_image/"
+        "coco2014/captions/captions_source.tsv"
+    )
+}
+
+
+def register_dataset(name_list):
+    """Class decorator to register a Dataset subclass to the registry.
+
+    Apply it above a Dataset subclass definition.
+
+    Args:
+        name_list: A string of comma-separated dataset names.
 
-    COCO_URL = {
-        "coco2014": (
-            "https://github.com/mlcommons/inference/raw/refs/heads/master/text_to_image/"
-            "coco2014/captions/captions_source.tsv"
-        )
-    }
+    Returns:
+        register: The inner registration function.
+    """
+
+    def register(dataset):
+        for name in name_list.replace(" ", "").split(","):
+            DIFFUSION_DATASET[name] = dataset
+
+    return register
+
+@register_dataset("local")
+class Text2ImgDataset(Dataset):
+    """Dataset of text prompts for diffusion model calibration."""
 
     def __init__(
         self,
@@ -42,22 +65,9 @@ def __init__(
         self.captions = []
         self.caption_ids = []
 
-        if os.path.exists(dataset_path):
-            logger.info(f"use dataset {dataset_path}, loading from disk...")
-            df = pd.read_csv(dataset_path, sep="\t")
-        else:
-            from io import StringIO
-
-            import requests
-
-            dataset_path = "coco2014"
+        logger.info(f"use dataset {dataset_path}, loading from disk...")
+        df = pd.read_csv(dataset_path, sep="\t")
 
-        if dataset_path in self.COCO_URL:
-            logger.info(f"use dataset {dataset_path}, downloading ...")
-            text_data = requests.get(self.COCO_URL[dataset_path]).text
-            df = pd.read_csv(StringIO(text_data), sep="\t")
-        else:
-            raise KeyError(f"{dataset_path} is not support, we support {self.COCO_URL.keys()}.")
         for index, row in df.iterrows():
             if nsamples > 0 and index + 1 > nsamples:
                 break
@@ -88,7 +98,19 @@ def get_diffusion_dataloader(
 
     Returns:
         DataLoader: The DataLoader for the calibrated datasets.
     """
-    dataset = DiffusionDataset(dataset, nsamples)
+    if dataset in COCO_URL:
+        import requests
+
+        logger.info(f"use dataset {dataset}, downloading ...")
+        text_data = requests.get(COCO_URL[dataset]).text
+        with open("captions_source.tsv", "w") as f:
+            f.write(text_data)
+        dataset = "captions_source.tsv"
+
+    if isinstance(dataset, str) and os.path.exists(dataset):
+        dataset = DIFFUSION_DATASET["local"](dataset, nsamples)
+    else:
+        raise ValueError("Only the coco2014 dataset or a local tsv file is supported now.")
     set_seed(seed)
     dataloader_params = {"batch_size": bs, "shuffle": True}
diff --git a/auto_round/utils.py b/auto_round/utils.py
index 8fd528a6f..d39520f42 100644
--- a/auto_round/utils.py
+++ b/auto_round/utils.py
@@ -2814,8 +2814,7 @@ def is_diffusion_model(model_or_path: Union[str, object]):
         index_file = os.path.join(model_or_path, "model_index.json")
         return index_file is not None
     elif not isinstance(model_or_path, torch.nn.Module):
-        from diffusers.pipelines.pipeline_utils import DiffusionPipeline
-
-        return isinstance(model_or_path, DiffusionPipeline)
+        pipeline_utils = LazyImport("diffusers.pipelines.pipeline_utils")
+        return isinstance(model_or_path, pipeline_utils.DiffusionPipeline)
     else:
         return False

From 5f6621e8dd601b688ca22f10c015138f9282e4fb Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 25 Sep 2025 14:33:26 +0000
Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/compressors/diffusion/dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/auto_round/compressors/diffusion/dataset.py b/auto_round/compressors/diffusion/dataset.py
index ee1da49a6..a716a8a58 100644
--- a/auto_round/compressors/diffusion/dataset.py
+++ b/auto_round/compressors/diffusion/dataset.py
@@ -22,7 +22,6 @@
 
 from auto_round.utils import logger
 
-
 DIFFUSION_DATASET: Dict[str, Dataset] = {}
 
 
@@ -52,6 +51,7 @@ def register(dataset):
 
     return register
 
+
 @register_dataset("local")
 class Text2ImgDataset(Dataset):
     """Dataset of text prompts for diffusion model calibration."""