2 changes: 1 addition & 1 deletion auto_round/__init__.py
@@ -14,7 +14,7 @@
from auto_round.autoround import AutoRound

# support for old api
from auto_round.autoround import AutoRoundLLM, AutoRoundMLLM, AutoRoundAdam
from auto_round.autoround import AutoRoundLLM, AutoRoundMLLM, AutoRoundAdam, AutoRoundDiffusion
from auto_round.utils import LazyImport


80 changes: 78 additions & 2 deletions auto_round/__main__.py
@@ -230,6 +230,25 @@ def __init__(self, *args, **kwargs):
help="the template for building training dataset. It can be a custom one.",
)

## ===================== diffusion model ==================
self.add_argument(
"--guidance_scale",
default=7.5,
type=float,
)

self.add_argument(
"--num_inference_steps",
default=50,
type=int,
)

self.add_argument(
"--generator_seed",
default=None,
type=int,
)

## ======================= eval =======================
self.add_argument(
"--tasks",
@@ -258,6 +277,21 @@ def __init__(self, *args, **kwargs):
"--eval_model_dtype", default=None, type=str, help="the torch_dytpe to load the model for evaluation."
)

## ======================= diffusion model eval =======================
self.add_argument("--prompt_file", default=None, type=str, help="the prompt file to load prmpt.")

self.add_argument("--prompt", default=None, type=str, help="the prompt for test.")

self.add_argument(
"--metrics",
"--metric",
default="clip",
help="support clip, clip-iqa, imagereward",
)

self.add_argument(
"--image_save_dir", default="./tmp_image_save", type=str, help="path to save generated images"
)

def setup_parser():
parser = BasicArgumentParser()
@@ -427,6 +461,7 @@ def tune(args):
)

from auto_round.compressors import (
DiffusionExtraConfig,
ExtraConfig,
MLLMExtraConfig,
SchemeExtraConfig,
@@ -463,9 +498,13 @@ def tune(args):
mllm_config = MLLMExtraConfig(
quant_nontext_module=args.quant_nontext_module, extra_data_dir=args.extra_data_dir, template=args.template
)
diffusion_config = DiffusionExtraConfig(
guidance_scale=args.guidance_scale, num_inference_steps=args.num_inference_steps, generator_seed=args.generator_seed
)
extra_config.tuning_config = tuning_config
extra_config.scheme_config = scheme_config
extra_config.mllm_config = mllm_config
extra_config.diffusion_config = diffusion_config

autoround: BaseCompressor = AutoRound(
model=model_name,
@@ -522,6 +561,45 @@ def tune(args):
model.eval()
clear_memory()

eval_model_dtype = get_model_dtype(args.eval_model_dtype, "auto")

# diffusion models follow a different evaluation path
if getattr(autoround, "diffusion", False):
pipe = autoround.pipe
pipe.to(model.dtype)
pipe.transformer = model
device_str = detect_device(device_str)
pipe = pipe.to(device_str)
if eval_model_dtype != "auto" and pipe.dtype != getattr(torch, eval_model_dtype):
pipe.to(getattr(torch, eval_model_dtype))

gen_kwargs = {
"guidance_scale": args.guidance_scale,
"output_type": "pil",
"num_inference_steps": args.num_inference_steps,
"generator": (
None
if args.generator_seed is None
else torch.Generator(device=pipe.device).manual_seed(args.generator_seed)
),
}
if not os.path.exists(args.image_save_dir):
os.makedirs(args.image_save_dir)

if args.prompt is not None:
outputs = pipe(prompt=args.prompt, **gen_kwargs)
outputs.images[0].save(os.path.join(args.image_save_dir, "img.png"))
logger.info(
f"Image generated with prompt {args.prompt} is saved as {os.path.join(args.image_save_dir, 'img.png')}"
)

if args.prompt_file is not None:
from auto_round.compressors.diffusion import diffusion_eval

metrics = args.metrics.split(",")
diffusion_eval(pipe, args.prompt_file, metrics, args.image_save_dir, 1, gen_kwargs)
return

lm_eval_version = get_library_version("lm-eval")

eval_folder = folders[-1]
@@ -543,8 +621,6 @@

import time

eval_model_dtype = get_model_dtype(args.eval_model_dtype, "auto")

if autoround.act_bits <= 8 or eval_gguf_model:
if eval_gguf_model:
# for file in os.listdir(eval_folder):
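
For reference, the diffusion evaluation branch added above boils down to roughly the following standalone sketch. This is a minimal illustration only: the pipeline id and prompt are placeholders (not taken from this PR), and it assumes a diffusers pipeline and a CUDA device are available.

import os

import torch
from diffusers import StableDiffusionPipeline

# Placeholder pipeline id; in the PR, `pipe` is the pipeline AutoRound just tuned.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# Mirrors the gen_kwargs built in __main__.py from the new CLI flags
# --guidance_scale, --num_inference_steps and --generator_seed.
gen_kwargs = {
    "guidance_scale": 7.5,
    "output_type": "pil",
    "num_inference_steps": 50,
    "generator": torch.Generator(device=pipe.device).manual_seed(42),
}

# Equivalent of --prompt plus --image_save_dir: generate one image and save it.
os.makedirs("./tmp_image_save", exist_ok=True)
image = pipe(prompt="a photo of an astronaut riding a horse", **gen_kwargs).images[0]
image.save(os.path.join("./tmp_image_save", "img.png"))
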
88 changes: 87 additions & 1 deletion auto_round/autoround.py
@@ -20,13 +20,14 @@
from auto_round.compressors import (
AdamCompressor,
BaseCompressor,
DiffusionCompressor,
ExtraConfig,
LLMCompressor,
MLLMCompressor,
)
from auto_round.logger import deprecated, logger
from auto_round.schemes import QuantizationScheme
from auto_round.utils import is_mllm_model
from auto_round.utils import is_mllm_model, is_diffusion_model


class AutoRound:
@@ -145,6 +146,11 @@ def __new__(
if (extra_config and not extra_config.mllm_config.is_default()) or is_mllm_model(model):
logger.info("using MLLM mode for multimodal model.")
model_cls.append(MLLMCompressor)
extra_config.diffusion_config = None
elif (extra_config and not extra_config.diffusion_config.is_default()) or is_diffusion_model(model):
logger.info("using Diffusion mode for diffusion model.")
model_cls.append(DiffusionCompressor)
extra_config.mllm_config = None
else:
if extra_config:
extra_config.mllm_config = None
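
In other words, the new branch is taken either when is_diffusion_model(model) fires or when a non-default diffusion config is supplied. Below is a minimal sketch of the config route, mirroring how tune() wires it up in __main__.py; the pipeline id is a placeholder, and both the no-argument ExtraConfig() construction and the extra_config keyword on AutoRound are assumptions, since neither is shown in this diff.

from auto_round import AutoRound
from auto_round.compressors import DiffusionExtraConfig, ExtraConfig

# Attribute assignment mirrors tune(); the exact ExtraConfig constructor is not shown here.
extra_config = ExtraConfig()
extra_config.diffusion_config = DiffusionExtraConfig(
    guidance_scale=7.5, num_inference_steps=50, generator_seed=42
)

# A non-default diffusion_config (or a detected diffusion model) routes
# AutoRound.__new__ to DiffusionCompressor.
autoround = AutoRound(
    model="black-forest-labs/FLUX.1-dev",  # placeholder pipeline id, not from this PR
    scheme="W8A16",
    extra_config=extra_config,  # assumed keyword, mirroring tune() in __main__.py
)
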
@@ -540,3 +546,83 @@ def __init__(
seed=seed,
**kwargs,
)


@deprecated("AutoRound")
class AutoRoundDiffusion(DiffusionCompressor):
"""Class for automatic rounding-based quantization with Diffusion models.
Args:
model: The PyTorch model to be quantized.
tokenizer: An optional tokenizer for processing input data; not used for diffusion models.
guidance_scale (float): Controls how closely the image generation process follows the text prompt.
Higher values follow the prompt more closely (default is 7.5).
num_inference_steps (int): The number of denoising steps (default is 50).
generator_seed (int): A seed that controls the initial noise from which an image is generated (default is None).
scheme (str | dict | QuantizationScheme): A preset scheme that defines the quantization configurations.
layer_config (dict): Configuration for weight quantization (default is None).
dataset: The path or name of the calibration dataset (default is "coco2014").
iters (int): Number of iterations (default is 200).
seqlen (int): Length of the sequence.
nsamples (int): Number of samples (default is 128).
batch_size (int): Batch size for training (default is 8).
gradient_accumulate_steps (int): Number of gradient accumulation steps (default is 1).
low_gpu_mem_usage (bool): Whether to use low GPU memory (default is False).
device_map (str | dict | int | torch.device, optional): Device placement map. Defaults to 0.
enable_torch_compile (bool): Whether to enable torch.compile to optimize quant_block/layer (default is False).
**kwargs: Additional keyword arguments.
"""

bits: int | None
group_size: int | None
sym: bool | None
data_type: str | None
act_bits: int | None
act_group_size: int | None
act_sym: bool | None
act_data_type: str | None
act_dynamic: bool | None
super_bits: int | None
super_group_size: int | None

def __init__(
self,
model: Union[object, str],
tokenizer=None,
guidance_scale: float = 7.5,
num_inference_steps: int = 50,
generator_seed: int = None,
scheme: Union[str, dict, QuantizationScheme] = "W8A16",
layer_config: dict[str, Union[str, dict, QuantizationScheme]] = None,
dataset: Union[str, list, tuple, torch.utils.data.DataLoader] = "coco2014",
iters: int = 200,
seqlen: int = 2048,
nsamples: int = 128,
batch_size: int = 8,
gradient_accumulate_steps: int = 1,
low_gpu_mem_usage: bool = False,
device_map: Union[str, torch.device, int, dict] = 0,
enable_torch_compile: bool = False,
seed: int = 42,
**kwargs,
):
super().__init__(
model=model,
tokenizer=None,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
generator_seed=generator_seed,
scheme=scheme,
layer_config=layer_config,
dataset=dataset,
iters=iters,
seqlen=seqlen,
nsamples=nsamples,
batch_size=batch_size,
gradient_accumulate_steps=gradient_accumulate_steps,
low_gpu_mem_usage=low_gpu_mem_usage,
device_map=device_map,
enable_torch_compile=enable_torch_compile,
seed=seed,
**kwargs,
)
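
A minimal usage sketch of the deprecated wrapper defined above. The pipeline id is a placeholder, and quantize()/save_quantized() are assumed to be inherited from the compressor base class, as with the existing AutoRoundLLM/AutoRoundMLLM wrappers; they are not shown in this diff.

from auto_round import AutoRoundDiffusion

ar = AutoRoundDiffusion(
    model="black-forest-labs/FLUX.1-dev",  # placeholder pipeline id, not from this PR
    scheme="W8A16",
    dataset="coco2014",
    iters=200,
    nsamples=128,
    batch_size=8,
    guidance_scale=7.5,
    num_inference_steps=50,
    generator_seed=42,
)
ar.quantize()                                   # assumed entry point from the base compressor
ar.save_quantized("./tmp_autoround_diffusion")  # assumed entry point from the base compressor
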
2 changes: 2 additions & 0 deletions auto_round/compressors/__init__.py
@@ -14,7 +14,9 @@

from auto_round.compressors.base import *
from auto_round.compressors.mllm.compressor import MLLMCompressor
from auto_round.compressors.diffusion.compressor import DiffusionCompressor
from auto_round.compressors.config import (
DiffusionExtraConfig,
ExtraConfig,
MLLMExtraConfig,
SchemeExtraConfig,