Commit 8fa4ebd

Support full int8 quantization for diffusers (#1193)
* Support full int8 quantization for diffusers
* Add SD quant API test
* apply black
* apply style
* reapply black
* Apply suggestions from code review

Co-authored-by: Ilyas Moutawwakil <[email protected]>
1 parent e6ed9f6 commit 8fa4ebd

6 files changed: +185 additions, -26 deletions


optimum/commands/export/openvino.py

Lines changed: 4 additions & 1 deletion

@@ -364,6 +364,9 @@ def run(self):
             )
 
         if self.args.quant_mode in ["nf4_f8e4m3", "nf4_f8e5m2", "int4_f8e4m3", "int4_f8e5m2"]:
+            if library_name == "diffusers":
+                raise NotImplementedError("Mixed precision quantization isn't supported for diffusers.")
+
             wc_config = prepare_wc_config(self.args, _DEFAULT_4BIT_CONFIG)
             wc_dtype, q_dtype = self.args.quant_mode.split("_")
             wc_config["dtype"] = wc_dtype
@@ -421,7 +424,7 @@ def run(self):
 
                 model_cls = OVSanaPipeline
             else:
-                raise NotImplementedError(f"Quantization in hybrid mode isn't supported for class {class_name}.")
+                raise NotImplementedError(f"Quantization isn't supported for class {class_name}.")
 
             model = model_cls.from_pretrained(self.args.model, export=True, quantization_config=quantization_config)
             model.save_pretrained(self.args.output)
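With this guard in place, the non-mixed `--quant-mode` values (int8, f8e4m3, f8e5m2) now apply to diffusers exports; only the mixed nf4/int4+fp8 modes are rejected. A minimal sketch of an equivalent CLI invocation, mirroring the test arguments added below — the model ID and output directory are illustrative placeholders, not from this commit:

    import subprocess

    # Placeholder model ID; the tests run against tiny fixture models instead.
    subprocess.run(
        "optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 "
        "--quant-mode int8 --dataset conceptual_captions --num-samples 1 "
        "--trust-remote-code sd_int8_ov",
        shell=True,
        check=True,
    )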

optimum/intel/openvino/configuration.py

Lines changed: 5 additions & 3 deletions

@@ -789,10 +789,12 @@ def post_init(self):
 
         if self.dataset is not None:
             speech_to_text_datasets = list(PREDEFINED_SPEECH_TO_TEXT_DATASETS.keys())
-            if self.dataset not in LANGUAGE_DATASETS + speech_to_text_datasets:
+            stable_diffusion_datasets = list(PREDEFINED_SD_DATASETS.keys())
+            if self.dataset not in LANGUAGE_DATASETS + speech_to_text_datasets + stable_diffusion_datasets:
                 raise ValueError(
-                    f"""You can only choose between the following datasets: {LANGUAGE_DATASETS} for LLMs or
-                    {speech_to_text_datasets} for speech-to-text models, but we found {self.dataset}."""
+                    f"""You can only choose between the following datasets: {LANGUAGE_DATASETS} for LLMs,
+                    {speech_to_text_datasets} for speech-to-text models or
+                    {stable_diffusion_datasets} for diffusion models, but we found {self.dataset}."""
                 )
 
         if self.bits != 8:
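The effect of the widened check is that predefined diffusion datasets now pass `post_init` validation. A small sketch, assuming `OVQuantizationConfig` validates eagerly on construction as the other configs in this module do; the dataset names come from this commit's tests:

    from optimum.intel import OVQuantizationConfig

    # Dataset names taken from the tests below; all are predefined SD datasets.
    for name in ("conceptual_captions", "laion/220k-GPT4Vision-captions-from-LIVIS", "laion/filtered-wit"):
        OVQuantizationConfig(dataset=name, num_samples=1)  # passes validation

    try:
        OVQuantizationConfig(dataset="not_a_real_dataset")
    except ValueError as err:
        print(err)  # message now also lists the diffusion dataset choices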

optimum/intel/openvino/modeling_diffusion.py

Lines changed: 12 additions & 7 deletions

@@ -534,7 +534,7 @@ def _from_pretrained(
         else:
             # why is this quantization not performed in __init__?
             if ov_pipeline_class.export_feature != "text-to-image":
-                raise NotImplementedError(f"Quantization in hybrid mode is not supported for {cls.__name__}")
+                raise NotImplementedError(f"Quantization is not supported for {cls.__name__}")
 
             from optimum.intel import OVQuantizer
 
@@ -548,10 +548,13 @@ def _from_pretrained(
             # same as in DiffusionPipeline.from_pretrained, we save where the model was instantiated from
             ov_pipeline.register_to_config(_name_or_path=config.get("_name_or_path", str(model_id)))
 
-            hybrid_quantization_config = deepcopy(quantization_config)
-            hybrid_quantization_config.quant_method = OVQuantizationMethod.HYBRID
             quantizer = OVQuantizer(ov_pipeline)
-            quantizer.quantize(ov_config=OVConfig(quantization_config=hybrid_quantization_config))
+            if isinstance(quantization_config, OVWeightQuantizationConfig):
+                hybrid_quantization_config = deepcopy(quantization_config)
+                hybrid_quantization_config.quant_method = OVQuantizationMethod.HYBRID
+                quantizer.quantize(ov_config=OVConfig(quantization_config=hybrid_quantization_config))
+            else:
+                quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config))
 
             return ov_pipeline
         ov_pipeline = ov_pipeline_class(
@@ -878,9 +881,11 @@ def reshape(
             self.text_encoder.model = self._reshape_text_encoder(
                 self.text_encoder.model,
                 batch_size,
-                getattr(self.tokenizer, "model_max_length", -1)
-                if "Gemma" not in self.tokenizer.__class__.__name__
-                else -1,
+                (
+                    getattr(self.tokenizer, "model_max_length", -1)
+                    if "Gemma" not in self.tokenizer.__class__.__name__
+                    else -1
+                ),
             )
 
         if self.text_encoder_2 is not None:
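The net effect of this branch is that the type of `quantization_config` selects the path: `OVWeightQuantizationConfig` keeps the previous hybrid behavior, while anything else (e.g. `OVQuantizationConfig`) is forwarded untouched to the quantizer. A sketch of the two entry points; the tiny checkpoint ID is a hypothetical stand-in:

    from optimum.intel import (
        OVQuantizationConfig,
        OVStableDiffusionPipeline,
        OVWeightQuantizationConfig,
    )

    model_id = "hf-internal-testing/tiny-stable-diffusion-torch"  # hypothetical

    # OVWeightQuantizationConfig with a dataset: copied, quant_method set to HYBRID.
    pipe_hybrid = OVStableDiffusionPipeline.from_pretrained(
        model_id,
        export=True,
        quantization_config=OVWeightQuantizationConfig(bits=8, dataset="conceptual_captions", num_samples=1),
    )

    # Any other config type is passed through as-is and reaches the new
    # full-quantization branch in OVQuantizer.
    pipe_full = OVStableDiffusionPipeline.from_pretrained(
        model_id,
        export=True,
        quantization_config=OVQuantizationConfig(dataset="conceptual_captions", num_samples=1),
    )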

optimum/intel/openvino/quantization.py

Lines changed: 13 additions & 0 deletions

@@ -440,6 +440,16 @@ def _quantize_ovbasemodel(
         # Quantize model(s)
         if isinstance(self.model, _OVModelForWhisper):
             self._quantize_whisper_model(quantization_config, calibration_dataset, **kwargs)
+        elif is_diffusers_available() and isinstance(self.model, OVDiffusionPipeline):
+            for name, sub_model in self.model.ov_submodels.items():
+                if name not in ("unet", "transformer"):
+                    _weight_only_quantization(sub_model, OVWeightQuantizationConfig(bits=8), **kwargs)
+                else:
+                    quantized_vision_model = _full_quantization(
+                        sub_model, quantization_config, calibration_dataset, **kwargs
+                    )
+                    getattr(self.model, name).model = quantized_vision_model
+            self.model.clear_requests()
         else:
             quantized_model = _full_quantization(
                 self.model.model, quantization_config, calibration_dataset, **kwargs
@@ -450,6 +460,9 @@ def _quantize_ovbasemodel(
             if calibration_dataset is None:
                 raise ValueError("Calibration dataset is required to run quantization.")
 
+            if is_diffusers_available() and isinstance(self.model, OVDiffusionPipeline):
+                raise NotImplementedError("Mixed precision quantization isn't supported for diffusers.")
+
             quantized_model = _mixed_quantization(self.model.model, quantization_config, calibration_dataset, **kwargs)
             self.model.model = quantized_model
             self.model.request = None
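For an already-exported pipeline, the same dispatch is reachable directly through `OVQuantizer`: the `unet`/`transformer` submodel is fully quantized with the given config, and every other submodel falls back to 8-bit weight-only compression. A sketch under those assumptions (hypothetical tiny checkpoint):

    from optimum.intel import OVConfig, OVQuantizationConfig, OVQuantizer, OVStableDiffusionPipeline

    # Hypothetical tiny checkpoint; any OpenVINO text-to-image pipeline works here.
    pipe = OVStableDiffusionPipeline.from_pretrained(
        "hf-internal-testing/tiny-stable-diffusion-torch", export=True
    )

    quantizer = OVQuantizer(pipe)
    quantizer.quantize(
        ov_config=OVConfig(
            quantization_config=OVQuantizationConfig(dataset="conceptual_captions", num_samples=1)
        )
    )
    # After this call: the unet is fully int8-quantized, the text encoder and
    # VAE submodels are compressed to 8-bit weights.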

tests/openvino/test_exporters_cli.py

Lines changed: 68 additions & 6 deletions

@@ -189,19 +189,23 @@ class OVCLIExportTestCase(unittest.TestCase):
             "int8",
             "--dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code",
             [14, 22, 21] if is_transformers_version("<=", "4.36.0") else [14, 22, 25],
-            [{"int8": 14}, {"int8": 21}, {"int8": 17}]
-            if is_transformers_version("<=", "4.36.0")
-            else [{"int8": 14}, {"int8": 22}, {"int8": 18}],
+            (
+                [{"int8": 14}, {"int8": 21}, {"int8": 17}]
+                if is_transformers_version("<=", "4.36.0")
+                else [{"int8": 14}, {"int8": 22}, {"int8": 18}]
+            ),
         ),
         (
             "automatic-speech-recognition-with-past",
             "whisper",
             "f8e4m3",
             "--dataset librispeech --num-samples 1 --smooth-quant-alpha 0.9 --trust-remote-code",
             [14, 22, 21] if is_transformers_version("<=", "4.36.0") else [14, 22, 25],
-            [{"f8e4m3": 14}, {"f8e4m3": 21}, {"f8e4m3": 17}]
-            if is_transformers_version("<=", "4.36.0")
-            else [{"f8e4m3": 14}, {"f8e4m3": 22}, {"f8e4m3": 18}],
+            (
+                [{"f8e4m3": 14}, {"f8e4m3": 21}, {"f8e4m3": 17}]
+                if is_transformers_version("<=", "4.36.0")
+                else [{"f8e4m3": 14}, {"f8e4m3": 22}, {"f8e4m3": 18}]
+            ),
         ),
         (
             "text-generation",
@@ -263,6 +267,62 @@ class OVCLIExportTestCase(unittest.TestCase):
                 {"f8e5m2": 2, "int4": 28},
             ],
         ),
+        (
+            "stable-diffusion",
+            "stable-diffusion",
+            "int8",
+            "--dataset conceptual_captions --num-samples 1 --trust-remote-code",
+            [112, 0, 0, 0],
+            [{"int8": 121}, {"int8": 42}, {"int8": 34}, {"int8": 64}],
+        ),
+        (
+            "stable-diffusion-xl",
+            "stable-diffusion-xl",
+            "f8e5m2",
+            "--dataset laion/220k-GPT4Vision-captions-from-LIVIS --num-samples 1 --trust-remote-code",
+            [174, 0, 0, 0, 0],
+            [{"f8e5m2": 183}, {"int8": 42}, {"int8": 34}, {"int8": 64}, {"int8": 66}],
+        ),
+        (
+            "latent-consistency",
+            "latent-consistency",
+            "f8e4m3",
+            "--dataset laion/filtered-wit --num-samples 1 --trust-remote-code",
+            [79, 0, 0, 0],
+            [{"f8e4m3": 84}, {"int8": 42}, {"int8": 34}, {"int8": 40}],
+        ),
     ]
 
     TEST_4BIT_CONFIGURATIONS = [
@@ -709,6 +769,8 @@ def test_exporters_cli_full_quantization(
             expected_fake_nodes_per_model = expected_fake_nodes_per_model[:-1]
         elif "text-generation" in task:
             submodels = [model]
+        elif any(x in task for x in ("stable-diffusion", "latent-consistency")):
+            submodels = model.ov_submodels.values()
         else:
             raise Exception("Unexpected task.")

tests/openvino/test_quantization.py

Lines changed: 83 additions & 9 deletions

@@ -19,26 +19,22 @@
 import itertools
 import logging
 import unittest
-from collections import defaultdict, Iterable
+from collections import defaultdict
+from collections.abc import Iterable
 from enum import Enum
 from functools import partial
 from typing import Union, Type
 
 import openvino as ov
 import pytest
-import evaluate
 import numpy as np
 import torch
-from datasets import load_dataset
 from parameterized import parameterized
 import nncf
 from transformers import (
     AutoModelForQuestionAnswering,
-    AutoModelForSequenceClassification,
     AutoTokenizer,
     AutoProcessor,
-    TrainingArguments,
-    default_data_collator,
 )
 from transformers.testing_utils import slow
 from transformers.utils.quantization_config import QuantizationMethod
@@ -116,9 +112,11 @@ class OVQuantizerTest(unittest.TestCase):
             smooth_quant_alpha=0.95,
         ),
         [14, 22, 21] if is_transformers_version("<=", "4.36.0") else [14, 22, 25],
-        [{"int8": 14}, {"int8": 21}, {"int8": 17}]
-        if is_transformers_version("<=", "4.36.0")
-        else [{"int8": 14}, {"int8": 22}, {"int8": 18}],
+        (
+            [{"int8": 14}, {"int8": 21}, {"int8": 17}]
+            if is_transformers_version("<=", "4.36.0")
+            else [{"int8": 14}, {"int8": 22}, {"int8": 18}]
+        ),
     ),
     (
         OVModelForCausalLM,
@@ -234,6 +232,77 @@ class OVQuantizerTest(unittest.TestCase):
            {"f8e5m2": 2, "int4": 28},
        ],
    ),
+    (
+        OVStableDiffusionPipeline,
+        "stable-diffusion",
+        dict(
+            weight_only=False,
+            dataset="conceptual_captions",
+            num_samples=1,
+            processor=MODEL_NAMES["stable-diffusion"],
+            trust_remote_code=True,
+        ),
+        [112, 0, 0, 0],
+        [{"int8": 121}, {"int8": 42}, {"int8": 34}, {"int8": 64}],
+    ),
+    (
+        OVStableDiffusionXLPipeline,
+        "stable-diffusion-xl",
+        dict(
+            weight_only=False,
+            dtype="f8e5m2",
+            dataset="laion/220k-GPT4Vision-captions-from-LIVIS",
+            num_samples=1,
+            processor=MODEL_NAMES["stable-diffusion-xl"],
+            trust_remote_code=True,
+        ),
+        [174, 0, 0, 0, 0],
+        [{"f8e5m2": 183}, {"int8": 42}, {"int8": 34}, {"int8": 64}, {"int8": 66}],
+    ),
+    (
+        OVLatentConsistencyModelPipeline,
+        "latent-consistency",
+        OVQuantizationConfig(
+            dtype="f8e4m3",
+            dataset="laion/filtered-wit",
+            num_samples=1,
+            trust_remote_code=True,
+        ),
+        [79, 0, 0, 0],
+        [{"f8e4m3": 84}, {"int8": 42}, {"int8": 34}, {"int8": 40}],
+    ),
 ]
 
 @parameterized.expand(SUPPORTED_ARCHITECTURES_TORCH_MODEL)
@@ -359,6 +428,11 @@ def test_ov_model_static_quantization_with_auto_dataset(
             tokens = tokenizer("This is a sample input", return_tensors="pt")
             outputs = ov_model(**tokens)
             self.assertTrue("logits" in outputs)
+        elif any(
+            x == model_cls
+            for x in (OVStableDiffusionPipeline, OVStableDiffusionXLPipeline, OVLatentConsistencyModelPipeline)
+        ):
+            submodels = ov_model.ov_submodels.values()
         else:
             raise Exception("Unexpected model class.")
