Make the quantization torch.compile test a common utility.

sayakpaul · sayakpaul · commit edf66b7953b0 · 2025-06-07T09:01:24.000+05:30
diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py
@@ -45,13 +45,14 @@
     require_peft_backend,
     require_torch,
     require_torch_accelerator,
-    require_torch_gpu,
     require_torch_version_greater,
     require_transformers_version_greater,
     slow,
     torch_device,
 )
 
+from ..utils import QuantCompileMiscTests
+
 
 def get_some_linear_layer(model):
     if model.__class__.__name__ in ["SD3Transformer2DModel", "FluxTransformer2DModel"]:
@@ -860,23 +861,9 @@ def test_fp4_double_safe(self):
         self.test_serialization(quant_type="fp4", double_quant=True, safe_serialization=True)
 
 
-@require_torch_gpu
-@slow
-class Bnb4BitCompileTests(unittest.TestCase):
-    def setUp(self):
-        super().setUp()
-        gc.collect()
-        backend_empty_cache(torch_device)
-        torch.compiler.reset()
-
-    def tearDown(self):
-        super().tearDown()
-        gc.collect()
-        backend_empty_cache(torch_device)
-        torch.compiler.reset()
-
+class Bnb4BitCompileTests(QuantCompileMiscTests):
     @require_torch_version_greater("2.7.1")
-    def test_torch_compile_4bit(self):
+    def test_torch_compile(self):
         torch._dynamo.config.capture_dynamic_output_shape_ops = True
 
         quantization_config = PipelineQuantizationConfig(
@@ -886,15 +873,6 @@ def test_torch_compile_4bit(self):
                 "bnb_4bit_quant_type": "nf4",
                 "bnb_4bit_compute_dtype": torch.bfloat16,
             },
-            components_to_quantize=["transformer"],
+            components_to_quantize=["transformer", "text_encoder_2"],
         )
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-3-medium-diffusers",
-            quantization_config=quantization_config,
-            torch_dtype=torch.bfloat16,
-        ).to("cuda")
-        pipe.transformer.compile(fullgraph=True)
-
-        for _ in range(2):
-            # with torch._dynamo.config.patch(error_on_recompile=True):
-            pipe("a dog", num_inference_steps=4, max_sequence_length=16, height=256, width=256)
+        super().test_torch_compile(quantization_config=quantization_config)
diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py
@@ -28,6 +28,7 @@
     SD3Transformer2DModel,
     logging,
 )
+from diffusers.quantizers import PipelineQuantizationConfig
 from diffusers.utils import is_accelerate_version
 from diffusers.utils.testing_utils import (
     CaptureLogger,
@@ -42,11 +43,14 @@
     require_peft_version_greater,
     require_torch,
     require_torch_accelerator,
+    require_torch_version_greater_equal,
     require_transformers_version_greater,
     slow,
     torch_device,
 )
 
+from ..utils import QuantCompileMiscTests
+
 
 def get_some_linear_layer(model):
     if model.__class__.__name__ in ["SD3Transformer2DModel", "FluxTransformer2DModel"]:
@@ -773,3 +777,18 @@ def test_serialization_sharded(self):
         out_0 = self.model_0(**inputs)[0]
         out_1 = model_1(**inputs)[0]
         self.assertTrue(torch.equal(out_0, out_1))
+
+
+class Bnb8BitCompileTests(QuantCompileMiscTests):
+    @require_torch_version_greater_equal("2.6.0")
+    def test_torch_compile(self):
+        torch._dynamo.config.capture_dynamic_output_shape_ops = True
+
+        quantization_config = PipelineQuantizationConfig(
+            quant_backend="bitsandbytes_8bit",
+            quant_kwargs={
+                "load_in_8bit": True,
+            },
+            components_to_quantize=["transformer", "text_encoder_2"],
+        )
+        super().test_torch_compile(quantization_config=quantization_config, torch_dtype=torch.float16)
diff --git a/tests/quantization/test_torch_compile_utils.py b/tests/quantization/test_torch_compile_utils.py
@@ -0,0 +1,49 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Team Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import gc
+import unittest
+
+import torch
+
+from diffusers import DiffusionPipeline
+from diffusers.utils.testing_utils import backend_empty_cache, require_torch_gpu, slow, torch_device
+
+
+@require_torch_gpu
+@slow
+class QuantCompileMiscTests(unittest.TestCase):
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+        torch.compiler.reset()
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+        torch.compiler.reset()
+
+    def test_torch_compile(self, quantization_config, torch_dtype=torch.bfloat16):
+        pipe = DiffusionPipeline.from_pretrained(
+            "stabilityai/stable-diffusion-3-medium-diffusers",
+            quantization_config=quantization_config,
+            torch_dtype=torch_dtype,
+        ).to("cuda")
+        pipe.transformer.compile(fullgraph=True)
+
+        for _ in range(2):
+            # small resolutions to ensure speedy execution.
+            pipe("a dog", num_inference_steps=4, max_sequence_length=16, height=256, width=256)