diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index db8209835be4..8a476e428784 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -14,6 +14,7 @@ import torch.nn as nn from huggingface_hub import ModelCard, delete_repo from huggingface_hub.utils import is_jinja_available +from parameterized import parameterized from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer import diffusers @@ -32,7 +33,6 @@ UNet2DConditionModel, apply_faster_cache, ) -from diffusers.hooks import apply_group_offloading from diffusers.hooks.faster_cache import FasterCacheBlockHook, FasterCacheDenoiserHook from diffusers.hooks.first_block_cache import FirstBlockCacheConfig from diffusers.hooks.pyramid_attention_broadcast import PyramidAttentionBroadcastHook @@ -2244,80 +2244,6 @@ def test_layerwise_casting_inference(self): inputs = self.get_dummy_inputs(torch_device) _ = pipe(**inputs)[0] - @require_torch_accelerator - def test_group_offloading_inference(self): - if not self.test_group_offloading: - return - - def create_pipe(): - torch.manual_seed(0) - components = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe.set_progress_bar_config(disable=None) - return pipe - - def enable_group_offload_on_component(pipe, group_offloading_kwargs): - # We intentionally don't test VAE's here. This is because some tests enable tiling on the VAE. If - # tiling is enabled and a forward pass is run, when accelerator streams are used, the execution order of - # the layers is not traced correctly. This causes errors. For apply group offloading to VAE, a - # warmup forward pass (even with dummy small inputs) is recommended. - for component_name in [ - "text_encoder", - "text_encoder_2", - "text_encoder_3", - "transformer", - "unet", - "controlnet", - ]: - if not hasattr(pipe, component_name): - continue - component = getattr(pipe, component_name) - if not getattr(component, "_supports_group_offloading", True): - continue - if hasattr(component, "enable_group_offload"): - # For diffusers ModelMixin implementations - component.enable_group_offload(torch.device(torch_device), **group_offloading_kwargs) - else: - # For other models not part of diffusers - apply_group_offloading( - component, onload_device=torch.device(torch_device), **group_offloading_kwargs - ) - self.assertTrue( - all( - module._diffusers_hook.get_hook("group_offloading") is not None - for module in component.modules() - if hasattr(module, "_diffusers_hook") - ) - ) - for component_name in ["vae", "vqvae", "image_encoder"]: - component = getattr(pipe, component_name, None) - if isinstance(component, torch.nn.Module): - component.to(torch_device) - - def run_forward(pipe): - torch.manual_seed(0) - inputs = self.get_dummy_inputs(torch_device) - return pipe(**inputs)[0] - - pipe = create_pipe().to(torch_device) - output_without_group_offloading = run_forward(pipe) - - pipe = create_pipe() - enable_group_offload_on_component(pipe, {"offload_type": "block_level", "num_blocks_per_group": 1}) - output_with_group_offloading1 = run_forward(pipe) - - pipe = create_pipe() - enable_group_offload_on_component(pipe, {"offload_type": "leaf_level"}) - output_with_group_offloading2 = run_forward(pipe) - - if torch.is_tensor(output_without_group_offloading): - output_without_group_offloading = output_without_group_offloading.detach().cpu().numpy() - output_with_group_offloading1 = output_with_group_offloading1.detach().cpu().numpy() - output_with_group_offloading2 = output_with_group_offloading2.detach().cpu().numpy() - - self.assertTrue(np.allclose(output_without_group_offloading, output_with_group_offloading1, atol=1e-4)) - self.assertTrue(np.allclose(output_without_group_offloading, output_with_group_offloading2, atol=1e-4)) - def test_torch_dtype_dict(self): components = self.get_dummy_components() if not components: @@ -2364,7 +2290,7 @@ def test_pipeline_with_accelerator_device_map(self, expected_max_difference=1e-4 self.assertLess(max_diff, expected_max_difference) @require_torch_accelerator - def test_pipeline_level_group_offloading_sanity_checks(self): + def test_group_offloading_sanity_checks(self): components = self.get_dummy_components() pipe: DiffusionPipeline = self.pipeline_class(**components) @@ -2394,41 +2320,61 @@ def test_pipeline_level_group_offloading_sanity_checks(self): component_device = next(component.parameters())[0].device self.assertTrue(torch.device(component_device).type == torch.device(offload_device).type) + @parameterized.expand([("block_level"), ("leaf_level")]) @require_torch_accelerator - def test_pipeline_level_group_offloading_inference(self, expected_max_difference=1e-4): - components = self.get_dummy_components() - pipe: DiffusionPipeline = self.pipeline_class(**components) + def test_group_offloading_inference(self, offload_type: str = "block_level"): + if not self.test_group_offloading: + pytest.skip("`test_group_offloading` is disabled hence skipping.") - for name, component in pipe.components.items(): - if hasattr(component, "_supports_group_offloading"): - if not component._supports_group_offloading: - pytest.skip(f"{self.pipeline_class.__name__} is not suitable for this test.") + def create_pipe(): + torch.manual_seed(0) + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.set_progress_bar_config(disable=None) + return pipe - # Regular inference. - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - torch.manual_seed(0) - inputs = self.get_dummy_inputs(torch_device) - inputs["generator"] = torch.manual_seed(0) - out = pipe(**inputs)[0] + def enable_group_offload(pipe, group_offloading_kwargs): + # We intentionally don't test VAE's here. This is because some tests enable tiling on the VAE. If + # tiling is enabled and a forward pass is run, when accelerator streams are used, the execution order of + # the layers is not traced correctly. This causes errors. For apply group offloading to VAE, a + # warmup forward pass (even with dummy small inputs) is recommended. + exclude_modules = {"vae", "vqvae", "image_encoder"} + exclude_modules = list(exclude_modules & set(pipe.components.keys())) + pipe.enable_group_offload( + exclude_modules=exclude_modules, onload_device=torch_device, **group_offloading_kwargs + ) + for component_name, component in pipe.components.items(): + if component_name in exclude_modules: + continue + elif isinstance(component, torch.nn.Module): + assert all( + module._diffusers_hook.get_hook("group_offloading") is not None + for module in component.modules() + if hasattr(module, "_diffusers_hook") + ) - pipe.to("cpu") - del pipe + def run_forward(pipe): + torch.manual_seed(0) + inputs = self.get_dummy_inputs(torch_device) + return pipe(**inputs)[0] - # Inference with offloading - pipe: DiffusionPipeline = self.pipeline_class(**components) - offload_device = "cpu" - pipe.enable_group_offload( - onload_device=torch_device, - offload_device=offload_device, - offload_type="leaf_level", - ) - pipe.set_progress_bar_config(disable=None) - inputs["generator"] = torch.manual_seed(0) - out_offload = pipe(**inputs)[0] + pipe = create_pipe().to(torch_device) + output_without_group_offloading = run_forward(pipe) - max_diff = np.abs(to_np(out) - to_np(out_offload)).max() - self.assertLess(max_diff, expected_max_difference) + pipe = create_pipe() + if offload_type == "block_level": + offloading_kwargs = {"offload_type": "block_level", "num_blocks_per_group": 1} + else: + offloading_kwargs = {"offload_type": "leaf_level"} + enable_group_offload(pipe, offloading_kwargs) + + output_with_group_offloading = run_forward(pipe) + + if torch.is_tensor(output_without_group_offloading): + output_without_group_offloading = output_without_group_offloading.detach().cpu().numpy() + output_with_group_offloading = output_with_group_offloading.detach().cpu().numpy() + + assert np.allclose(output_without_group_offloading, output_with_group_offloading, atol=1e-4) @is_staging_test