diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sana.py b/src/diffusers/pipelines/pag/pipeline_pag_sana.py index 416b2f7c60f2..d0bbb46b09e7 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sana.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sana.py @@ -268,7 +268,8 @@ def encode_prompt( else: batch_size = prompt_embeds.shape[0] - self.tokenizer.padding_side = "right" + if getattr(self, "tokenizer", None) is not None: + self.tokenizer.padding_side = "right" # See Section 3.1. of the paper. max_length = max_sequence_length diff --git a/src/diffusers/pipelines/sana/pipeline_sana.py b/src/diffusers/pipelines/sana/pipeline_sana.py index cca4dfe5e8ba..11c63be52a87 100644 --- a/src/diffusers/pipelines/sana/pipeline_sana.py +++ b/src/diffusers/pipelines/sana/pipeline_sana.py @@ -312,7 +312,8 @@ def encode_prompt( else: batch_size = prompt_embeds.shape[0] - self.tokenizer.padding_side = "right" + if getattr(self, "tokenizer", None) is not None: + self.tokenizer.padding_side = "right" # See Section 3.1. of the paper. max_length = max_sequence_length diff --git a/src/diffusers/utils/source_code_parsing_utils.py b/src/diffusers/utils/source_code_parsing_utils.py new file mode 100644 index 000000000000..5f94711c21d8 --- /dev/null +++ b/src/diffusers/utils/source_code_parsing_utils.py @@ -0,0 +1,52 @@ +import ast +import importlib +import inspect +import textwrap + + +class ReturnNameVisitor(ast.NodeVisitor): + """Thanks to ChatGPT for pairing.""" + + def __init__(self): + self.return_names = [] + + def visit_Return(self, node): + # Check if the return value is a tuple. + if isinstance(node.value, ast.Tuple): + for elt in node.value.elts: + if isinstance(elt, ast.Name): + self.return_names.append(elt.id) + else: + try: + self.return_names.append(ast.unparse(elt)) + except Exception: + self.return_names.append(str(elt)) + else: + if isinstance(node.value, ast.Name): + self.return_names.append(node.value.id) + else: + try: + self.return_names.append(ast.unparse(node.value)) + except Exception: + self.return_names.append(str(node.value)) + self.generic_visit(node) + + def _determine_parent_module(self, cls): + from diffusers import DiffusionPipeline + from diffusers.models.modeling_utils import ModelMixin + + if issubclass(cls, DiffusionPipeline): + return "pipelines" + elif issubclass(cls, ModelMixin): + return "models" + else: + raise NotImplementedError + + def get_ast_tree(self, cls, attribute_name="encode_prompt"): + parent_module_name = self._determine_parent_module(cls) + main_module = importlib.import_module(f"diffusers.{parent_module_name}") + current_cls_module = getattr(main_module, cls.__name__) + source_code = inspect.getsource(getattr(current_cls_module, attribute_name)) + source_code = textwrap.dedent(source_code) + tree = ast.parse(source_code) + return tree diff --git a/tests/pipelines/animatediff/test_animatediff.py b/tests/pipelines/animatediff/test_animatediff.py index 4913a46b8d4f..4088d46df5b2 100644 --- a/tests/pipelines/animatediff/test_animatediff.py +++ b/tests/pipelines/animatediff/test_animatediff.py @@ -548,6 +548,14 @@ def test_xformers_attention_forwardGenerator_pass(self): def test_vae_slicing(self): return super().test_vae_slicing(image_count=2) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "num_images_per_prompt": 1, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return 
super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_accelerator diff --git a/tests/pipelines/animatediff/test_animatediff_controlnet.py b/tests/pipelines/animatediff/test_animatediff_controlnet.py index 6fcf6fe44fb7..7bde663b111e 100644 --- a/tests/pipelines/animatediff/test_animatediff_controlnet.py +++ b/tests/pipelines/animatediff/test_animatediff_controlnet.py @@ -517,3 +517,11 @@ def test_vae_slicing(self, video_count=2): output_2 = pipe(**inputs) assert np.abs(output_2[0].flatten() - output_1[0].flatten()).max() < 1e-2 + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "num_images_per_prompt": 1, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) diff --git a/tests/pipelines/animatediff/test_animatediff_sdxl.py b/tests/pipelines/animatediff/test_animatediff_sdxl.py index 45fa6bfc5c6d..f9686ec005f7 100644 --- a/tests/pipelines/animatediff/test_animatediff_sdxl.py +++ b/tests/pipelines/animatediff/test_animatediff_sdxl.py @@ -21,7 +21,6 @@ IPAdapterTesterMixin, PipelineTesterMixin, SDFunctionTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -36,7 +35,6 @@ class AnimateDiffPipelineSDXLFastTests( IPAdapterTesterMixin, SDFunctionTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = AnimateDiffSDXLPipeline @@ -250,33 +248,6 @@ def test_to_dtype(self): model_dtypes = [component.dtype for component in pipe.components.values() if hasattr(component, "dtype")] self.assertTrue(all(dtype == torch.float16 for dtype in model_dtypes)) - def test_prompt_embeds(self): - components = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe.set_progress_bar_config(disable=None) - pipe.to(torch_device) - - inputs = self.get_dummy_inputs(torch_device) - prompt = inputs.pop("prompt") - - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = pipe.encode_prompt(prompt) - - pipe( - **inputs, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - ) - - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - @unittest.skipIf( torch_device != "cuda" or not is_xformers_available(), reason="XFormers attention is only available with CUDA and `xformers` installed", @@ -305,3 +276,11 @@ def test_xformers_attention_forwardGenerator_pass(self): max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max() self.assertLess(max_diff, 1e-4, "XFormers attention should not affect the inference results") + + @unittest.skip("Test currently not supported.") + def test_encode_prompt_works_in_isolation(self): + pass + + @unittest.skip("Functionality is tested elsewhere.") + def test_save_load_optional_components(self): + pass diff --git a/tests/pipelines/animatediff/test_animatediff_sparsectrl.py b/tests/pipelines/animatediff/test_animatediff_sparsectrl.py index 21b59d0252b2..3e33326c8a87 100644 --- a/tests/pipelines/animatediff/test_animatediff_sparsectrl.py +++ b/tests/pipelines/animatediff/test_animatediff_sparsectrl.py @@ -484,3 +484,11 @@ def test_free_init_with_schedulers(self): def test_vae_slicing(self): return 
super().test_vae_slicing(image_count=2) + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "num_images_per_prompt": 1, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) diff --git a/tests/pipelines/animatediff/test_animatediff_video2video.py b/tests/pipelines/animatediff/test_animatediff_video2video.py index bb1cb9882c69..bc771e148eb2 100644 --- a/tests/pipelines/animatediff/test_animatediff_video2video.py +++ b/tests/pipelines/animatediff/test_animatediff_video2video.py @@ -544,3 +544,11 @@ def test_free_noise_multi_prompt(self): inputs["strength"] = 0.5 inputs["prompt"] = {0: "Caterpillar on a leaf", 10: "Butterfly on a leaf", 42: "Error on a leaf"} pipe(**inputs).frames[0] + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "num_images_per_prompt": 1, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) diff --git a/tests/pipelines/animatediff/test_animatediff_video2video_controlnet.py b/tests/pipelines/animatediff/test_animatediff_video2video_controlnet.py index 5a4b507aff50..3babbbe4ba11 100644 --- a/tests/pipelines/animatediff/test_animatediff_video2video_controlnet.py +++ b/tests/pipelines/animatediff/test_animatediff_video2video_controlnet.py @@ -533,3 +533,11 @@ def test_free_noise_multi_prompt(self): inputs["strength"] = 0.5 inputs["prompt"] = {0: "Caterpillar on a leaf", 10: "Butterfly on a leaf", 42: "Error on a leaf"} pipe(**inputs).frames[0] + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "num_images_per_prompt": 1, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) diff --git a/tests/pipelines/audioldm2/test_audioldm2.py b/tests/pipelines/audioldm2/test_audioldm2.py index 95aaa370ef8b..66052392f07f 100644 --- a/tests/pipelines/audioldm2/test_audioldm2.py +++ b/tests/pipelines/audioldm2/test_audioldm2.py @@ -508,9 +508,14 @@ def test_to_dtype(self): model_dtypes = {key: component.dtype for key, component in components.items() if hasattr(component, "dtype")} self.assertTrue(all(dtype == torch.float16 for dtype in model_dtypes.values())) + @unittest.skip("Test not supported.") def test_sequential_cpu_offload_forward_pass(self): pass + @unittest.skip("Test not supported for now because of the use of `projection_model` in `encode_prompt()`.") + def test_encode_prompt_works_in_isolation(self): + pass + @nightly class AudioLDM2PipelineSlowTests(unittest.TestCase): diff --git a/tests/pipelines/aura_flow/test_pipeline_aura_flow.py b/tests/pipelines/aura_flow/test_pipeline_aura_flow.py index f0b67afcc052..c56aeb905ac3 100644 --- a/tests/pipelines/aura_flow/test_pipeline_aura_flow.py +++ b/tests/pipelines/aura_flow/test_pipeline_aura_flow.py @@ -5,9 +5,6 @@ from transformers import AutoTokenizer, UMT5EncoderModel from diffusers import AuraFlowPipeline, AuraFlowTransformer2DModel, AutoencoderKL, FlowMatchEulerDiscreteScheduler -from diffusers.utils.testing_utils import ( - 
torch_device, -) from ..test_pipelines_common import ( PipelineTesterMixin, @@ -90,37 +87,6 @@ def get_dummy_inputs(self, device, seed=0): } return inputs - def test_aura_flow_prompt_embeds(self): - pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) - inputs = self.get_dummy_inputs(torch_device) - - output_with_prompt = pipe(**inputs).images[0] - - inputs = self.get_dummy_inputs(torch_device) - prompt = inputs.pop("prompt") - - do_classifier_free_guidance = inputs["guidance_scale"] > 1 - ( - prompt_embeds, - prompt_attention_mask, - negative_prompt_embeds, - negative_prompt_attention_mask, - ) = pipe.encode_prompt( - prompt, - do_classifier_free_guidance=do_classifier_free_guidance, - device=torch_device, - ) - output_with_embeds = pipe( - prompt_embeds=prompt_embeds, - prompt_attention_mask=prompt_attention_mask, - negative_prompt_embeds=negative_prompt_embeds, - negative_prompt_attention_mask=negative_prompt_attention_mask, - **inputs, - ).images[0] - - max_diff = np.abs(output_with_prompt - output_with_embeds).max() - assert max_diff < 1e-4 - def test_attention_slicing_forward_pass(self): # Attention slicing needs to implemented differently for this because how single DiT and MMDiT # blocks interfere with each other. diff --git a/tests/pipelines/blipdiffusion/test_blipdiffusion.py b/tests/pipelines/blipdiffusion/test_blipdiffusion.py index 6d422745ce5a..e073f55aec9e 100644 --- a/tests/pipelines/blipdiffusion/test_blipdiffusion.py +++ b/tests/pipelines/blipdiffusion/test_blipdiffusion.py @@ -198,3 +198,7 @@ def test_blipdiffusion(self): assert ( np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 ), f" expected_slice {image_slice.flatten()}, but got {image_slice.flatten()}" + + @unittest.skip("Test not supported because of complexities in deriving query_embeds.") + def test_encode_prompt_works_in_isolation(self): + pass diff --git a/tests/pipelines/cogview3/test_cogview3plus.py b/tests/pipelines/cogview3/test_cogview3plus.py index 4619de81d535..79dffd230a75 100644 --- a/tests/pipelines/cogview3/test_cogview3plus.py +++ b/tests/pipelines/cogview3/test_cogview3plus.py @@ -232,6 +232,9 @@ def test_attention_slicing_forward_pass( "Attention slicing should not affect the inference results", ) + def test_encode_prompt_works_in_isolation(self): + return super().test_encode_prompt_works_in_isolation(atol=1e-3, rtol=1e-3) + @slow @require_torch_accelerator diff --git a/tests/pipelines/controlnet/test_controlnet.py b/tests/pipelines/controlnet/test_controlnet.py index e2c0c60ddfa4..157eefd3154b 100644 --- a/tests/pipelines/controlnet/test_controlnet.py +++ b/tests/pipelines/controlnet/test_controlnet.py @@ -288,6 +288,13 @@ def test_controlnet_lcm_custom_timesteps(self): assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + class StableDiffusionMultiControlNetPipelineFastTests( IPAdapterTesterMixin, PipelineTesterMixin, PipelineKarrasSchedulerTesterMixin, unittest.TestCase @@ -522,6 +529,13 @@ def test_inference_multiple_prompt_input(self): assert image.shape == (4, 64, 64, 3) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + 
"do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + class StableDiffusionMultiControlNetOneModelPipelineFastTests( IPAdapterTesterMixin, PipelineTesterMixin, PipelineKarrasSchedulerTesterMixin, unittest.TestCase @@ -707,6 +721,13 @@ def test_save_pretrained_raise_not_implemented_exception(self): except NotImplementedError: pass + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_accelerator diff --git a/tests/pipelines/controlnet/test_controlnet_blip_diffusion.py b/tests/pipelines/controlnet/test_controlnet_blip_diffusion.py index b4d3e3aaa8ed..eedda4e21722 100644 --- a/tests/pipelines/controlnet/test_controlnet_blip_diffusion.py +++ b/tests/pipelines/controlnet/test_controlnet_blip_diffusion.py @@ -222,3 +222,7 @@ def test_blipdiffusion_controlnet(self): assert ( np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 ), f" expected_slice {expected_slice}, but got {image_slice.flatten()}" + + @unittest.skip("Test not supported because of complexities in deriving query_embeds.") + def test_encode_prompt_works_in_isolation(self): + pass diff --git a/tests/pipelines/controlnet/test_controlnet_img2img.py b/tests/pipelines/controlnet/test_controlnet_img2img.py index 6bcf6532fa90..100765ee34cb 100644 --- a/tests/pipelines/controlnet/test_controlnet_img2img.py +++ b/tests/pipelines/controlnet/test_controlnet_img2img.py @@ -189,6 +189,13 @@ def test_xformers_attention_forwardGenerator_pass(self): def test_inference_batch_single_identical(self): self._test_inference_batch_single_identical(expected_max_diff=2e-3) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + class StableDiffusionMultiControlNetPipelineFastTests( IPAdapterTesterMixin, PipelineTesterMixin, PipelineKarrasSchedulerTesterMixin, unittest.TestCase @@ -391,6 +398,13 @@ def test_save_pretrained_raise_not_implemented_exception(self): except NotImplementedError: pass + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_accelerator diff --git a/tests/pipelines/controlnet/test_controlnet_inpaint.py b/tests/pipelines/controlnet/test_controlnet_inpaint.py index 95f6814ac92a..b06590e13cb6 100644 --- a/tests/pipelines/controlnet/test_controlnet_inpaint.py +++ b/tests/pipelines/controlnet/test_controlnet_inpaint.py @@ -176,6 +176,13 @@ def test_xformers_attention_forwardGenerator_pass(self): def test_inference_batch_single_identical(self): self._test_inference_batch_single_identical(expected_max_diff=2e-3) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": 
torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + class ControlNetSimpleInpaintPipelineFastTests(ControlNetInpaintPipelineFastTests): pipeline_class = StableDiffusionControlNetInpaintPipeline @@ -443,6 +450,13 @@ def test_save_pretrained_raise_not_implemented_exception(self): except NotImplementedError: pass + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_accelerator diff --git a/tests/pipelines/controlnet/test_controlnet_sdxl.py b/tests/pipelines/controlnet/test_controlnet_sdxl.py index dda6339427f8..1e540738b60e 100644 --- a/tests/pipelines/controlnet/test_controlnet_sdxl.py +++ b/tests/pipelines/controlnet/test_controlnet_sdxl.py @@ -55,7 +55,6 @@ PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -67,7 +66,6 @@ class StableDiffusionXLControlNetPipelineFastTests( PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = StableDiffusionXLControlNetPipeline @@ -212,8 +210,9 @@ def test_xformers_attention_forwardGenerator_pass(self): def test_inference_batch_single_identical(self): self._test_inference_batch_single_identical(expected_max_diff=2e-3) + @unittest.skip("We test this functionality elsewhere already.") def test_save_load_optional_components(self): - self._test_save_load_optional_components() + pass @require_torch_accelerator def test_stable_diffusion_xl_offloads(self): @@ -297,45 +296,6 @@ def test_stable_diffusion_xl_multi_prompts(self): # ensure the results are not equal assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4 - # Copied from test_stable_diffusion_xl.py - def test_stable_diffusion_xl_prompt_embeds(self): - components = self.get_dummy_components() - sd_pipe = self.pipeline_class(**components) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - # forward without prompt embeds - inputs = self.get_dummy_inputs(torch_device) - inputs["prompt"] = 2 * [inputs["prompt"]] - inputs["num_images_per_prompt"] = 2 - - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - # forward with prompt embeds - inputs = self.get_dummy_inputs(torch_device) - prompt = 2 * [inputs.pop("prompt")] - - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = sd_pipe.encode_prompt(prompt) - - output = sd_pipe( - **inputs, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - ) - image_slice_2 = output.images[0, -3:, -3:, -1] - - # make sure that it's equal - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 - def test_controlnet_sdxl_guess(self): device = "cpu" @@ -483,7 +443,7 @@ def new_step(self, *args, **kwargs): class StableDiffusionXLMultiControlNetPipelineFastTests( - PipelineTesterMixin, 
PipelineKarrasSchedulerTesterMixin, SDXLOptionalComponentsTesterMixin, unittest.TestCase + PipelineTesterMixin, PipelineKarrasSchedulerTesterMixin, unittest.TestCase ): pipeline_class = StableDiffusionXLControlNetPipeline params = TEXT_TO_IMAGE_PARAMS @@ -685,12 +645,13 @@ def test_xformers_attention_forwardGenerator_pass(self): def test_inference_batch_single_identical(self): self._test_inference_batch_single_identical(expected_max_diff=2e-3) + @unittest.skip("We test this functionality elsewhere already.") def test_save_load_optional_components(self): - return self._test_save_load_optional_components() + pass class StableDiffusionXLMultiControlNetOneModelPipelineFastTests( - PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, SDXLOptionalComponentsTesterMixin, unittest.TestCase + PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, unittest.TestCase ): pipeline_class = StableDiffusionXLControlNetPipeline params = TEXT_TO_IMAGE_PARAMS @@ -862,6 +823,10 @@ def test_control_guidance_switch(self): def test_attention_slicing_forward_pass(self): return self._test_attention_slicing_forward_pass(expected_max_diff=2e-3) + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + @unittest.skipIf( torch_device != "cuda" or not is_xformers_available(), reason="XFormers attention is only available with CUDA and `xformers` installed", @@ -872,9 +837,6 @@ def test_xformers_attention_forwardGenerator_pass(self): def test_inference_batch_single_identical(self): self._test_inference_batch_single_identical(expected_max_diff=2e-3) - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - def test_negative_conditions(self): components = self.get_dummy_components() pipe = self.pipeline_class(**components) diff --git a/tests/pipelines/controlnet/test_controlnet_sdxl_img2img.py b/tests/pipelines/controlnet/test_controlnet_sdxl_img2img.py index 88708b5cd1ab..bf5da16fcbb8 100644 --- a/tests/pipelines/controlnet/test_controlnet_sdxl_img2img.py +++ b/tests/pipelines/controlnet/test_controlnet_sdxl_img2img.py @@ -327,42 +327,3 @@ def test_stable_diffusion_xl_multi_prompts(self): # ensure the results are not equal assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4 - - # Copied from test_stable_diffusion_xl.py - def test_stable_diffusion_xl_prompt_embeds(self): - components = self.get_dummy_components() - sd_pipe = self.pipeline_class(**components) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - # forward without prompt embeds - inputs = self.get_dummy_inputs(torch_device) - inputs["prompt"] = 2 * [inputs["prompt"]] - inputs["num_images_per_prompt"] = 2 - - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - # forward with prompt embeds - inputs = self.get_dummy_inputs(torch_device) - prompt = 2 * [inputs.pop("prompt")] - - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = sd_pipe.encode_prompt(prompt) - - output = sd_pipe( - **inputs, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - ) - image_slice_2 = output.images[0, -3:, -3:, -1] - - # make sure that it's equal - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 diff --git 
a/tests/pipelines/controlnet_hunyuandit/test_controlnet_hunyuandit.py b/tests/pipelines/controlnet_hunyuandit/test_controlnet_hunyuandit.py index 5c6054ccb605..10be77e3bab4 100644 --- a/tests/pipelines/controlnet_hunyuandit/test_controlnet_hunyuandit.py +++ b/tests/pipelines/controlnet_hunyuandit/test_controlnet_hunyuandit.py @@ -178,6 +178,12 @@ def test_save_load_optional_components(self): # TODO(YiYi) need to fix later pass + @unittest.skip( + "Test not supported as `encode_prompt` is called two times separately which deviates from about 99% of the pipelines we have." + ) + def test_encode_prompt_works_in_isolation(self): + pass + @slow @require_torch_accelerator diff --git a/tests/pipelines/controlnet_xs/test_controlnetxs.py b/tests/pipelines/controlnet_xs/test_controlnetxs.py index 1da5b52bd050..74af4b6775cc 100644 --- a/tests/pipelines/controlnet_xs/test_controlnetxs.py +++ b/tests/pipelines/controlnet_xs/test_controlnetxs.py @@ -335,6 +335,13 @@ def test_to_device(self): output_device = pipe(**self.get_dummy_inputs(torch_device))[0] self.assertTrue(np.isnan(to_np(output_device)).sum() == 0) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_accelerator diff --git a/tests/pipelines/controlnet_xs/test_controlnetxs_sdxl.py b/tests/pipelines/controlnet_xs/test_controlnetxs_sdxl.py index 644bb669d8e8..24a8b9cd5739 100644 --- a/tests/pipelines/controlnet_xs/test_controlnetxs_sdxl.py +++ b/tests/pipelines/controlnet_xs/test_controlnetxs_sdxl.py @@ -57,7 +57,6 @@ PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -68,7 +67,6 @@ class StableDiffusionXLControlNetXSPipelineFastTests( PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = StableDiffusionXLControlNetXSPipeline @@ -201,6 +199,10 @@ def test_xformers_attention_forwardGenerator_pass(self): def test_inference_batch_single_identical(self): self._test_inference_batch_single_identical(expected_max_diff=2e-3) + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + @require_torch_accelerator # Copied from test_controlnet_sdxl.py def test_stable_diffusion_xl_offloads(self): @@ -285,49 +287,6 @@ def test_stable_diffusion_xl_multi_prompts(self): # ensure the results are not equal assert np.abs(image_slice_1.flatten() - image_slice_3.flatten()).max() > 1e-4 - # Copied from test_stable_diffusion_xl.py - def test_stable_diffusion_xl_prompt_embeds(self): - components = self.get_dummy_components() - sd_pipe = self.pipeline_class(**components) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - # forward without prompt embeds - inputs = self.get_dummy_inputs(torch_device) - inputs["prompt"] = 2 * [inputs["prompt"]] - inputs["num_images_per_prompt"] = 2 - - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - # forward with prompt embeds - inputs = self.get_dummy_inputs(torch_device) - prompt = 2 * [inputs.pop("prompt")] - - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, -
negative_pooled_prompt_embeds, - ) = sd_pipe.encode_prompt(prompt) - - output = sd_pipe( - **inputs, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - ) - image_slice_2 = output.images[0, -3:, -3:, -1] - - # make sure that it's equal - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1.1e-4 - - # Copied from test_stable_diffusion_xl.py - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - # Copied from test_controlnetxs.py def test_to_dtype(self): components = self.get_dummy_components() diff --git a/tests/pipelines/deepfloyd_if/test_if.py b/tests/pipelines/deepfloyd_if/test_if.py index 43ba7bf643b1..295b29f12e8c 100644 --- a/tests/pipelines/deepfloyd_if/test_if.py +++ b/tests/pipelines/deepfloyd_if/test_if.py @@ -67,9 +67,6 @@ def get_dummy_inputs(self, device, seed=0): return inputs - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") @require_accelerator def test_save_load_float16(self): @@ -99,6 +96,10 @@ def test_xformers_attention_forwardGenerator_pass(self): def test_save_load_dduf(self): super().test_save_load_dduf(atol=1e-2, rtol=1e-2) + @unittest.skip("Functionality is tested elsewhere.") + def test_save_load_optional_components(self): + pass + @slow @require_torch_accelerator diff --git a/tests/pipelines/deepfloyd_if/test_if_img2img.py b/tests/pipelines/deepfloyd_if/test_if_img2img.py index 47d7386be9ed..da06dc355896 100644 --- a/tests/pipelines/deepfloyd_if/test_if_img2img.py +++ b/tests/pipelines/deepfloyd_if/test_if_img2img.py @@ -73,9 +73,6 @@ def get_dummy_inputs(self, device, seed=0): return inputs - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - @unittest.skipIf( torch_device != "cuda" or not is_xformers_available(), reason="XFormers attention is only available with CUDA and `xformers` installed", @@ -110,6 +107,10 @@ def test_inference_batch_single_identical(self): def test_save_load_dduf(self): super().test_save_load_dduf(atol=1e-2, rtol=1e-2) + @unittest.skip("Functionality is tested elsewhere.") + def test_save_load_optional_components(self): + pass + @slow @require_torch_accelerator diff --git a/tests/pipelines/deepfloyd_if/test_if_img2img_superresolution.py b/tests/pipelines/deepfloyd_if/test_if_img2img_superresolution.py index 96456506c037..77f2f9c7bb64 100644 --- a/tests/pipelines/deepfloyd_if/test_if_img2img_superresolution.py +++ b/tests/pipelines/deepfloyd_if/test_if_img2img_superresolution.py @@ -83,9 +83,6 @@ def get_dummy_inputs(self, device, seed=0): def test_xformers_attention_forwardGenerator_pass(self): self._test_xformers_attention_forwardGenerator_pass(expected_max_diff=1e-3) - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") @require_accelerator def test_save_load_float16(self): @@ -108,6 +105,10 @@ def test_inference_batch_single_identical(self): def test_save_load_dduf(self): super().test_save_load_dduf(atol=1e-2, rtol=1e-2) + @unittest.skip("Functionality is tested elsewhere.") + def test_save_load_optional_components(self): + pass + @slow @require_torch_accelerator diff --git a/tests/pipelines/deepfloyd_if/test_if_inpainting.py 
b/tests/pipelines/deepfloyd_if/test_if_inpainting.py index 412fbd3d37a9..a62d95725774 100644 --- a/tests/pipelines/deepfloyd_if/test_if_inpainting.py +++ b/tests/pipelines/deepfloyd_if/test_if_inpainting.py @@ -83,9 +83,6 @@ def get_dummy_inputs(self, device, seed=0): def test_xformers_attention_forwardGenerator_pass(self): self._test_xformers_attention_forwardGenerator_pass(expected_max_diff=1e-3) - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") @require_accelerator def test_save_load_float16(self): @@ -108,6 +105,10 @@ def test_inference_batch_single_identical(self): def test_save_load_dduf(self): super().test_save_load_dduf(atol=1e-2, rtol=1e-2) + @unittest.skip("Test done elsewhere.") + def test_save_load_optional_components(self, expected_max_difference=0.0001): + pass + @slow @require_torch_accelerator diff --git a/tests/pipelines/deepfloyd_if/test_if_inpainting_superresolution.py b/tests/pipelines/deepfloyd_if/test_if_inpainting_superresolution.py index 2ecf9fba8165..f98284bef646 100644 --- a/tests/pipelines/deepfloyd_if/test_if_inpainting_superresolution.py +++ b/tests/pipelines/deepfloyd_if/test_if_inpainting_superresolution.py @@ -85,9 +85,6 @@ def get_dummy_inputs(self, device, seed=0): def test_xformers_attention_forwardGenerator_pass(self): self._test_xformers_attention_forwardGenerator_pass(expected_max_diff=1e-3) - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") @require_accelerator def test_save_load_float16(self): @@ -110,6 +107,10 @@ def test_inference_batch_single_identical(self): def test_save_load_dduf(self): super().test_save_load_dduf(atol=1e-2, rtol=1e-2) + @unittest.skip("Test done elsewhere.") + def test_save_load_optional_components(self, expected_max_difference=0.0001): + pass + @slow @require_torch_accelerator diff --git a/tests/pipelines/deepfloyd_if/test_if_superresolution.py b/tests/pipelines/deepfloyd_if/test_if_superresolution.py index 9d37efa3bde4..435b0cc6ec07 100644 --- a/tests/pipelines/deepfloyd_if/test_if_superresolution.py +++ b/tests/pipelines/deepfloyd_if/test_if_superresolution.py @@ -78,9 +78,6 @@ def get_dummy_inputs(self, device, seed=0): def test_xformers_attention_forwardGenerator_pass(self): self._test_xformers_attention_forwardGenerator_pass(expected_max_diff=1e-3) - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") @require_accelerator def test_save_load_float16(self): @@ -103,6 +100,10 @@ def test_inference_batch_single_identical(self): def test_save_load_dduf(self): super().test_save_load_dduf(atol=1e-2, rtol=1e-2) + @unittest.skip("Test done elsewhere.") + def test_save_load_optional_components(self, expected_max_difference=0.0001): + pass + @slow @require_torch_accelerator diff --git a/tests/pipelines/hunyuan_dit/test_hunyuan_dit.py b/tests/pipelines/hunyuan_dit/test_hunyuan_dit.py index 6c9117a55c36..18c41c1ae881 100644 --- a/tests/pipelines/hunyuan_dit/test_hunyuan_dit.py +++ b/tests/pipelines/hunyuan_dit/test_hunyuan_dit.py @@ -298,6 +298,12 @@ def test_fused_qkv_projections(self): original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2 ), "Original outputs should match when fused QKV projections are disabled." 
+ @unittest.skip( + "Test not supported as `encode_prompt` is called two times separately which deviates from about 99% of the pipelines we have." + ) + def test_encode_prompt_works_in_isolation(self): + pass + @slow @require_torch_accelerator diff --git a/tests/pipelines/i2vgen_xl/test_i2vgenxl.py b/tests/pipelines/i2vgen_xl/test_i2vgenxl.py index f6ac22a9b575..868a40c9fb53 100644 --- a/tests/pipelines/i2vgen_xl/test_i2vgenxl.py +++ b/tests/pipelines/i2vgen_xl/test_i2vgenxl.py @@ -228,6 +228,10 @@ def test_num_videos_per_prompt(self): assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + @unittest.skip("Test not supported for now.") + def test_encode_prompt_works_in_isolation(self): + pass + @slow @require_torch_accelerator diff --git a/tests/pipelines/kolors/test_kolors_img2img.py b/tests/pipelines/kolors/test_kolors_img2img.py index 9f1ca43a081f..025bcf2fac74 100644 --- a/tests/pipelines/kolors/test_kolors_img2img.py +++ b/tests/pipelines/kolors/test_kolors_img2img.py @@ -152,3 +152,7 @@ def test_inference_batch_single_identical(self): def test_float16_inference(self): super().test_float16_inference(expected_max_diff=7e-2) + + @unittest.skip("Test not supported because kolors img2img doesn't take pooled embeds as inputs unlike kolors t2i.") + def test_encode_prompt_works_in_isolation(self): + pass diff --git a/tests/pipelines/latent_consistency_models/test_latent_consistency_models.py b/tests/pipelines/latent_consistency_models/test_latent_consistency_models.py index b60a4553cded..4db79ad16a03 100644 --- a/tests/pipelines/latent_consistency_models/test_latent_consistency_models.py +++ b/tests/pipelines/latent_consistency_models/test_latent_consistency_models.py @@ -213,6 +213,13 @@ def callback_inputs_test(pipe, i, t, callback_kwargs): output = pipe(**inputs)[0] assert output.abs().sum() == 0 + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_gpu diff --git a/tests/pipelines/latent_consistency_models/test_latent_consistency_models_img2img.py b/tests/pipelines/latent_consistency_models/test_latent_consistency_models_img2img.py index 386e60c54ac6..1187d555bb5e 100644 --- a/tests/pipelines/latent_consistency_models/test_latent_consistency_models_img2img.py +++ b/tests/pipelines/latent_consistency_models/test_latent_consistency_models_img2img.py @@ -220,6 +220,13 @@ def callback_inputs_test(pipe, i, t, callback_kwargs): output = pipe(**inputs)[0] assert output.abs().sum() == 0 + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_gpu diff --git a/tests/pipelines/latte/test_latte.py b/tests/pipelines/latte/test_latte.py index 315da3ed46ea..fb74bce284bb 100644 --- a/tests/pipelines/latte/test_latte.py +++ b/tests/pipelines/latte/test_latte.py @@ -279,6 +279,10 @@ def test_save_load_optional_components(self): def test_xformers_attention_forwardGenerator_pass(self): super()._test_xformers_attention_forwardGenerator_pass(test_mean_pixel_difference=False) + @unittest.skip("Test
not supported because `encode_prompt()` has multiple returns.") + def test_encode_prompt_works_in_isolation(self): + pass + @slow @require_torch_gpu diff --git a/tests/pipelines/lumina2/test_pipeline_lumina2.py b/tests/pipelines/lumina2/test_pipeline_lumina2.py index 5f05f1f0faf7..3e783b80e7e4 100644 --- a/tests/pipelines/lumina2/test_pipeline_lumina2.py +++ b/tests/pipelines/lumina2/test_pipeline_lumina2.py @@ -1,6 +1,5 @@ import unittest -import numpy as np import torch from transformers import AutoTokenizer, Gemma2Config, Gemma2Model @@ -10,7 +9,6 @@ Lumina2Text2ImgPipeline, Lumina2Transformer2DModel, ) -from diffusers.utils.testing_utils import torch_device from ..test_pipelines_common import PipelineTesterMixin @@ -117,32 +115,3 @@ def get_dummy_inputs(self, device, seed=0): "output_type": "np", } return inputs - - def test_lumina_prompt_embeds(self): - pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) - inputs = self.get_dummy_inputs(torch_device) - - output_with_prompt = pipe(**inputs).images[0] - - inputs = self.get_dummy_inputs(torch_device) - prompt = inputs.pop("prompt") - - do_classifier_free_guidance = inputs["guidance_scale"] > 1 - ( - prompt_embeds, - prompt_attention_mask, - negative_prompt_embeds, - negative_prompt_attention_mask, - ) = pipe.encode_prompt( - prompt, - do_classifier_free_guidance=do_classifier_free_guidance, - device=torch_device, - ) - output_with_embeds = pipe( - prompt_embeds=prompt_embeds, - prompt_attention_mask=prompt_attention_mask, - **inputs, - ).images[0] - - max_diff = np.abs(output_with_prompt - output_with_embeds).max() - assert max_diff < 1e-4 diff --git a/tests/pipelines/pag/test_pag_animatediff.py b/tests/pipelines/pag/test_pag_animatediff.py index 59ce9cc0a987..6fa96275406f 100644 --- a/tests/pipelines/pag/test_pag_animatediff.py +++ b/tests/pipelines/pag/test_pag_animatediff.py @@ -553,3 +553,11 @@ def test_pag_applied_layers(self): pag_layers = ["motion_modules.42"] with self.assertRaises(ValueError): pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "num_images_per_prompt": 1, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) diff --git a/tests/pipelines/pag/test_pag_controlnet_sd.py b/tests/pipelines/pag/test_pag_controlnet_sd.py index 8a7eb6f0c675..ee97b0507a34 100644 --- a/tests/pipelines/pag/test_pag_controlnet_sd.py +++ b/tests/pipelines/pag/test_pag_controlnet_sd.py @@ -28,9 +28,7 @@ StableDiffusionControlNetPipeline, UNet2DConditionModel, ) -from diffusers.utils.testing_utils import ( - enable_full_determinism, -) +from diffusers.utils.testing_utils import enable_full_determinism, torch_device from diffusers.utils.torch_utils import randn_tensor from ..pipeline_params import ( @@ -246,3 +244,10 @@ def test_pag_uncond(self): max_diff = np.abs(image_slice.flatten() - expected_slice).max() assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}" + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return 
super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) diff --git a/tests/pipelines/pag/test_pag_controlnet_sd_inpaint.py b/tests/pipelines/pag/test_pag_controlnet_sd_inpaint.py index 0a7413e99926..25ef5d253d68 100644 --- a/tests/pipelines/pag/test_pag_controlnet_sd_inpaint.py +++ b/tests/pipelines/pag/test_pag_controlnet_sd_inpaint.py @@ -32,10 +32,7 @@ StableDiffusionControlNetPAGInpaintPipeline, UNet2DConditionModel, ) -from diffusers.utils.testing_utils import ( - enable_full_determinism, - floats_tensor, -) +from diffusers.utils.testing_utils import enable_full_determinism, floats_tensor, torch_device from diffusers.utils.torch_utils import randn_tensor from ..pipeline_params import ( @@ -243,3 +240,10 @@ def test_pag_uncond(self): max_diff = np.abs(image_slice.flatten() - expected_slice).max() assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}" + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) diff --git a/tests/pipelines/pag/test_pag_controlnet_sdxl.py b/tests/pipelines/pag/test_pag_controlnet_sdxl.py index 6400cc2b7cab..0588e26286a8 100644 --- a/tests/pipelines/pag/test_pag_controlnet_sdxl.py +++ b/tests/pipelines/pag/test_pag_controlnet_sdxl.py @@ -42,7 +42,6 @@ PipelineFromPipeTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -54,7 +53,6 @@ class StableDiffusionXLControlNetPAGPipelineFastTests( IPAdapterTesterMixin, PipelineLatentTesterMixin, PipelineFromPipeTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = StableDiffusionXLControlNetPAGPipeline @@ -214,9 +212,6 @@ def test_pag_disable_enable(self): assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 assert np.abs(out.flatten() - out_pag_enabled.flatten()).max() > 1e-3 - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - def test_pag_cfg(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator components = self.get_dummy_components() @@ -263,3 +258,7 @@ def test_pag_uncond(self): max_diff = np.abs(image_slice.flatten() - expected_slice).max() assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}" + + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass diff --git a/tests/pipelines/pag/test_pag_controlnet_sdxl_img2img.py b/tests/pipelines/pag/test_pag_controlnet_sdxl_img2img.py index b02f4d8b4561..63c7d9fbee2d 100644 --- a/tests/pipelines/pag/test_pag_controlnet_sdxl_img2img.py +++ b/tests/pipelines/pag/test_pag_controlnet_sdxl_img2img.py @@ -41,7 +41,6 @@ PipelineFromPipeTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -53,7 +52,6 @@ class StableDiffusionXLControlNetPAGImg2ImgPipelineFastTests( PipelineLatentTesterMixin, PipelineTesterMixin, PipelineFromPipeTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = StableDiffusionXLControlNetPAGImg2ImgPipeline diff --git a/tests/pipelines/pag/test_pag_hunyuan_dit.py b/tests/pipelines/pag/test_pag_hunyuan_dit.py index db0e257760ed..3bc4240de90e 100644 --- 
a/tests/pipelines/pag/test_pag_hunyuan_dit.py +++ b/tests/pipelines/pag/test_pag_hunyuan_dit.py @@ -356,3 +356,9 @@ def test_pag_applied_layers(self): pag_layers = ["blocks.0", r"blocks\.1"] pipe._set_pag_attn_processor(pag_applied_layers=pag_layers, do_classifier_free_guidance=False) assert len(pipe.pag_attn_processors) == 2 + + @unittest.skip( + "Test not supported as `encode_prompt` is called two times separately which deviates from about 99% of the pipelines we have." + ) + def test_encode_prompt_works_in_isolation(self): + pass diff --git a/tests/pipelines/pag/test_pag_kolors.py b/tests/pipelines/pag/test_pag_kolors.py index cf9466988d85..9a5764e24f59 100644 --- a/tests/pipelines/pag/test_pag_kolors.py +++ b/tests/pipelines/pag/test_pag_kolors.py @@ -252,3 +252,6 @@ def test_pag_inference(self): def test_inference_batch_single_identical(self): self._test_inference_batch_single_identical(expected_max_diff=3e-3) + + def test_encode_prompt_works_in_isolation(self): + return super().test_encode_prompt_works_in_isolation(atol=1e-3, rtol=1e-3) diff --git a/tests/pipelines/pag/test_pag_sd.py b/tests/pipelines/pag/test_pag_sd.py index 17e3f7038439..8c3818c1c125 100644 --- a/tests/pipelines/pag/test_pag_sd.py +++ b/tests/pipelines/pag/test_pag_sd.py @@ -47,7 +47,6 @@ PipelineFromPipeTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -59,7 +58,6 @@ class StableDiffusionPAGPipelineFastTests( IPAdapterTesterMixin, PipelineLatentTesterMixin, PipelineFromPipeTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = StableDiffusionPAGPipeline @@ -278,6 +276,13 @@ def test_pag_inference(self): max_diff = np.abs(image_slice.flatten() - expected_slice).max() self.assertLessEqual(max_diff, 1e-3) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_gpu diff --git a/tests/pipelines/pag/test_pag_sd_img2img.py b/tests/pipelines/pag/test_pag_sd_img2img.py index f44204f82486..8b13a76907af 100644 --- a/tests/pipelines/pag/test_pag_sd_img2img.py +++ b/tests/pipelines/pag/test_pag_sd_img2img.py @@ -210,6 +210,13 @@ def test_pag_inference(self): max_diff = np.abs(image_slice.flatten() - expected_slice).max() self.assertLessEqual(max_diff, 1e-3) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_gpu diff --git a/tests/pipelines/pag/test_pag_sd_inpaint.py b/tests/pipelines/pag/test_pag_sd_inpaint.py index a528b66cc72a..93b562792c14 100644 --- a/tests/pipelines/pag/test_pag_sd_inpaint.py +++ b/tests/pipelines/pag/test_pag_sd_inpaint.py @@ -48,7 +48,6 @@ PipelineFromPipeTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -60,7 +59,6 @@ class StableDiffusionPAGInpaintPipelineFastTests( IPAdapterTesterMixin, PipelineLatentTesterMixin, PipelineFromPipeTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = StableDiffusionPAGInpaintPipeline @@ -244,6
+242,13 @@ def test_pag_inference(self): max_diff = np.abs(image_slice.flatten() - expected_slice).max() assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}" + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict, atol=1e-3, rtol=1e-3) + @slow @require_torch_gpu diff --git a/tests/pipelines/pag/test_pag_sdxl.py b/tests/pipelines/pag/test_pag_sdxl.py index 589573385677..1d7dfb95a993 100644 --- a/tests/pipelines/pag/test_pag_sdxl.py +++ b/tests/pipelines/pag/test_pag_sdxl.py @@ -47,7 +47,6 @@ PipelineFromPipeTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -59,7 +58,6 @@ class StableDiffusionXLPAGPipelineFastTests( IPAdapterTesterMixin, PipelineLatentTesterMixin, PipelineFromPipeTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = StableDiffusionXLPAGPipeline @@ -193,9 +191,6 @@ def test_pag_disable_enable(self): assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 assert np.abs(out.flatten() - out_pag_enabled.flatten()).max() > 1e-3 - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - def test_pag_applied_layers(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator components = self.get_dummy_components() @@ -288,6 +283,10 @@ def test_pag_inference(self): max_diff = np.abs(image_slice.flatten() - expected_slice).max() self.assertLessEqual(max_diff, 1e-3) + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + @slow @require_torch_gpu diff --git a/tests/pipelines/pag/test_pag_sdxl_img2img.py b/tests/pipelines/pag/test_pag_sdxl_img2img.py index 33bd47bfee10..ffaeaa749ce4 100644 --- a/tests/pipelines/pag/test_pag_sdxl_img2img.py +++ b/tests/pipelines/pag/test_pag_sdxl_img2img.py @@ -58,7 +58,6 @@ PipelineFromPipeTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -70,7 +69,6 @@ class StableDiffusionXLPAGImg2ImgPipelineFastTests( IPAdapterTesterMixin, PipelineLatentTesterMixin, PipelineFromPipeTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = StableDiffusionXLPAGImg2ImgPipeline @@ -241,9 +239,6 @@ def test_pag_disable_enable(self): assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 assert np.abs(out.flatten() - out_pag_enabled.flatten()).max() > 1e-3 - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - def test_pag_inference(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator components = self.get_dummy_components(requires_aesthetics_score=True) @@ -267,6 +262,10 @@ def test_pag_inference(self): max_diff = np.abs(image_slice.flatten() - expected_slice).max() assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}" + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + @slow @require_torch_gpu diff --git a/tests/pipelines/pag/test_pag_sdxl_inpaint.py b/tests/pipelines/pag/test_pag_sdxl_inpaint.py index 8378b07e9f74..191b44118ef8 100644 --- 
a/tests/pipelines/pag/test_pag_sdxl_inpaint.py +++ b/tests/pipelines/pag/test_pag_sdxl_inpaint.py @@ -58,7 +58,6 @@ PipelineFromPipeTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -70,7 +69,6 @@ class StableDiffusionXLPAGInpaintPipelineFastTests( IPAdapterTesterMixin, PipelineLatentTesterMixin, PipelineFromPipeTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = StableDiffusionXLPAGInpaintPipeline @@ -246,9 +244,6 @@ def test_pag_disable_enable(self): assert np.abs(out.flatten() - out_pag_disabled.flatten()).max() < 1e-3 assert np.abs(out.flatten() - out_pag_enabled.flatten()).max() > 1e-3 - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - def test_pag_inference(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator components = self.get_dummy_components(requires_aesthetics_score=True) @@ -272,6 +267,10 @@ def test_pag_inference(self): max_diff = np.abs(image_slice.flatten() - expected_slice).max() assert max_diff < 1e-3, f"output is different from expected, {image_slice.flatten()}" + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + @slow @require_torch_gpu diff --git a/tests/pipelines/pia/test_pia.py b/tests/pipelines/pia/test_pia.py index ead6c2b208de..1156bf32dafa 100644 --- a/tests/pipelines/pia/test_pia.py +++ b/tests/pipelines/pia/test_pia.py @@ -438,3 +438,11 @@ def test_xformers_attention_forwardGenerator_pass(self): max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max() self.assertLess(max_diff, 1e-4, "XFormers attention should not affect the inference results") + + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "num_images_per_prompt": 1, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) diff --git a/tests/pipelines/stable_audio/test_stable_audio.py b/tests/pipelines/stable_audio/test_stable_audio.py index b2ca3ddd0e84..01df82056ce2 100644 --- a/tests/pipelines/stable_audio/test_stable_audio.py +++ b/tests/pipelines/stable_audio/test_stable_audio.py @@ -413,6 +413,10 @@ def test_sequential_cpu_offload_forward_pass(self): def test_sequential_offload_forward_pass_twice(self): pass + @unittest.skip("Test not supported because `rotary_embed_dim` doesn't have any sensible default.") + def test_encode_prompt_works_in_isolation(self): + pass + @nightly @require_torch_gpu diff --git a/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py b/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py index 07e4244e3c68..1d8f4a4f6c78 100644 --- a/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py +++ b/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py @@ -307,6 +307,14 @@ def test_stable_cascade_decoder_single_prompt_multiple_image_embeddings_with_gui batch_size * prior_num_images_per_prompt * decoder_num_images_per_prompt ) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "batch_size": 1, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return 
super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_gpu diff --git a/tests/pipelines/stable_cascade/test_stable_cascade_prior.py b/tests/pipelines/stable_cascade/test_stable_cascade_prior.py index 0208224a1d80..db1c7703a5fa 100644 --- a/tests/pipelines/stable_cascade/test_stable_cascade_prior.py +++ b/tests/pipelines/stable_cascade/test_stable_cascade_prior.py @@ -275,6 +275,10 @@ def test_stable_cascade_decoder_prompt_embeds(self): assert np.abs(output_prompt.image_embeddings - output_prompt_embeds.image_embeddings).max() < 1e-5 + @unittest.skip("Test not supported because dtype determination relies on text encoder.") + def test_encode_prompt_works_in_isolation(self): + pass + @slow @require_torch_gpu diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion.py b/tests/pipelines/stable_diffusion/test_stable_diffusion.py index d60092c4e5cb..c4ce562c3f0f 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion.py @@ -375,84 +375,6 @@ def test_stable_diffusion_negative_prompt_embeds(self): assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 - def test_stable_diffusion_prompt_embeds_no_text_encoder_or_tokenizer(self): - components = self.get_dummy_components() - sd_pipe = StableDiffusionPipeline(**components) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - inputs = self.get_dummy_inputs(torch_device) - inputs["negative_prompt"] = "this is a negative prompt" - - # forward - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - inputs = self.get_dummy_inputs(torch_device) - prompt = inputs.pop("prompt") - negative_prompt = "this is a negative prompt" - - prompt_embeds, negative_prompt_embeds = sd_pipe.encode_prompt( - prompt, - torch_device, - 1, - True, - negative_prompt=negative_prompt, - prompt_embeds=None, - negative_prompt_embeds=None, - ) - - inputs["prompt_embeds"] = prompt_embeds - inputs["negative_prompt_embeds"] = negative_prompt_embeds - - sd_pipe.text_encoder = None - sd_pipe.tokenizer = None - - # forward - output = sd_pipe(**inputs) - image_slice_2 = output.images[0, -3:, -3:, -1] - - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 - - def test_stable_diffusion_prompt_embeds_with_plain_negative_prompt_list(self): - components = self.get_dummy_components() - sd_pipe = StableDiffusionPipeline(**components) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - inputs = self.get_dummy_inputs(torch_device) - negative_prompt = 3 * ["this is a negative prompt"] - inputs["negative_prompt"] = negative_prompt - inputs["prompt"] = 3 * [inputs["prompt"]] - - # forward - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - inputs = self.get_dummy_inputs(torch_device) - inputs["negative_prompt"] = negative_prompt - prompt = 3 * [inputs.pop("prompt")] - - text_inputs = sd_pipe.tokenizer( - prompt, - padding="max_length", - max_length=sd_pipe.tokenizer.model_max_length, - truncation=True, - return_tensors="pt", - ) - text_inputs = text_inputs["input_ids"].to(torch_device) - - prompt_embeds = sd_pipe.text_encoder(text_inputs)[0] - - inputs["prompt_embeds"] = prompt_embeds - - # forward - output = sd_pipe(**inputs) - image_slice_2 = output.images[0, -3:, -3:, -1] - - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 - 
def test_stable_diffusion_ddim_factor_8(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator @@ -850,6 +772,13 @@ def test_pipeline_accept_tuple_type_unet_sample_size(self): pipe = StableDiffusionPipeline.from_pretrained(sd_repo_id, unet=customised_unet) assert pipe.unet.config.sample_size == sample_size + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_gpu diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py index 7ba0bb5a4a5d..ae40822ade80 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py @@ -391,6 +391,13 @@ def callback_on_step_end(pipe, i, t, callback_kwargs): # they should be the same assert torch.allclose(intermediate_latent, output_interrupted, atol=1e-4) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_gpu diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py index ff04ea2cfc5d..e2a7821beb31 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py @@ -394,6 +394,13 @@ def test_ip_adapter(self, from_simple=False, expected_pipe_slice=None): ) return super().test_ip_adapter(expected_pipe_slice=expected_pipe_slice) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict, atol=1e-3, rtol=1e-3) + class StableDiffusionSimpleInpaintPipelineFastTests(StableDiffusionInpaintPipelineFastTests): pipeline_class = StableDiffusionInpaintPipeline diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py index a7375d37eccd..5790d4dccec7 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion.py @@ -312,6 +312,13 @@ def test_attention_slicing_forward_pass(self): def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(expected_max_diff=3e-3) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_accelerator diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_attend_and_excite.py 
b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_attend_and_excite.py index 1caad9500b24..c66491b15c66 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_attend_and_excite.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_attend_and_excite.py @@ -204,6 +204,13 @@ def test_karras_schedulers_shape(self): def test_from_pipe_consistent_forward_pass_cpu_offload(self): super().test_from_pipe_consistent_forward_pass_cpu_offload(expected_max_diff=5e-3) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @require_torch_accelerator @nightly diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py index 430d99781a25..e66c270a5f91 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_depth.py @@ -369,6 +369,13 @@ def test_attention_slicing_forward_pass(self): def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(expected_max_diff=7e-3) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_gpu diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_diffedit.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_diffedit.py index 1cb03ddd96d7..567e3e2fd466 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_diffedit.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_diffedit.py @@ -291,6 +291,13 @@ def test_inversion_dpm(self): max_diff = np.abs(image_slice.flatten() - expected_slice).max() self.assertLessEqual(max_diff, 1e-3) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @require_torch_gpu @nightly diff --git a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py index b99a1816456e..e20b07640cb4 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py @@ -152,6 +152,13 @@ def test_stable_diffusion_inpaint(self): def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(expected_max_diff=3e-3) + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @require_torch_gpu diff --git 
a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_latent_upscale.py b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_latent_upscale.py index 134175bdaffe..52458286df8b 100644 --- a/tests/pipelines/stable_diffusion_2/test_stable_diffusion_latent_upscale.py +++ b/tests/pipelines/stable_diffusion_2/test_stable_diffusion_latent_upscale.py @@ -279,6 +279,10 @@ def test_karras_schedulers_shape(self): def test_float16_inference(self): super().test_float16_inference(expected_max_diff=5e-1) + @unittest.skip("Test not supported for a weird use of `text_input_ids`.") + def test_encode_prompt_works_in_isolation(self): + pass + @require_torch_gpu @slow diff --git a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py index 24d03a035066..340176367fd6 100644 --- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py +++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py @@ -156,39 +156,6 @@ def test_stable_diffusion_3_different_negative_prompts(self): # Outputs should be different here assert max_diff > 1e-2 - def test_stable_diffusion_3_prompt_embeds(self): - pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) - inputs = self.get_dummy_inputs(torch_device) - - output_with_prompt = pipe(**inputs).images[0] - - inputs = self.get_dummy_inputs(torch_device) - prompt = inputs.pop("prompt") - - do_classifier_free_guidance = inputs["guidance_scale"] > 1 - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = pipe.encode_prompt( - prompt, - prompt_2=None, - prompt_3=None, - do_classifier_free_guidance=do_classifier_free_guidance, - device=torch_device, - ) - output_with_embeds = pipe( - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - **inputs, - ).images[0] - - max_diff = np.abs(output_with_prompt - output_with_embeds).max() - assert max_diff < 1e-4 - def test_fused_qkv_projections(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator components = self.get_dummy_components() diff --git a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py index 358c8d9aee12..95c9256489b4 100644 --- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py +++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py @@ -159,39 +159,7 @@ def test_stable_diffusion_3_img2img_different_negative_prompts(self): # Outputs should be different here assert max_diff > 1e-2 - def test_stable_diffusion_3_img2img_prompt_embeds(self): - pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) - inputs = self.get_dummy_inputs(torch_device) - - output_with_prompt = pipe(**inputs).images[0] - - inputs = self.get_dummy_inputs(torch_device) - prompt = inputs.pop("prompt") - - do_classifier_free_guidance = inputs["guidance_scale"] > 1 - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = pipe.encode_prompt( - prompt, - prompt_2=None, - prompt_3=None, - do_classifier_free_guidance=do_classifier_free_guidance, - device=torch_device, - ) - output_with_embeds = pipe( - prompt_embeds=prompt_embeds, - 
negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - **inputs, - ).images[0] - - max_diff = np.abs(output_with_prompt - output_with_embeds).max() - assert max_diff < 1e-4 - + @unittest.skip("Skip for now.") def test_multi_vae(self): pass diff --git a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py index a37ea3fc39c5..4090306dec72 100644 --- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py +++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py @@ -164,38 +164,5 @@ def test_stable_diffusion_3_inpaint_different_negative_prompts(self): # Outputs should be different here assert max_diff > 1e-2 - def test_stable_diffusion_3_inpaint_prompt_embeds(self): - pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) - inputs = self.get_dummy_inputs(torch_device) - - output_with_prompt = pipe(**inputs).images[0] - - inputs = self.get_dummy_inputs(torch_device) - prompt = inputs.pop("prompt") - - do_classifier_free_guidance = inputs["guidance_scale"] > 1 - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = pipe.encode_prompt( - prompt, - prompt_2=None, - prompt_3=None, - do_classifier_free_guidance=do_classifier_free_guidance, - device=torch_device, - ) - output_with_embeds = pipe( - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - **inputs, - ).images[0] - - max_diff = np.abs(output_with_prompt - output_with_embeds).max() - assert max_diff < 1e-4 - def test_multi_vae(self): pass diff --git a/tests/pipelines/stable_diffusion_adapter/test_stable_diffusion_adapter.py b/tests/pipelines/stable_diffusion_adapter/test_stable_diffusion_adapter.py index 15f298c67e11..3743bdd0a870 100644 --- a/tests/pipelines/stable_diffusion_adapter/test_stable_diffusion_adapter.py +++ b/tests/pipelines/stable_diffusion_adapter/test_stable_diffusion_adapter.py @@ -336,6 +336,13 @@ def test_adapter_lcm_custom_timesteps(self): assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + class StableDiffusionFullAdapterPipelineFastTests( AdapterTests, PipelineTesterMixin, PipelineFromPipeTesterMixin, unittest.TestCase diff --git a/tests/pipelines/stable_diffusion_gligen/test_stable_diffusion_gligen.py b/tests/pipelines/stable_diffusion_gligen/test_stable_diffusion_gligen.py index 405809aee19e..b3ac507f768e 100644 --- a/tests/pipelines/stable_diffusion_gligen/test_stable_diffusion_gligen.py +++ b/tests/pipelines/stable_diffusion_gligen/test_stable_diffusion_gligen.py @@ -169,3 +169,7 @@ def test_attention_slicing_forward_pass(self): def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(batch_size=3, expected_max_diff=3e-3) + + @unittest.skip("Test not supported as tokenizer is used for parsing bounding boxes.") + def test_encode_prompt_works_in_isolation(self): + pass 
diff --git a/tests/pipelines/stable_diffusion_gligen_text_image/test_stable_diffusion_gligen_text_image.py b/tests/pipelines/stable_diffusion_gligen_text_image/test_stable_diffusion_gligen_text_image.py index 15e4c60db82d..b080bb987e13 100644 --- a/tests/pipelines/stable_diffusion_gligen_text_image/test_stable_diffusion_gligen_text_image.py +++ b/tests/pipelines/stable_diffusion_gligen_text_image/test_stable_diffusion_gligen_text_image.py @@ -207,3 +207,9 @@ def test_attention_slicing_forward_pass(self): def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(batch_size=3, expected_max_diff=3e-3) + + @unittest.skip( + "Test not supported because of the use of `text_encoder` in `get_cross_attention_kwargs_with_grounded()`." + ) + def test_encode_prompt_works_in_isolation(self): + pass diff --git a/tests/pipelines/stable_diffusion_panorama/test_stable_diffusion_panorama.py b/tests/pipelines/stable_diffusion_panorama/test_stable_diffusion_panorama.py index 6dc6c31ae9a7..4734af259921 100644 --- a/tests/pipelines/stable_diffusion_panorama/test_stable_diffusion_panorama.py +++ b/tests/pipelines/stable_diffusion_panorama/test_stable_diffusion_panorama.py @@ -258,6 +258,13 @@ def test_stable_diffusion_panorama_pndm(self): assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @nightly @require_torch_gpu diff --git a/tests/pipelines/stable_diffusion_sag/test_stable_diffusion_sag.py b/tests/pipelines/stable_diffusion_sag/test_stable_diffusion_sag.py index 1d4e66bd65f0..bd1ba268d2d9 100644 --- a/tests/pipelines/stable_diffusion_sag/test_stable_diffusion_sag.py +++ b/tests/pipelines/stable_diffusion_sag/test_stable_diffusion_sag.py @@ -153,6 +153,13 @@ def test_pipeline_different_schedulers(self): # Karras schedulers are not supported image = pipeline(**inputs).images[0] + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @nightly @require_torch_gpu diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py index dfd1c9c37271..e574029acffd 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py @@ -54,7 +54,6 @@ PipelineLatentTesterMixin, PipelineTesterMixin, SDFunctionTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -66,7 +65,6 @@ class StableDiffusionXLPipelineFastTests( IPAdapterTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = StableDiffusionXLPipeline @@ -254,84 +252,6 @@ def test_stable_diffusion_ays(self): np.abs(output.flatten() - output_sigmas.flatten()).max() > 1e-3 ), "use ays sigmas should have different outputs" - def test_stable_diffusion_xl_prompt_embeds(self): - components = self.get_dummy_components() - sd_pipe = StableDiffusionXLPipeline(**components) - 
sd_pipe = sd_pipe.to(torch_device) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - # forward without prompt embeds - inputs = self.get_dummy_inputs(torch_device) - inputs["prompt"] = 2 * [inputs["prompt"]] - inputs["num_images_per_prompt"] = 2 - - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - # forward with prompt embeds - inputs = self.get_dummy_inputs(torch_device) - prompt = 2 * [inputs.pop("prompt")] - - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = sd_pipe.encode_prompt(prompt) - - output = sd_pipe( - **inputs, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - ) - image_slice_2 = output.images[0, -3:, -3:, -1] - - # make sure that it's equal - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 - - def test_stable_diffusion_xl_negative_prompt_embeds(self): - components = self.get_dummy_components() - sd_pipe = StableDiffusionXLPipeline(**components) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - # forward without prompt embeds - inputs = self.get_dummy_inputs(torch_device) - negative_prompt = 3 * ["this is a negative prompt"] - inputs["negative_prompt"] = negative_prompt - inputs["prompt"] = 3 * [inputs["prompt"]] - - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - # forward with prompt embeds - inputs = self.get_dummy_inputs(torch_device) - negative_prompt = 3 * ["this is a negative prompt"] - prompt = 3 * [inputs.pop("prompt")] - - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = sd_pipe.encode_prompt(prompt, negative_prompt=negative_prompt) - - output = sd_pipe( - **inputs, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - ) - image_slice_2 = output.images[0, -3:, -3:, -1] - - # make sure that it's equal - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 - def test_ip_adapter(self): expected_pipe_slice = None if torch_device == "cpu": @@ -345,9 +265,6 @@ def test_attention_slicing_forward_pass(self): def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(expected_max_diff=3e-3) - def test_save_load_optional_components(self): - self._test_save_load_optional_components() - @require_torch_gpu def test_stable_diffusion_xl_offloads(self): pipes = [] @@ -377,41 +294,9 @@ def test_stable_diffusion_xl_offloads(self): assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3 assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3 - def test_stable_diffusion_xl_img2img_prompt_embeds_only(self): - components = self.get_dummy_components() - sd_pipe = StableDiffusionXLPipeline(**components) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - # forward without prompt embeds - generator_device = "cpu" - inputs = self.get_dummy_inputs(generator_device) - inputs["prompt"] = 3 * [inputs["prompt"]] - - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - # forward with prompt embeds - generator_device = "cpu" - inputs = self.get_dummy_inputs(generator_device) 
- prompt = 3 * [inputs.pop("prompt")] - - ( - prompt_embeds, - _, - pooled_prompt_embeds, - _, - ) = sd_pipe.encode_prompt(prompt) - - output = sd_pipe( - **inputs, - prompt_embeds=prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - ) - image_slice_2 = output.images[0, -3:, -3:, -1] - - # make sure that it's equal - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass def test_stable_diffusion_two_xl_mixture_of_denoiser_fast(self): components = self.get_dummy_components() diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py index 23291b0407aa..07333623867e 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_adapter.py @@ -42,7 +42,6 @@ from ..test_pipelines_common import ( IPAdapterTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, assert_mean_pixel_difference, ) @@ -50,9 +49,7 @@ enable_full_determinism() -class StableDiffusionXLAdapterPipelineFastTests( - IPAdapterTesterMixin, PipelineTesterMixin, SDXLOptionalComponentsTesterMixin, unittest.TestCase -): +class StableDiffusionXLAdapterPipelineFastTests(IPAdapterTesterMixin, PipelineTesterMixin, unittest.TestCase): pipeline_class = StableDiffusionXLAdapterPipeline params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS batch_params = TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS @@ -300,6 +297,10 @@ def test_ip_adapter(self, from_multi=False, expected_pipe_slice=None): return super().test_ip_adapter(expected_pipe_slice=expected_pipe_slice) + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + def test_stable_diffusion_adapter_default_case(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator components = self.get_dummy_components() @@ -373,9 +374,6 @@ def test_total_downscale_factor(self, adapter_type): expected_out_image_size, ) - def test_save_load_optional_components(self): - return self._test_save_load_optional_components() - def test_adapter_sdxl_lcm(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator @@ -515,6 +513,10 @@ def test_inference_batch_consistent( logger.setLevel(level=diffusers.logging.WARNING) + @unittest.skip("We test this functionality elsewhere already.") + def test_save_load_optional_components(self): + pass + def test_num_images_per_prompt(self): components = self.get_dummy_components() pipe = self.pipeline_class(**components) diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py index ceec86a811c0..b0a979c49360 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py @@ -57,7 +57,6 @@ IPAdapterTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -266,52 +265,10 @@ def test_attention_slicing_forward_pass(self): def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(expected_max_diff=3e-3) - # TODO(Patrick, Sayak) - skip for now as this requires more refiner tests + @unittest.skip("Skip for now.") def 
test_save_load_optional_components(self): pass - def test_stable_diffusion_xl_img2img_negative_prompt_embeds(self): - components = self.get_dummy_components() - sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - # forward without prompt embeds - generator_device = "cpu" - inputs = self.get_dummy_inputs(generator_device) - negative_prompt = 3 * ["this is a negative prompt"] - inputs["negative_prompt"] = negative_prompt - inputs["prompt"] = 3 * [inputs["prompt"]] - - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - # forward with prompt embeds - generator_device = "cpu" - inputs = self.get_dummy_inputs(generator_device) - negative_prompt = 3 * ["this is a negative prompt"] - prompt = 3 * [inputs.pop("prompt")] - - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = sd_pipe.encode_prompt(prompt, negative_prompt=negative_prompt) - - output = sd_pipe( - **inputs, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - ) - image_slice_2 = output.images[0, -3:, -3:, -1] - - # make sure that it's equal - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 - def test_ip_adapter(self): expected_pipe_slice = None if torch_device == "cpu": @@ -519,7 +476,7 @@ def callback_on_step_end(pipe, i, t, callback_kwargs): class StableDiffusionXLImg2ImgRefinerOnlyPipelineFastTests( - PipelineLatentTesterMixin, PipelineTesterMixin, SDXLOptionalComponentsTesterMixin, unittest.TestCase + PipelineLatentTesterMixin, PipelineTesterMixin, unittest.TestCase ): pipeline_class = StableDiffusionXLImg2ImgPipeline params = TEXT_GUIDED_IMAGE_VARIATION_PARAMS - {"height", "width"} @@ -697,92 +654,15 @@ def test_stable_diffusion_xl_img2img_negative_conditions(self): > 1e-4 ) - def test_stable_diffusion_xl_img2img_negative_prompt_embeds(self): - components = self.get_dummy_components() - sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - # forward without prompt embeds - generator_device = "cpu" - inputs = self.get_dummy_inputs(generator_device) - negative_prompt = 3 * ["this is a negative prompt"] - inputs["negative_prompt"] = negative_prompt - inputs["prompt"] = 3 * [inputs["prompt"]] - - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - # forward with prompt embeds - generator_device = "cpu" - inputs = self.get_dummy_inputs(generator_device) - negative_prompt = 3 * ["this is a negative prompt"] - prompt = 3 * [inputs.pop("prompt")] - - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = sd_pipe.encode_prompt(prompt, negative_prompt=negative_prompt) - - output = sd_pipe( - **inputs, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - ) - image_slice_2 = output.images[0, -3:, -3:, -1] - - # make sure that it's equal - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 - - def test_stable_diffusion_xl_img2img_prompt_embeds_only(self): - components = self.get_dummy_components() - 
sd_pipe = StableDiffusionXLImg2ImgPipeline(**components) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - # forward without prompt embeds - generator_device = "cpu" - inputs = self.get_dummy_inputs(generator_device) - inputs["prompt"] = 3 * [inputs["prompt"]] - - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - # forward with prompt embeds - generator_device = "cpu" - inputs = self.get_dummy_inputs(generator_device) - prompt = 3 * [inputs.pop("prompt")] - - ( - prompt_embeds, - _, - pooled_prompt_embeds, - _, - ) = sd_pipe.encode_prompt(prompt) - - output = sd_pipe( - **inputs, - prompt_embeds=prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - ) - image_slice_2 = output.images[0, -3:, -3:, -1] - - # make sure that it's equal - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 - def test_attention_slicing_forward_pass(self): super().test_attention_slicing_forward_pass(expected_max_diff=3e-3) def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(expected_max_diff=3e-3) + @unittest.skip("We test this functionality elsewhere already.") def test_save_load_optional_components(self): - self._test_save_load_optional_components() + pass @slow diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_inpaint.py b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_inpaint.py index c759f4c112d9..f5fba4ede207 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_inpaint.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_inpaint.py @@ -301,50 +301,10 @@ def test_attention_slicing_forward_pass(self): def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(expected_max_diff=3e-3) - # TODO(Patrick, Sayak) - skip for now as this requires more refiner tests + @unittest.skip("Skip for now.") def test_save_load_optional_components(self): pass - def test_stable_diffusion_xl_inpaint_negative_prompt_embeds(self): - components = self.get_dummy_components() - sd_pipe = StableDiffusionXLInpaintPipeline(**components) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe = sd_pipe.to(torch_device) - sd_pipe.set_progress_bar_config(disable=None) - - # forward without prompt embeds - inputs = self.get_dummy_inputs(torch_device) - negative_prompt = 3 * ["this is a negative prompt"] - inputs["negative_prompt"] = negative_prompt - inputs["prompt"] = 3 * [inputs["prompt"]] - - output = sd_pipe(**inputs) - image_slice_1 = output.images[0, -3:, -3:, -1] - - # forward with prompt embeds - inputs = self.get_dummy_inputs(torch_device) - negative_prompt = 3 * ["this is a negative prompt"] - prompt = 3 * [inputs.pop("prompt")] - - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = sd_pipe.encode_prompt(prompt, negative_prompt=negative_prompt) - - output = sd_pipe( - **inputs, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - ) - image_slice_2 = output.images[0, -3:, -3:, -1] - - # make sure that it's equal - assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4 - @require_torch_gpu def test_stable_diffusion_xl_offloads(self): pipes = [] diff --git a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_instruction_pix2pix.py 
b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_instruction_pix2pix.py index 98cecb4e0f7c..79d38c4a7b43 100644 --- a/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_instruction_pix2pix.py +++ b/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_instruction_pix2pix.py @@ -40,7 +40,6 @@ PipelineKarrasSchedulerTesterMixin, PipelineLatentTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, ) @@ -51,7 +50,6 @@ class StableDiffusionXLInstructPix2PixPipelineFastTests( PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, - SDXLOptionalComponentsTesterMixin, unittest.TestCase, ): pipeline_class = StableDiffusionXLInstructPix2PixPipeline @@ -182,8 +180,10 @@ def test_latents_input(self): max_diff = np.abs(out - out_latents_inputs).max() self.assertLess(max_diff, 1e-4, "passing latents as image input generate different result from passing image") + @unittest.skip("Test not supported at the moment.") def test_cfg(self): pass + @unittest.skip("Functionality is tested elsewhere.") def test_save_load_optional_components(self): - self._test_save_load_optional_components() + pass diff --git a/tests/pipelines/stable_unclip/test_stable_unclip.py b/tests/pipelines/stable_unclip/test_stable_unclip.py index bb54d212a786..8cf103dffd56 100644 --- a/tests/pipelines/stable_unclip/test_stable_unclip.py +++ b/tests/pipelines/stable_unclip/test_stable_unclip.py @@ -184,6 +184,10 @@ def test_attention_slicing_forward_pass(self): def test_inference_batch_single_identical(self): self._test_inference_batch_single_identical(expected_max_diff=1e-3) + @unittest.skip("Test not supported because of the use of `_encode_prior_prompt()`.") + def test_encode_prompt_works_in_isolation(self): + pass + @nightly @require_torch_gpu diff --git a/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py b/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py index 34f2553a9184..176b6954d616 100644 --- a/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py +++ b/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py @@ -207,6 +207,10 @@ def test_inference_batch_single_identical(self): def test_xformers_attention_forwardGenerator_pass(self): self._test_xformers_attention_forwardGenerator_pass(test_max_difference=False) + @unittest.skip("Test not supported at the moment.") + def test_encode_prompt_works_in_isolation(self): + pass + @nightly @require_torch_gpu diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 355e851f9fdd..33a7fd9f2b49 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -42,6 +42,7 @@ from diffusers.schedulers import KarrasDiffusionSchedulers from diffusers.utils import logging from diffusers.utils.import_utils import is_xformers_available +from diffusers.utils.source_code_parsing_utils import ReturnNameVisitor from diffusers.utils.testing_utils import ( CaptureLogger, require_accelerate_version_greater, @@ -1984,6 +1985,118 @@ def test_loading_with_incorrect_variants_raises_error(self): assert f"You are trying to load the model files of the `variant={variant}`" in str(error.exception) + def test_encode_prompt_works_in_isolation(self, extra_required_param_value_dict=None, atol=1e-4, rtol=1e-4): + if not hasattr(self.pipeline_class, "encode_prompt"): + return + + components = self.get_dummy_components() + + # We initialize the pipeline with only text encoders and tokenizers, + # mimicking a real-world scenario. 
+ components_with_text_encoders = {} + for k in components: + if "text" in k or "tokenizer" in k: + components_with_text_encoders[k] = components[k] + else: + components_with_text_encoders[k] = None + pipe_with_just_text_encoder = self.pipeline_class(**components_with_text_encoders) + pipe_with_just_text_encoder = pipe_with_just_text_encoder.to(torch_device) + + # Get inputs and also the args of `encode_prompt`. + inputs = self.get_dummy_inputs(torch_device) + encode_prompt_signature = inspect.signature(pipe_with_just_text_encoder.encode_prompt) + encode_prompt_parameters = list(encode_prompt_signature.parameters.values()) + + # Required args in encode_prompt are those with no default. + required_params = [] + for param in encode_prompt_parameters: + if param.name == "self" or param.name == "kwargs": + continue + if param.default is inspect.Parameter.empty: + required_params.append(param.name) + + # Craft inputs for the `encode_prompt()` method to run in isolation. + encode_prompt_param_names = [p.name for p in encode_prompt_parameters if p.name != "self"] + input_keys = list(inputs.keys()) + encode_prompt_inputs = {k: inputs.pop(k) for k in input_keys if k in encode_prompt_param_names} + + pipe_call_signature = inspect.signature(pipe_with_just_text_encoder.__call__) + pipe_call_parameters = pipe_call_signature.parameters + + # For each required arg in encode_prompt, check if it's missing + # in encode_prompt_inputs. If so, see if __call__ has a default + # for that arg and use it if available. + for required_param_name in required_params: + if required_param_name not in encode_prompt_inputs: + pipe_call_param = pipe_call_parameters.get(required_param_name, None) + if pipe_call_param is not None and pipe_call_param.default is not inspect.Parameter.empty: + # Use the default from pipe.__call__ + encode_prompt_inputs[required_param_name] = pipe_call_param.default + elif extra_required_param_value_dict is not None and isinstance(extra_required_param_value_dict, dict): + encode_prompt_inputs[required_param_name] = extra_required_param_value_dict[required_param_name] + else: + raise ValueError( + f"Required parameter '{required_param_name}' in " + f"encode_prompt has no default in either encode_prompt or __call__." + ) + + # Compute `encode_prompt()`. + with torch.no_grad(): + encoded_prompt_outputs = pipe_with_just_text_encoder.encode_prompt(**encode_prompt_inputs) + + # Programmatically determine the return names of `encode_prompt`. + ast_visitor = ReturnNameVisitor() + encode_prompt_tree = ast_visitor.get_ast_tree(cls=self.pipeline_class) + ast_visitor.visit(encode_prompt_tree) + prompt_embed_kwargs = ast_visitor.return_names + prompt_embeds_kwargs = dict(zip(prompt_embed_kwargs, encoded_prompt_outputs)) + + # Pack the outputs of `encode_prompt`. + adapted_prompt_embeds_kwargs = { + k: prompt_embeds_kwargs.pop(k) for k in list(prompt_embeds_kwargs.keys()) if k in pipe_call_parameters + } + + # Now initialize a pipeline without text encoders and compute outputs with the + # `encode_prompt()` outputs and other relevant inputs. + components_with_text_encoders = {} + for k in components: + if "text" in k or "tokenizer" in k: + components_with_text_encoders[k] = None + else: + components_with_text_encoders[k] = components[k] + pipe_without_text_encoders = self.pipeline_class(**components_with_text_encoders).to(torch_device) + + # Set `negative_prompt` to None as we have already calculated its embeds + # if it was present in `inputs`. 
This is because otherwise we will interfere wrongly + # for non-None `negative_prompt` values as defaults (PixArt for example). + pipe_without_tes_inputs = {**inputs, **adapted_prompt_embeds_kwargs} + if ( + pipe_call_parameters.get("negative_prompt", None) is not None + and pipe_call_parameters.get("negative_prompt").default is not None + ): + pipe_without_tes_inputs.update({"negative_prompt": None}) + + # Pipelines like attend and excite have `prompt` as a required argument. + if ( + pipe_call_parameters.get("prompt", None) is not None + and pipe_call_parameters.get("prompt").default is inspect.Parameter.empty + and pipe_call_parameters.get("prompt_embeds", None) is not None + and pipe_call_parameters.get("prompt_embeds").default is None + ): + pipe_without_tes_inputs.update({"prompt": None}) + + pipe_out = pipe_without_text_encoders(**pipe_without_tes_inputs)[0] + + # Compare against regular pipeline outputs. + full_pipe = self.pipeline_class(**components).to(torch_device) + inputs = self.get_dummy_inputs(torch_device) + pipe_out_2 = full_pipe(**inputs)[0] + + if isinstance(pipe_out, np.ndarray) and isinstance(pipe_out_2, np.ndarray): + self.assertTrue(np.allclose(pipe_out, pipe_out_2, atol=atol, rtol=rtol)) + elif isinstance(pipe_out, torch.Tensor) and isinstance(pipe_out_2, torch.Tensor): + self.assertTrue(torch.allclose(pipe_out, pipe_out_2, atol=atol, rtol=rtol)) + def test_StableDiffusionMixin_component(self): """Any pipeline that have LDMFuncMixin should have vae and unet components.""" if not issubclass(self.pipeline_class, StableDiffusionMixin): @@ -2256,150 +2369,6 @@ def test_push_to_hub_library_name(self): delete_repo(self.repo_id, token=TOKEN) -# For SDXL and its derivative pipelines (such as ControlNet), we have the text encoders -# and the tokenizers as optional components. So, we need to override the `test_save_load_optional_components()` -# test for all such pipelines. This requires us to use a custom `encode_prompt()` function. 
-class SDXLOptionalComponentsTesterMixin: - def encode_prompt( - self, tokenizers, text_encoders, prompt: str, num_images_per_prompt: int = 1, negative_prompt: str = None - ): - device = text_encoders[0].device - - if isinstance(prompt, str): - prompt = [prompt] - batch_size = len(prompt) - - prompt_embeds_list = [] - for tokenizer, text_encoder in zip(tokenizers, text_encoders): - text_inputs = tokenizer( - prompt, - padding="max_length", - max_length=tokenizer.model_max_length, - truncation=True, - return_tensors="pt", - ) - - text_input_ids = text_inputs.input_ids - - prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True) - pooled_prompt_embeds = prompt_embeds[0] - prompt_embeds = prompt_embeds.hidden_states[-2] - prompt_embeds_list.append(prompt_embeds) - - prompt_embeds = torch.concat(prompt_embeds_list, dim=-1) - - if negative_prompt is None: - negative_prompt_embeds = torch.zeros_like(prompt_embeds) - negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds) - else: - negative_prompt = batch_size * [negative_prompt] if isinstance(negative_prompt, str) else negative_prompt - - negative_prompt_embeds_list = [] - for tokenizer, text_encoder in zip(tokenizers, text_encoders): - uncond_input = tokenizer( - negative_prompt, - padding="max_length", - max_length=tokenizer.model_max_length, - truncation=True, - return_tensors="pt", - ) - - negative_prompt_embeds = text_encoder(uncond_input.input_ids.to(device), output_hidden_states=True) - negative_pooled_prompt_embeds = negative_prompt_embeds[0] - negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2] - negative_prompt_embeds_list.append(negative_prompt_embeds) - - negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1) - - bs_embed, seq_len, _ = prompt_embeds.shape - - # duplicate text embeddings for each generation per prompt, using mps friendly method - prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) - prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1) - - # for classifier-free guidance - # duplicate unconditional embeddings for each generation per prompt, using mps friendly method - seq_len = negative_prompt_embeds.shape[1] - - negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) - negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) - - pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view( - bs_embed * num_images_per_prompt, -1 - ) - - # for classifier-free guidance - negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, num_images_per_prompt).view( - bs_embed * num_images_per_prompt, -1 - ) - - return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds - - def _test_save_load_optional_components(self, expected_max_difference=1e-4): - components = self.get_dummy_components() - - pipe = self.pipeline_class(**components) - for optional_component in pipe._optional_components: - setattr(pipe, optional_component, None) - - for component in pipe.components.values(): - if hasattr(component, "set_default_attn_processor"): - component.set_default_attn_processor() - pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - - generator_device = "cpu" - inputs = self.get_dummy_inputs(generator_device) - - tokenizer = components.pop("tokenizer") - tokenizer_2 = components.pop("tokenizer_2") - text_encoder = components.pop("text_encoder") - text_encoder_2 = 
components.pop("text_encoder_2") - - tokenizers = [tokenizer, tokenizer_2] if tokenizer is not None else [tokenizer_2] - text_encoders = [text_encoder, text_encoder_2] if text_encoder is not None else [text_encoder_2] - prompt = inputs.pop("prompt") - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = self.encode_prompt(tokenizers, text_encoders, prompt) - inputs["prompt_embeds"] = prompt_embeds - inputs["negative_prompt_embeds"] = negative_prompt_embeds - inputs["pooled_prompt_embeds"] = pooled_prompt_embeds - inputs["negative_pooled_prompt_embeds"] = negative_pooled_prompt_embeds - - output = pipe(**inputs)[0] - - with tempfile.TemporaryDirectory() as tmpdir: - pipe.save_pretrained(tmpdir) - pipe_loaded = self.pipeline_class.from_pretrained(tmpdir) - for component in pipe_loaded.components.values(): - if hasattr(component, "set_default_attn_processor"): - component.set_default_attn_processor() - pipe_loaded.to(torch_device) - pipe_loaded.set_progress_bar_config(disable=None) - - for optional_component in pipe._optional_components: - self.assertTrue( - getattr(pipe_loaded, optional_component) is None, - f"`{optional_component}` did not stay set to None after loading.", - ) - - inputs = self.get_dummy_inputs(generator_device) - _ = inputs.pop("prompt") - inputs["prompt_embeds"] = prompt_embeds - inputs["negative_prompt_embeds"] = negative_prompt_embeds - inputs["pooled_prompt_embeds"] = pooled_prompt_embeds - inputs["negative_pooled_prompt_embeds"] = negative_pooled_prompt_embeds - - output_loaded = pipe_loaded(**inputs)[0] - - max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() - self.assertLess(max_diff, expected_max_difference) - - class PyramidAttentionBroadcastTesterMixin: pab_config = PyramidAttentionBroadcastConfig( spatial_attention_block_skip_range=2, diff --git a/tests/pipelines/text_to_video_synthesis/test_text_to_video.py b/tests/pipelines/text_to_video_synthesis/test_text_to_video.py index bca4fdbfae64..7813a2c071b3 100644 --- a/tests/pipelines/text_to_video_synthesis/test_text_to_video.py +++ b/tests/pipelines/text_to_video_synthesis/test_text_to_video.py @@ -173,6 +173,14 @@ def test_inference_batch_single_identical(self): def test_num_images_per_prompt(self): pass + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "num_images_per_prompt": 1, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @slow @skip_mps diff --git a/tests/pipelines/text_to_video_synthesis/test_video_to_video.py b/tests/pipelines/text_to_video_synthesis/test_video_to_video.py index 34ccb09e2204..f44a8aa33c5a 100644 --- a/tests/pipelines/text_to_video_synthesis/test_video_to_video.py +++ b/tests/pipelines/text_to_video_synthesis/test_video_to_video.py @@ -197,6 +197,14 @@ def test_inference_batch_single_identical(self): def test_num_images_per_prompt(self): pass + def test_encode_prompt_works_in_isolation(self): + extra_required_param_value_dict = { + "device": torch.device(torch_device).type, + "num_images_per_prompt": 1, + "do_classifier_free_guidance": self.get_dummy_inputs(device=torch_device).get("guidance_scale", 1.0) > 1.0, + } + return super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict) + @nightly @skip_mps diff --git a/tests/pipelines/unidiffuser/test_unidiffuser.py 
b/tests/pipelines/unidiffuser/test_unidiffuser.py index 310e46a2e8c6..e922ddd8fd6a 100644 --- a/tests/pipelines/unidiffuser/test_unidiffuser.py +++ b/tests/pipelines/unidiffuser/test_unidiffuser.py @@ -578,6 +578,12 @@ def test_unidiffuser_default_img2text_v1_cuda_fp16(self): expected_text_prefix = '" This This' assert text[0][: len(expected_text_prefix)] == expected_text_prefix + @unittest.skip( + "Test not supported because it has a bunch of direct configs at init and this pipeline isn't used that much now." + ) + def test_encode_prompt_works_in_isolation(self): + pass + @nightly @require_torch_gpu diff --git a/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py b/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py index 467550138790..97d1a1cc3830 100644 --- a/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py +++ b/tests/pipelines/wuerstchen/test_wuerstchen_decoder.py @@ -186,3 +186,7 @@ def test_attention_slicing_forward_pass(self): @unittest.skip(reason="bf16 not supported and requires CUDA") def test_float16_inference(self): super().test_float16_inference() + + @unittest.skip("Test not supported.") + def test_encode_prompt_works_in_isolation(self): + pass diff --git a/tests/pipelines/wuerstchen/test_wuerstchen_prior.py b/tests/pipelines/wuerstchen/test_wuerstchen_prior.py index 460004da6f04..4bc086e7f65b 100644 --- a/tests/pipelines/wuerstchen/test_wuerstchen_prior.py +++ b/tests/pipelines/wuerstchen/test_wuerstchen_prior.py @@ -267,3 +267,7 @@ def test_inference_with_prior_lora(self): lora_image_embed = output_lora.image_embeddings self.assertTrue(image_embed.shape == lora_image_embed.shape) + + @unittest.skip("Test not supported as dtype cannot be inferred without the text encoder.") + def test_encode_prompt_works_in_isolation(self): + pass
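For reference, the mechanism the new common test relies on is plain `inspect.signature` introspection: any `encode_prompt()` argument without a default must be supplied by the test, either from a matching `__call__` default or from the per-pipeline `extra_required_param_value_dict`. Below is a minimal, self-contained sketch of that introspection step; the `encode_prompt` stub is hypothetical and only stands in for a real pipeline method so the snippet runs on its own.

```python
import inspect

# Hypothetical stand-in for a pipeline's encode_prompt(); only its signature matters here.
def encode_prompt(self, prompt, device, num_images_per_prompt=1, do_classifier_free_guidance=True, negative_prompt=None):
    ...

# Arguments with no default are "required" and must be filled in by the caller,
# mirroring how the common test falls back to __call__ defaults or
# extra_required_param_value_dict for them.
required = [
    p.name
    for p in inspect.signature(encode_prompt).parameters.values()
    if p.name not in ("self", "kwargs") and p.default is inspect.Parameter.empty
]
print(required)  # ['prompt', 'device']
```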