diff --git a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py index 2179ec8e226b..43d91d55c949 100644 --- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py +++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3.py @@ -124,37 +124,22 @@ def get_dummy_inputs(self, device, seed=0): } return inputs - def test_stable_diffusion_3_different_prompts(self): - pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) - - inputs = self.get_dummy_inputs(torch_device) - output_same_prompt = pipe(**inputs).images[0] - - inputs = self.get_dummy_inputs(torch_device) - inputs["prompt_2"] = "a different prompt" - inputs["prompt_3"] = "another different prompt" - output_different_prompts = pipe(**inputs).images[0] - - max_diff = np.abs(output_same_prompt - output_different_prompts).max() - - # Outputs should be different here - assert max_diff > 1e-2 - - def test_stable_diffusion_3_different_negative_prompts(self): - pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) - - inputs = self.get_dummy_inputs(torch_device) - output_same_prompt = pipe(**inputs).images[0] + def test_inference(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) inputs = self.get_dummy_inputs(torch_device) - inputs["negative_prompt_2"] = "deformed" - inputs["negative_prompt_3"] = "blurry" - output_different_prompts = pipe(**inputs).images[0] + image = pipe(**inputs).images[0] + generated_slice = image.flatten() + generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]]) - max_diff = np.abs(output_same_prompt - output_different_prompts).max() + # fmt: off + expected_slice = np.array([0.5112, 0.5228, 0.5235, 0.5524, 0.3188, 0.5017, 0.5574, 0.4899, 0.6812, 0.5991, 0.3908, 0.5213, 0.5582, 0.4457, 0.4204, 0.5616]) + # fmt: on - # Outputs should be different here - assert max_diff > 1e-2 + self.assertTrue( + np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice." + ) def test_fused_qkv_projections(self): device = "cpu" # ensure determinism for the device-dependent torch.Generator @@ -268,40 +253,9 @@ def test_sd3_inference(self): image = pipe(**inputs).images[0] image_slice = image[0, :10, :10] - expected_slice = np.array( - [ - 0.4648, - 0.4404, - 0.4177, - 0.5063, - 0.4800, - 0.4287, - 0.5425, - 0.5190, - 0.4717, - 0.5430, - 0.5195, - 0.4766, - 0.5361, - 0.5122, - 0.4612, - 0.4871, - 0.4749, - 0.4058, - 0.4756, - 0.4678, - 0.3804, - 0.4832, - 0.4822, - 0.3799, - 0.5103, - 0.5034, - 0.3953, - 0.5073, - 0.4839, - 0.3884, - ] - ) + # fmt: off + expected_slice = np.array([0.4648, 0.4404, 0.4177, 0.5063, 0.4800, 0.4287, 0.5425, 0.5190, 0.4717, 0.5430, 0.5195, 0.4766, 0.5361, 0.5122, 0.4612, 0.4871, 0.4749, 0.4058, 0.4756, 0.4678, 0.3804, 0.4832, 0.4822, 0.3799, 0.5103, 0.5034, 0.3953, 0.5073, 0.4839, 0.3884]) + # fmt: on max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten()) diff --git a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py index 7f913cb63ddf..6714fd139695 100644 --- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py +++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_img2img.py @@ -128,37 +128,22 @@ def get_dummy_inputs(self, device, seed=0): } return inputs - def test_stable_diffusion_3_img2img_different_prompts(self): - pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) + def test_inference(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) inputs = self.get_dummy_inputs(torch_device) - output_same_prompt = pipe(**inputs).images[0] - - inputs = self.get_dummy_inputs(torch_device) - inputs["prompt_2"] = "a different prompt" - inputs["prompt_3"] = "another different prompt" - output_different_prompts = pipe(**inputs).images[0] - - max_diff = np.abs(output_same_prompt - output_different_prompts).max() - - # Outputs should be different here - assert max_diff > 1e-2 - - def test_stable_diffusion_3_img2img_different_negative_prompts(self): - pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) - - inputs = self.get_dummy_inputs(torch_device) - output_same_prompt = pipe(**inputs).images[0] - - inputs = self.get_dummy_inputs(torch_device) - inputs["negative_prompt_2"] = "deformed" - inputs["negative_prompt_3"] = "blurry" - output_different_prompts = pipe(**inputs).images[0] + image = pipe(**inputs).images[0] + generated_slice = image.flatten() + generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]]) - max_diff = np.abs(output_same_prompt - output_different_prompts).max() + # fmt: off + expected_slice = np.array([0.4564, 0.5486, 0.4868, 0.5923, 0.3775, 0.5543, 0.4807, 0.4177, 0.3778, 0.5957, 0.5726, 0.4333, 0.6312, 0.5062, 0.4838, 0.5984]) + # fmt: on - # Outputs should be different here - assert max_diff > 1e-2 + self.assertTrue( + np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice." + ) @unittest.skip("Skip for now.") def test_multi_vae(self): @@ -207,112 +192,16 @@ def test_sd3_img2img_inference(self): inputs = self.get_inputs(torch_device) image = pipe(**inputs).images[0] image_slice = image[0, :10, :10] + + # fmt: off expected_slices = Expectations( { - ("xpu", 3): np.array( - [ - 0.5117, - 0.4421, - 0.3852, - 0.5044, - 0.4219, - 0.3262, - 0.5024, - 0.4329, - 0.3276, - 0.4978, - 0.4412, - 0.3355, - 0.4983, - 0.4338, - 0.3279, - 0.4893, - 0.4241, - 0.3129, - 0.4875, - 0.4253, - 0.3030, - 0.4961, - 0.4267, - 0.2988, - 0.5029, - 0.4255, - 0.3054, - 0.5132, - 0.4248, - 0.3222, - ] - ), - ("cuda", 7): np.array( - [ - 0.5435, - 0.4673, - 0.5732, - 0.4438, - 0.3557, - 0.4912, - 0.4331, - 0.3491, - 0.4915, - 0.4287, - 0.347, - 0.4849, - 0.4355, - 0.3469, - 0.4871, - 0.4431, - 0.3538, - 0.4912, - 0.4521, - 0.3643, - 0.5059, - 0.4587, - 0.373, - 0.5166, - 0.4685, - 0.3845, - 0.5264, - 0.4746, - 0.3914, - 0.5342, - ] - ), - ("cuda", 8): np.array( - [ - 0.5146, - 0.4385, - 0.3826, - 0.5098, - 0.4150, - 0.3218, - 0.5142, - 0.4312, - 0.3298, - 0.5127, - 0.4431, - 0.3411, - 0.5171, - 0.4424, - 0.3374, - 0.5088, - 0.4348, - 0.3242, - 0.5073, - 0.4380, - 0.3174, - 0.5132, - 0.4397, - 0.3115, - 0.5132, - 0.4343, - 0.3118, - 0.5219, - 0.4328, - 0.3256, - ] - ), + ("xpu", 3): np.array([0.5117, 0.4421, 0.3852, 0.5044, 0.4219, 0.3262, 0.5024, 0.4329, 0.3276, 0.4978, 0.4412, 0.3355, 0.4983, 0.4338, 0.3279, 0.4893, 0.4241, 0.3129, 0.4875, 0.4253, 0.3030, 0.4961, 0.4267, 0.2988, 0.5029, 0.4255, 0.3054, 0.5132, 0.4248, 0.3222]), + ("cuda", 7): np.array([0.5435, 0.4673, 0.5732, 0.4438, 0.3557, 0.4912, 0.4331, 0.3491, 0.4915, 0.4287, 0.347, 0.4849, 0.4355, 0.3469, 0.4871, 0.4431, 0.3538, 0.4912, 0.4521, 0.3643, 0.5059, 0.4587, 0.373, 0.5166, 0.4685, 0.3845, 0.5264, 0.4746, 0.3914, 0.5342]), + ("cuda", 8): np.array([0.5146, 0.4385, 0.3826, 0.5098, 0.4150, 0.3218, 0.5142, 0.4312, 0.3298, 0.5127, 0.4431, 0.3411, 0.5171, 0.4424, 0.3374, 0.5088, 0.4348, 0.3242, 0.5073, 0.4380, 0.3174, 0.5132, 0.4397, 0.3115, 0.5132, 0.4343, 0.3118, 0.5219, 0.4328, 0.3256]), } ) + # fmt: on expected_slice = expected_slices.get_expectation() diff --git a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py index 4090306dec72..b537d6a0b638 100644 --- a/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py +++ b/tests/pipelines/stable_diffusion_3/test_pipeline_stable_diffusion_3_inpaint.py @@ -132,37 +132,23 @@ def get_dummy_inputs(self, device, seed=0): } return inputs - def test_stable_diffusion_3_inpaint_different_prompts(self): - pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) + def test_inference(self): + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) inputs = self.get_dummy_inputs(torch_device) - output_same_prompt = pipe(**inputs).images[0] + image = pipe(**inputs).images[0] + generated_slice = image.flatten() + generated_slice = np.concatenate([generated_slice[:8], generated_slice[-8:]]) - inputs = self.get_dummy_inputs(torch_device) - inputs["prompt_2"] = "a different prompt" - inputs["prompt_3"] = "another different prompt" - output_different_prompts = pipe(**inputs).images[0] - - max_diff = np.abs(output_same_prompt - output_different_prompts).max() - - # Outputs should be different here - assert max_diff > 1e-2 - - def test_stable_diffusion_3_inpaint_different_negative_prompts(self): - pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device) - - inputs = self.get_dummy_inputs(torch_device) - output_same_prompt = pipe(**inputs).images[0] - - inputs = self.get_dummy_inputs(torch_device) - inputs["negative_prompt_2"] = "deformed" - inputs["negative_prompt_3"] = "blurry" - output_different_prompts = pipe(**inputs).images[0] + # fmt: off + expected_slice = np.array([0.5035, 0.6661, 0.5859, 0.413, 0.4224, 0.4234, 0.7181, 0.5062, 0.5183, 0.6877, 0.5074, 0.585, 0.6111, 0.5422, 0.5306, 0.5891]) + # fmt: on - max_diff = np.abs(output_same_prompt - output_different_prompts).max() - - # Outputs should be different here - assert max_diff > 1e-2 + self.assertTrue( + np.allclose(generated_slice, expected_slice, atol=1e-3), "Output does not match expected slice." + ) + @unittest.skip("Skip for now.") def test_multi_vae(self): pass