|
54 | 54 | get_autoencoder_tiny_config, |
55 | 55 | get_consistency_vae_config, |
56 | 56 | ) |
| 57 | +from ..models.transformers.test_models_transformer_flux import create_flux_ip_adapter_state_dict |
57 | 58 | from ..models.unets.test_models_unet_2d_condition import ( |
58 | 59 | create_ip_adapter_faceid_state_dict, |
59 | 60 | create_ip_adapter_state_dict, |
@@ -483,6 +484,94 @@ def test_ip_adapter_faceid(self, expected_max_diff: float = 1e-4): |
483 | 484 | ) |
484 | 485 |
|
485 | 486 |
|
| 487 | +class FluxIPAdapterTesterMixin: |
| 488 | + """ |
| 489 | + This mixin is designed to be used with PipelineTesterMixin and unittest.TestCase classes. |
| 490 | + It provides a set of common tests for Flux pipelines that support IP Adapters. |
| 491 | + """ |
| 492 | + |
| 493 | + def test_pipeline_signature(self): |
| 494 | + parameters = inspect.signature(self.pipeline_class.__call__).parameters |
| 495 | + |
| 496 | + assert issubclass(self.pipeline_class, FluxIPAdapterTesterMixin) |
| 497 | + self.assertIn( |
| 498 | + "ip_adapter_image", |
| 499 | + parameters, |
| 500 | + "`ip_adapter_image` argument must be supported by the `__call__` method", |
| 501 | + ) |
| 502 | + self.assertIn( |
| 503 | + "ip_adapter_image_embeds", |
| 504 | + parameters, |
| 505 | + "`ip_adapter_image_embeds` argument must be supported by the `__call__` method", |
| 506 | + ) |
| 507 | + |
| 508 | + def _get_dummy_image_embeds(self, image_embed_dim: int = 768): |
| 509 | + return torch.randn((1, 1, image_embed_dim), device=torch_device) |
| 510 | + |
| 511 | + def _modify_inputs_for_ip_adapter_test(self, inputs: Dict[str, Any]): |
| 512 | + inputs["negative_prompt"] = "" |
| 513 | + inputs["true_cfg_scale"] = 4.0 |
| 514 | + inputs["output_type"] = "np" |
| 515 | + inputs["return_dict"] = False |
| 516 | + return inputs |
| 517 | + |
| 518 | + def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=None): |
| 519 | + r"""Tests for IP-Adapter. |
| 520 | +
|
| 521 | + The following scenarios are tested: |
| 522 | + - Single IP-Adapter with scale=0 should produce the same output as no IP-Adapter. |
| 523 | + - Single IP-Adapter with scale!=0 should produce a different output compared to no IP-Adapter. |
| 524 | + """ |
| 525 | + # Raising the tolerance for this test when it's run on a CPU because we |
| 526 | + # compare against static slices and that can be shaky (with a very low probability). |
| 527 | + expected_max_diff = 9e-4 if torch_device == "cpu" else expected_max_diff |
| 528 | + |
| 529 | + components = self.get_dummy_components() |
| 530 | + pipe = self.pipeline_class(**components).to(torch_device) |
| 531 | + pipe.set_progress_bar_config(disable=None) |
| 532 | + image_embed_dim = pipe.transformer.config.pooled_projection_dim |
| 533 | + |
| 534 | + # forward pass without ip adapter |
| 535 | + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) |
| 536 | + if expected_pipe_slice is None: |
| 537 | + output_without_adapter = pipe(**inputs)[0] |
| 538 | + else: |
| 539 | + output_without_adapter = expected_pipe_slice |
| 540 | + |
| 541 | + adapter_state_dict = create_flux_ip_adapter_state_dict(pipe.transformer) |
| 542 | + pipe.transformer._load_ip_adapter_weights(adapter_state_dict) |
| 543 | + |
| 544 | + # forward pass with single ip adapter, but scale=0 which should have no effect |
| 545 | + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) |
| 546 | + inputs["ip_adapter_image_embeds"] = [self._get_dummy_image_embeds(image_embed_dim)] |
| 547 | + inputs["negative_ip_adapter_image_embeds"] = [self._get_dummy_image_embeds(image_embed_dim)] |
| 548 | + pipe.set_ip_adapter_scale(0.0) |
| 549 | + output_without_adapter_scale = pipe(**inputs)[0] |
| 550 | + if expected_pipe_slice is not None: |
| 551 | + output_without_adapter_scale = output_without_adapter_scale[0, -3:, -3:, -1].flatten() |
| 552 | + |
| 553 | + # forward pass with single ip adapter and a non-zero scale, which should change the output |
| 554 | + inputs = self._modify_inputs_for_ip_adapter_test(self.get_dummy_inputs(torch_device)) |
| 555 | + inputs["ip_adapter_image_embeds"] = [self._get_dummy_image_embeds(image_embed_dim)] |
| 556 | + inputs["negative_ip_adapter_image_embeds"] = [self._get_dummy_image_embeds(image_embed_dim)] |
| 557 | + pipe.set_ip_adapter_scale(42.0) |
| 558 | + output_with_adapter_scale = pipe(**inputs)[0] |
| 559 | + if expected_pipe_slice is not None: |
| 560 | + output_with_adapter_scale = output_with_adapter_scale[0, -3:, -3:, -1].flatten() |
| 561 | + |
| 562 | + max_diff_without_adapter_scale = np.abs(output_without_adapter_scale - output_without_adapter).max() |
| 563 | + max_diff_with_adapter_scale = np.abs(output_with_adapter_scale - output_without_adapter).max() |
| 564 | + |
| 565 | + self.assertLess( |
| 566 | + max_diff_without_adapter_scale, |
| 567 | + expected_max_diff, |
| 568 | + "Output without ip-adapter must be same as normal inference", |
| 569 | + ) |
| 570 | + self.assertGreater( |
| 571 | + max_diff_with_adapter_scale, 1e-2, "Output with ip-adapter must be different from normal inference" |
| 572 | + ) |
| 573 | + |
| 574 | + |
486 | 575 | class PipelineLatentTesterMixin: |
487 | 576 | """ |
488 | 577 | This mixin is designed to be used with PipelineTesterMixin and unittest.TestCase classes. |
|
0 commit comments