Commit 565e51c
committed: update testpipeline
1 parent 711dded

4 files changed: +36 -122 lines

src/diffusers/pipelines/omnigen/pipeline_omnigen.py (11 additions, 1 deletion)

@@ -139,7 +139,7 @@ class OmniGenPipeline(
 
     model_cpu_offload_seq = "transformer->vae"
     _optional_components = []
-    _callback_tensor_inputs = ["latents", "input_images_latents"]
+    _callback_tensor_inputs = ["latents"]
 
     def __init__(
         self,
@@ -435,6 +435,7 @@ def __call__(
             width=width,
             use_img_cfg=use_img_cfg,
             use_input_image_size_as_output=use_input_image_size_as_output,
+            num_images_per_prompt=num_images_per_prompt,
         )
         processed_data["input_ids"] = processed_data["input_ids"].to(device)
         processed_data["attention_mask"] = processed_data["attention_mask"].to(device)
@@ -448,6 +449,7 @@ def __call__(
         timesteps, num_inference_steps = retrieve_timesteps(
             self.scheduler, num_inference_steps, device, timesteps, sigmas=sigmas
         )
+        self._num_timesteps = len(timesteps)
 
         # 6. Prepare latents.
         if use_input_image_size_as_output:
@@ -496,6 +498,14 @@ def __call__(
                 latents_dtype = latents.dtype
                 latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
 
+                if callback_on_step_end is not None:
+                    callback_kwargs = {}
+                    for k in callback_on_step_end_tensor_inputs:
+                        callback_kwargs[k] = locals()[k]
+                    callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
+
+                    latents = callback_outputs.pop("latents", latents)
+
                 if latents.dtype != latents_dtype:
                     if torch.backends.mps.is_available():
                         # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
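
The callback block added above follows the step-end hook convention used across diffusers pipelines: tensors named in callback_on_step_end_tensor_inputs are gathered into a dict, the user callback may modify them, and "latents" is written back into the denoising loop. A minimal usage sketch, assuming a published OmniGen checkpoint id and CUDA availability (neither is pinned down by this commit):

import torch
from diffusers import OmniGenPipeline

# Hypothetical checkpoint id; substitute whichever OmniGen weights you use.
pipe = OmniGenPipeline.from_pretrained("Shitao/OmniGen-v1-diffusers", torch_dtype=torch.bfloat16)
pipe.to("cuda")

def log_latents(pipeline, step, timestep, callback_kwargs):
    # "latents" is available because it is listed in _callback_tensor_inputs.
    latents = callback_kwargs["latents"]
    print(f"step {step + 1}/{pipeline._num_timesteps}: latents norm {latents.float().norm().item():.2f}")
    return callback_kwargs  # tensors returned here are written back into the loop

image = pipe(
    "A painting of a squirrel eating a burger",
    num_inference_steps=20,
    callback_on_step_end=log_latents,
    callback_on_step_end_tensor_inputs=["latents"],
).images[0]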

src/diffusers/pipelines/omnigen/processor_omnigen.py (12 additions, 10 deletions)

@@ -135,6 +135,7 @@ def __call__(
         use_img_cfg: bool = True,
         separate_cfg_input: bool = False,
         use_input_image_size_as_output: bool = False,
+        num_images_per_prompt: int = 1,
     ) -> Dict:
         if isinstance(instructions, str):
             instructions = [instructions]
@@ -161,17 +162,18 @@ def __call__(
             else:
                 img_cfg_mllm_input = neg_mllm_input
 
-            if use_input_image_size_as_output:
-                input_data.append(
-                    (
-                        mllm_input,
-                        neg_mllm_input,
-                        img_cfg_mllm_input,
-                        [mllm_input["pixel_values"][0].size(-2), mllm_input["pixel_values"][0].size(-1)],
+            for _ in range(num_images_per_prompt):
+                if use_input_image_size_as_output:
+                    input_data.append(
+                        (
+                            mllm_input,
+                            neg_mllm_input,
+                            img_cfg_mllm_input,
+                            [mllm_input["pixel_values"][0].size(-2), mllm_input["pixel_values"][0].size(-1)],
+                        )
                     )
-                )
-            else:
-                input_data.append((mllm_input, neg_mllm_input, img_cfg_mllm_input, [height, width]))
+                else:
+                    input_data.append((mllm_input, neg_mllm_input, img_cfg_mllm_input, [height, width]))
 
         return self.collator(input_data)
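
The loop introduced above repeats each prompt's (positive, negative, image-cfg, size) tuple once per requested image, so the downstream collator sees batch_size = len(prompts) * num_images_per_prompt. A self-contained sketch of that pattern, with placeholder tuple contents rather than the processor's real structures:

def build_input_data(prompts, height=16, width=16, num_images_per_prompt=1):
    input_data = []
    for prompt in prompts:
        # Stand-ins for mllm_input / neg_mllm_input / img_cfg_mllm_input.
        entry = (prompt, "negative: " + prompt, None, [height, width])
        # One tuple per requested image, mirroring the diff above.
        for _ in range(num_images_per_prompt):
            input_data.append(entry)
    return input_data

assert len(build_input_data(["a cat", "a dog"], num_images_per_prompt=3)) == 6
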
src/diffusers/utils/testing_utils.py (5 additions, 5 deletions)

@@ -1077,28 +1077,28 @@ def _is_torch_fp64_available(device):
 # Function definitions
 BACKEND_EMPTY_CACHE = {
     "cuda": torch.cuda.empty_cache,
-    "xpu": torch.xpu.empty_cache,
+    # "xpu": torch.xpu.empty_cache,
     "cpu": None,
     "mps": torch.mps.empty_cache,
     "default": None,
 }
 BACKEND_DEVICE_COUNT = {
     "cuda": torch.cuda.device_count,
-    "xpu": torch.xpu.device_count,
+    # "xpu": torch.xpu.device_count,
     "cpu": lambda: 0,
     "mps": lambda: 0,
     "default": 0,
 }
 BACKEND_MANUAL_SEED = {
     "cuda": torch.cuda.manual_seed,
-    "xpu": torch.xpu.manual_seed,
+    # "xpu": torch.xpu.manual_seed,
     "cpu": torch.manual_seed,
     "mps": torch.mps.manual_seed,
     "default": torch.manual_seed,
 }
 BACKEND_RESET_PEAK_MEMORY_STATS = {
     "cuda": torch.cuda.reset_peak_memory_stats,
-    "xpu": getattr(torch.xpu, "reset_peak_memory_stats", None),
+    # "xpu": getattr(torch.xpu, "reset_peak_memory_stats", None),
     "cpu": None,
     "mps": None,
     "default": None,
@@ -1112,7 +1112,7 @@ def _is_torch_fp64_available(device):
 }
 BACKEND_MAX_MEMORY_ALLOCATED = {
     "cuda": torch.cuda.max_memory_allocated,
-    "xpu": getattr(torch.xpu, "max_memory_allocated", None),
+    # "xpu": getattr(torch.xpu, "max_memory_allocated", None),
     "cpu": 0,
     "mps": 0,
     "default": 0,

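Each BACKEND_* table above maps a device string to a backend-specific callable, or to a constant/None where a backend has nothing to do; tests look the entry up by device and invoke it if callable. A self-contained sketch of that dispatch pattern (abridged table, and the helper name here is hypothetical):

import torch

# Abridged copy of one table; the real ones live in testing_utils.py.
BACKEND_EMPTY_CACHE = {
    "cuda": torch.cuda.empty_cache,
    "cpu": None,
    "default": None,
}

def backend_dispatch(device, table):
    # Unknown device strings fall back to the "default" entry; entries may be
    # callables, plain values, or None (meaning "nothing to do").
    entry = table.get(device, table["default"])
    return entry() if callable(entry) else entry

backend_dispatch("cuda" if torch.cuda.is_available() else "cpu", BACKEND_EMPTY_CACHE)
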
tests/pipelines/omnigen/test_pipeline_omnigen.py (8 additions, 106 deletions)

@@ -34,112 +34,14 @@ def get_dummy_components(self):
         torch.manual_seed(0)
 
         transformer = OmniGenTransformer2DModel(
-            rope_scaling={
-                "long_factor": [
-                    1.0299999713897705,
-                    1.0499999523162842,
-                    1.0499999523162842,
-                    1.0799999237060547,
-                    1.2299998998641968,
-                    1.2299998998641968,
-                    1.2999999523162842,
-                    1.4499999284744263,
-                    1.5999999046325684,
-                    1.6499998569488525,
-                    1.8999998569488525,
-                    2.859999895095825,
-                    3.68999981880188,
-                    5.419999599456787,
-                    5.489999771118164,
-                    5.489999771118164,
-                    9.09000015258789,
-                    11.579999923706055,
-                    15.65999984741211,
-                    15.769999504089355,
-                    15.789999961853027,
-                    18.360000610351562,
-                    21.989999771118164,
-                    23.079999923706055,
-                    30.009998321533203,
-                    32.35000228881836,
-                    32.590003967285156,
-                    35.56000518798828,
-                    39.95000457763672,
-                    53.840003967285156,
-                    56.20000457763672,
-                    57.95000457763672,
-                    59.29000473022461,
-                    59.77000427246094,
-                    59.920005798339844,
-                    61.190006256103516,
-                    61.96000671386719,
-                    62.50000762939453,
-                    63.3700065612793,
-                    63.48000717163086,
-                    63.48000717163086,
-                    63.66000747680664,
-                    63.850006103515625,
-                    64.08000946044922,
-                    64.760009765625,
-                    64.80001068115234,
-                    64.81001281738281,
-                    64.81001281738281,
-                ],
-                "short_factor": [
-                    1.05,
-                    1.05,
-                    1.05,
-                    1.1,
-                    1.1,
-                    1.1,
-                    1.2500000000000002,
-                    1.2500000000000002,
-                    1.4000000000000004,
-                    1.4500000000000004,
-                    1.5500000000000005,
-                    1.8500000000000008,
-                    1.9000000000000008,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.000000000000001,
-                    2.1000000000000005,
-                    2.1000000000000005,
-                    2.2,
-                    2.3499999999999996,
-                    2.3499999999999996,
-                    2.3499999999999996,
-                    2.3499999999999996,
-                    2.3999999999999995,
-                    2.3999999999999995,
-                    2.6499999999999986,
-                    2.6999999999999984,
-                    2.8999999999999977,
-                    2.9499999999999975,
-                    3.049999999999997,
-                    3.049999999999997,
-                    3.049999999999997,
-                ],
-                "type": "su",
-            },
-            patch_size=2,
+            hidden_size=16,
+            num_attention_heads=4,
+            num_key_value_heads=4,
+            intermediate_size=32,
+            num_layers=1,
             in_channels=4,
-            pos_embed_max_size=192,
+            time_step_dim=4,
+            rope_scaling={"long_factor": list(range(1, 3)), "short_factor": list(range(1, 3))},
         )
 
         torch.manual_seed(0)
@@ -174,7 +76,7 @@ def get_dummy_inputs(self, device, seed=0):
         inputs = {
             "prompt": "A painting of a squirrel eating a burger",
             "generator": generator,
-            "num_inference_steps": 2,
+            "num_inference_steps": 1,
             "guidance_scale": 3.0,
             "output_type": "np",
             "height": 16,

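The dummy transformer config above can be instantiated on its own to sanity-check that the shrunken model stays tiny. A sketch assuming OmniGenTransformer2DModel is importable from the diffusers top level and accepts exactly the keyword arguments shown in the diff:

import torch
from diffusers import OmniGenTransformer2DModel

torch.manual_seed(0)
transformer = OmniGenTransformer2DModel(
    hidden_size=16,
    num_attention_heads=4,
    num_key_value_heads=4,
    intermediate_size=32,
    num_layers=1,
    in_channels=4,
    time_step_dim=4,
    rope_scaling={"long_factor": list(range(1, 3)), "short_factor": list(range(1, 3))},
)
# A 1-layer, 16-dim model keeps the CPU-only fast tests cheap to run.
print(sum(p.numel() for p in transformer.parameters()))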