Commit edede6c

feat(diffusers): upgrade mindone.diffusers from v0.33.1 to v0.34.0 (#1158)

* feat(diffusers): upgrade mindone.diffusers from v0.33.1 to v0.34.0
* fix bugs

1 parent fc997b6 commit edede6c

348 files changed: +6045 -3420 lines changed


mindone/diffusers/image_processor.py

Lines changed: 22 additions & 2 deletions
@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # This code is adapted from https://github.com/huggingface/diffusers
 # with modifications to run diffusers on mindspore.
@@ -43,12 +43,15 @@
 def is_valid_image(image) -> bool:
     r"""
     Checks if the input is a valid image.
+
     A valid image can be:
     - A `PIL.Image.Image`.
     - A 2D or 3D `np.ndarray` or `ms.Tensor` (grayscale or color image).
+
     Args:
         image (`Union[PIL.Image.Image, np.ndarray, ms.Tensor]`):
             The image to validate. It can be a PIL image, a NumPy array, or a MindSpore tensor.
+
     Returns:
         `bool`:
             `True` if the input is a valid image, `False` otherwise.
@@ -59,15 +62,18 @@ def is_valid_image(image) -> bool:
 def is_valid_image_imagelist(images):
     r"""
     Checks if the input is a valid image or list of images.
+
     The input can be one of the following formats:
     - A 4D tensor or numpy array (batch of images).
     - A valid single image: `PIL.Image.Image`, 2D `np.ndarray` or `ms.Tensor` (grayscale image), 3D `np.ndarray` or
       `ms.Tensor`.
     - A list of valid images.
+
     Args:
         images (`Union[np.ndarray, ms.Tensor, PIL.Image.Image, List]`):
            The image(s) to check. Can be a batch of images (4D tensor/array), a single image, or a list of valid
            images.
+
     Returns:
         `bool`:
             `True` if the input is valid, `False` otherwise.
@@ -112,6 +118,7 @@ def __init__(
         vae_scale_factor: int = 8,
         vae_latent_channels: int = 4,
         resample: str = "lanczos",
+        reducing_gap: int = None,
         do_normalize: bool = True,
         do_binarize: bool = False,
         do_convert_rgb: bool = False,
@@ -133,6 +140,7 @@ def numpy_to_pil(images: np.ndarray) -> List[PIL.Image.Image]:
         Args:
             images (`np.ndarray`):
                 The image array to convert to PIL format.
+
         Returns:
             `List[PIL.Image.Image]`:
                 A list of PIL images.
@@ -152,9 +160,11 @@ def numpy_to_pil(images: np.ndarray) -> List[PIL.Image.Image]:
     def pil_to_numpy(images: Union[List[PIL.Image.Image], PIL.Image.Image]) -> np.ndarray:
         r"""
         Convert a PIL image or a list of PIL images to NumPy arrays.
+
         Args:
             images (`PIL.Image.Image` or `List[PIL.Image.Image]`):
                 The PIL image or list of images to convert to NumPy format.
+
         Returns:
             `np.ndarray`:
                 A NumPy array representation of the images.
@@ -192,6 +202,7 @@ def ms_to_numpy(images: ms.Tensor) -> np.ndarray:
         Args:
             images (`ms.Tensor`):
                 The MindSpore tensor to convert to NumPy format.
+
         Returns:
             `np.ndarray`:
                 A NumPy array representation of the images.
@@ -207,6 +218,7 @@ def normalize(images: Union[np.ndarray, ms.Tensor]) -> Union[np.ndarray, ms.Tensor]:
         Args:
             images (`np.ndarray` or `ms.Tensor`):
                 The image array to normalize.
+
         Returns:
             `np.ndarray` or `ms.Tensor`:
                 The normalized image array.
@@ -221,6 +233,7 @@ def denormalize(images: Union[np.ndarray, ms.Tensor]) -> Union[np.ndarray, ms.Tensor]:
         Args:
             images (`np.ndarray` or `ms.Tensor`):
                 The image array to denormalize.
+
         Returns:
             `np.ndarray` or `ms.Tensor`:
                 The denormalized image array.
@@ -235,6 +248,7 @@ def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
         Args:
             image (`PIL.Image.Image`):
                 The PIL image to convert to RGB.
+
         Returns:
             `PIL.Image.Image`:
                 The RGB-converted PIL image.
@@ -251,6 +265,7 @@ def convert_to_grayscale(image: PIL.Image.Image) -> PIL.Image.Image:
         Args:
            image (`PIL.Image.Image`):
                The input image to convert.
+
         Returns:
             `PIL.Image.Image`:
                 The image converted to grayscale.
@@ -267,6 +282,7 @@ def blur(image: PIL.Image.Image, blur_factor: int = 4) -> PIL.Image.Image:
         Args:
             image (`PIL.Image.Image`):
                 The PIL image to convert to grayscale.
+
         Returns:
             `PIL.Image.Image`:
                 The grayscale-converted PIL image.
@@ -484,7 +500,11 @@ def resize(
             raise ValueError(f"Only PIL image input is supported for resize_mode {resize_mode}")
         if isinstance(image, PIL.Image.Image):
             if resize_mode == "default":
-                image = image.resize((width, height), resample=PIL_INTERPOLATION[self.config.resample])
+                image = image.resize(
+                    (width, height),
+                    resample=PIL_INTERPOLATION[self.config.resample],
+                    reducing_gap=self.config.reducing_gap,
+                )
             elif resize_mode == "fill":
                 image = self._resize_and_fill(image, width, height)
             elif resize_mode == "crop":
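
The functional change in this file is the new `reducing_gap` option, which is registered on the processor config and forwarded to `PIL.Image.Image.resize` (Pillow uses it to speed up downscaling by first shrinking the image in integer steps). A minimal sketch of the upgraded behavior, assuming the class is `VaeImageProcessor` as in upstream diffusers and using a hypothetical input file:

    import PIL.Image
    from mindone.diffusers.image_processor import VaeImageProcessor

    # reducing_gap is stored on the config and only takes effect for resize_mode="default";
    # leaving it at None keeps the pre-0.34.0 behavior.
    processor = VaeImageProcessor(vae_scale_factor=8, resample="lanczos", reducing_gap=2)

    image = PIL.Image.open("example.png").convert("RGB")  # hypothetical input file
    resized = processor.resize(image, height=512, width=512)  # forwards self.config.reducing_gap
    print(resized.size)  # (512, 512)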

mindone/diffusers/loaders/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -63,6 +63,7 @@ def text_encoder_attn_modules(text_encoder):
         "AmusedLoraLoaderMixin",
         "StableDiffusionLoraLoaderMixin",
         "SD3LoraLoaderMixin",
+        "AuraFlowLoraLoaderMixin",
         "StableDiffusionXLLoraLoaderMixin",
         "LTXVideoLoraLoaderMixin",
         "LoraLoaderMixin",
@@ -74,6 +75,7 @@ def text_encoder_attn_modules(text_encoder):
         "SanaLoraLoaderMixin",
         "Lumina2LoraLoaderMixin",
         "WanLoraLoaderMixin",
+        "HiDreamImageLoraLoaderMixin",
     ],
     "peft": ["PeftAdapterMixin"],
     "single_file": ["FromSingleFileMixin"],
@@ -86,9 +88,11 @@ def text_encoder_attn_modules(text_encoder):
     from .ip_adapter import FluxIPAdapterMixin, IPAdapterMixin, SD3IPAdapterMixin
     from .lora_pipeline import (
         AmusedLoraLoaderMixin,
+        AuraFlowLoraLoaderMixin,
         CogVideoXLoraLoaderMixin,
         CogView4LoraLoaderMixin,
         FluxLoraLoaderMixin,
+        HiDreamImageLoraLoaderMixin,
         HunyuanVideoLoraLoaderMixin,
         LoraLoaderMixin,
         LTXVideoLoraLoaderMixin,
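
These hunks register `AuraFlowLoraLoaderMixin` and `HiDreamImageLoraLoaderMixin` both in the lazy `_import_structure` map and in the eager import block, so the same class should be reachable from the package root and from `lora_pipeline`. A small illustrative check (class names come from the diff; the check itself is not part of the commit):

    # Both import paths should resolve to the same class object after this commit.
    from mindone.diffusers.loaders import AuraFlowLoraLoaderMixin, HiDreamImageLoraLoaderMixin
    from mindone.diffusers.loaders.lora_pipeline import AuraFlowLoraLoaderMixin as EagerAuraFlow

    assert AuraFlowLoraLoaderMixin is EagerAuraFlow
    print(HiDreamImageLoraLoaderMixin.__name__)  # "HiDreamImageLoraLoaderMixin"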

mindone/diffusers/loaders/ip_adapter.py

Lines changed: 8 additions & 11 deletions
@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # This code is adapted from https://github.com/huggingface/diffusers
 # with modifications to run diffusers on mindspore.
@@ -452,16 +452,13 @@ def load_ip_adapter(
         if image_encoder_pretrained_model_name_or_path is not None:
             if not isinstance(pretrained_model_name_or_path_or_dict, dict):
                 logger.info(f"loading image_encoder from {image_encoder_pretrained_model_name_or_path}")
-                image_encoder = (
-                    CLIPVisionModelWithProjection.from_pretrained(
-                        image_encoder_pretrained_model_name_or_path,
-                        subfolder=image_encoder_subfolder,
-                        cache_dir=cache_dir,
-                        local_files_only=local_files_only,
-                    )
-                    .to(dtype=image_encoder_dtype)
-                    .set_train(False)
-                )
+                image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+                    image_encoder_pretrained_model_name_or_path,
+                    subfolder=image_encoder_subfolder,
+                    cache_dir=cache_dir,
+                    local_files_only=local_files_only,
+                    mindspore_dtype=image_encoder_dtype,
+                ).set_train(False)
                 self.register_modules(image_encoder=image_encoder)
             else:
                 raise ValueError(
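
The substantive change here is that the image encoder's dtype is now handed to `from_pretrained` via `mindspore_dtype` instead of casting the loaded model afterwards with `.to(dtype=...)`, so the weights come back directly in the requested dtype. A sketch of the new call pattern, assuming `CLIPVisionModelWithProjection` is the one re-exported by `mindone.transformers` and using an illustrative repo id:

    import mindspore as ms
    from mindone.transformers import CLIPVisionModelWithProjection

    # Load the CLIP image encoder directly in fp16 rather than casting after the fact;
    # the repo id and subfolder below are illustrative, not taken from this commit.
    image_encoder = CLIPVisionModelWithProjection.from_pretrained(
        "h94/IP-Adapter",
        subfolder="models/image_encoder",
        mindspore_dtype=ms.float16,  # replaces the removed .to(dtype=...) cast
    ).set_train(False)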
