Commit edede6c

feat(diffusers): upgrade mindone.diffusers from v0.33.1 to v0.34.0 (#1158)

* feat(diffusers): upgrade mindone.diffusers from v0.33.1 to v0.34.0
* fix bugs

1 parent fc997b6 commit edede6c

348 files changed: +6045 -3420 lines changed


mindone/diffusers/image_processor.py

Lines changed: 22 additions & 2 deletions
@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # This code is adapted from https://github.com/huggingface/diffusers
 # with modifications to run diffusers on mindspore.
@@ -43,12 +43,15 @@
 def is_valid_image(image) -> bool:
     r"""
     Checks if the input is a valid image.
+
     A valid image can be:
     - A `PIL.Image.Image`.
     - A 2D or 3D `np.ndarray` or `ms.Tensor` (grayscale or color image).
+
     Args:
         image (`Union[PIL.Image.Image, np.ndarray, ms.Tensor]`):
             The image to validate. It can be a PIL image, a NumPy array, or a MindSpore tensor.
+
     Returns:
         `bool`:
             `True` if the input is a valid image, `False` otherwise.
@@ -59,15 +62,18 @@ def is_valid_image(image) -> bool:
 def is_valid_image_imagelist(images):
     r"""
     Checks if the input is a valid image or list of images.
+
     The input can be one of the following formats:
     - A 4D tensor or numpy array (batch of images).
     - A valid single image: `PIL.Image.Image`, 2D `np.ndarray` or `ms.Tensor` (grayscale image), 3D `np.ndarray` or
       `ms.Tensor`.
     - A list of valid images.
+
     Args:
         images (`Union[np.ndarray, ms.Tensor, PIL.Image.Image, List]`):
            The image(s) to check. Can be a batch of images (4D tensor/array), a single image, or a list of valid
            images.
+
     Returns:
         `bool`:
             `True` if the input is valid, `False` otherwise.
@@ -112,6 +118,7 @@ def __init__(
         vae_scale_factor: int = 8,
         vae_latent_channels: int = 4,
         resample: str = "lanczos",
+        reducing_gap: int = None,
         do_normalize: bool = True,
         do_binarize: bool = False,
         do_convert_rgb: bool = False,
@@ -133,6 +140,7 @@ def numpy_to_pil(images: np.ndarray) -> List[PIL.Image.Image]:
         Args:
             images (`np.ndarray`):
                 The image array to convert to PIL format.
+
         Returns:
             `List[PIL.Image.Image]`:
                 A list of PIL images.
@@ -152,9 +160,11 @@ def numpy_to_pil(images: np.ndarray) -> List[PIL.Image.Image]:
     def pil_to_numpy(images: Union[List[PIL.Image.Image], PIL.Image.Image]) -> np.ndarray:
         r"""
         Convert a PIL image or a list of PIL images to NumPy arrays.
+
         Args:
             images (`PIL.Image.Image` or `List[PIL.Image.Image]`):
                 The PIL image or list of images to convert to NumPy format.
+
         Returns:
             `np.ndarray`:
                 A NumPy array representation of the images.
@@ -192,6 +202,7 @@ def ms_to_numpy(images: ms.Tensor) -> np.ndarray:
         Args:
             images (`ms.Tensor`):
                 The MindSpore tensor to convert to NumPy format.
+
         Returns:
             `np.ndarray`:
                 A NumPy array representation of the images.
@@ -207,6 +218,7 @@ def normalize(images: Union[np.ndarray, ms.Tensor]) -> Union[np.ndarray, ms.Tensor]:
         Args:
             images (`np.ndarray` or `ms.Tensor`):
                 The image array to normalize.
+
         Returns:
             `np.ndarray` or `ms.Tensor`:
                 The normalized image array.
@@ -221,6 +233,7 @@ def denormalize(images: Union[np.ndarray, ms.Tensor]) -> Union[np.ndarray, ms.Tensor]:
         Args:
             images (`np.ndarray` or `ms.Tensor`):
                 The image array to denormalize.
+
         Returns:
             `np.ndarray` or `ms.Tensor`:
                 The denormalized image array.
@@ -235,6 +248,7 @@ def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
         Args:
             image (`PIL.Image.Image`):
                 The PIL image to convert to RGB.
+
         Returns:
             `PIL.Image.Image`:
                 The RGB-converted PIL image.
@@ -251,6 +265,7 @@ def convert_to_grayscale(image: PIL.Image.Image) -> PIL.Image.Image:
         Args:
            image (`PIL.Image.Image`):
                The input image to convert.
+
         Returns:
             `PIL.Image.Image`:
                 The image converted to grayscale.
@@ -267,6 +282,7 @@ def blur(image: PIL.Image.Image, blur_factor: int = 4) -> PIL.Image.Image:
         Args:
             image (`PIL.Image.Image`):
                 The PIL image to convert to grayscale.
+
         Returns:
             `PIL.Image.Image`:
                 The grayscale-converted PIL image.
@@ -484,7 +500,11 @@ def resize(
             raise ValueError(f"Only PIL image input is supported for resize_mode {resize_mode}")
         if isinstance(image, PIL.Image.Image):
             if resize_mode == "default":
-                image = image.resize((width, height), resample=PIL_INTERPOLATION[self.config.resample])
+                image = image.resize(
+                    (width, height),
+                    resample=PIL_INTERPOLATION[self.config.resample],
+                    reducing_gap=self.config.reducing_gap,
+                )
             elif resize_mode == "fill":
                 image = self._resize_and_fill(image, width, height)
             elif resize_mode == "crop":
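
The functional change in this file is the new `reducing_gap` option, which is registered on the processor config and forwarded to `PIL.Image.Image.resize` (Pillow uses it to speed up downscaling by first shrinking the image in integer steps). A minimal sketch of the upgraded behavior, assuming the class is `VaeImageProcessor` as in upstream diffusers and using a hypothetical input file:

    import PIL.Image
    from mindone.diffusers.image_processor import VaeImageProcessor

    # reducing_gap is stored on the config and only takes effect for resize_mode="default";
    # leaving it at None keeps the pre-0.34.0 behavior.
    processor = VaeImageProcessor(vae_scale_factor=8, resample="lanczos", reducing_gap=2)

    image = PIL.Image.open("example.png").convert("RGB")  # hypothetical input file
    resized = processor.resize(image, height=512, width=512)  # forwards self.config.reducing_gap
    print(resized.size)  # (512, 512)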

mindone/diffusers/loaders/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -63,6 +63,7 @@ def text_encoder_attn_modules(text_encoder):
         "AmusedLoraLoaderMixin",
         "StableDiffusionLoraLoaderMixin",
         "SD3LoraLoaderMixin",
+        "AuraFlowLoraLoaderMixin",
         "StableDiffusionXLLoraLoaderMixin",
         "LTXVideoLoraLoaderMixin",
         "LoraLoaderMixin",
@@ -74,6 +75,7 @@ def text_encoder_attn_modules(text_encoder):
         "SanaLoraLoaderMixin",
         "Lumina2LoraLoaderMixin",
         "WanLoraLoaderMixin",
+        "HiDreamImageLoraLoaderMixin",
     ],
     "peft": ["PeftAdapterMixin"],
     "single_file": ["FromSingleFileMixin"],
@@ -86,9 +88,11 @@ def text_encoder_attn_modules(text_encoder):
     from .ip_adapter import FluxIPAdapterMixin, IPAdapterMixin, SD3IPAdapterMixin
     from .lora_pipeline import (
         AmusedLoraLoaderMixin,
+        AuraFlowLoraLoaderMixin,
         CogVideoXLoraLoaderMixin,
         CogView4LoraLoaderMixin,
         FluxLoraLoaderMixin,
+        HiDreamImageLoraLoaderMixin,
         HunyuanVideoLoraLoaderMixin,
         LoraLoaderMixin,
         LTXVideoLoraLoaderMixin,
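
These hunks register `AuraFlowLoraLoaderMixin` and `HiDreamImageLoraLoaderMixin` both in the lazy `_import_structure` map and in the eager import block, so the same class should be reachable from the package root and from `lora_pipeline`. A small illustrative check (class names come from the diff; the check itself is not part of the commit):

    # Both import paths should resolve to the same class object after this commit.
    from mindone.diffusers.loaders import AuraFlowLoraLoaderMixin, HiDreamImageLoraLoaderMixin
    from mindone.diffusers.loaders.lora_pipeline import AuraFlowLoraLoaderMixin as EagerAuraFlow

    assert AuraFlowLoraLoaderMixin is EagerAuraFlow
    print(HiDreamImageLoraLoaderMixin.__name__)  # "HiDreamImageLoraLoaderMixin"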

mindone/diffusers/loaders/ip_adapter.py

Lines changed: 8 additions & 11 deletions
@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # This code is adapted from https://github.com/huggingface/diffusers
 # with modifications to run diffusers on mindspore.
@@ -452,16 +452,13 @@ def load_ip_adapter(
         if image_encoder_pretrained_model_name_or_path is not None:
             if not isinstance(pretrained_model_name_or_path_or_dict, dict):
                 logger.info(f"loading image_encoder from {image_encoder_pretrained_model_name_or_path}")
-                image_encoder = (
-                    CLIPVisionModelWithProjection.from_pretrained(
-                        image_encoder_pretrained_model_name_or_path,
-                        subfolder=image_encoder_subfolder,
-                        cache_dir=cache_dir,
-                        local_files_only=local_files_only,
-                    )
-                    .to(dtype=image_encoder_dtype)
-                    .set_train(False)
-                )
+                image_encoder = CLIPVisionModelWithProjection.from_pretrained(
+                    image_encoder_pretrained_model_name_or_path,
+                    subfolder=image_encoder_subfolder,
+                    cache_dir=cache_dir,
+                    local_files_only=local_files_only,
+                    mindspore_dtype=image_encoder_dtype,
+                ).set_train(False)
                 self.register_modules(image_encoder=image_encoder)
             else:
                 raise ValueError(
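
The substantive change here is that the image encoder's dtype is now handed to `from_pretrained` via `mindspore_dtype` instead of casting the loaded model afterwards with `.to(dtype=...)`, so the weights come back directly in the requested dtype. A sketch of the new call pattern, assuming `CLIPVisionModelWithProjection` is the one re-exported by `mindone.transformers` and using an illustrative repo id:

    import mindspore as ms
    from mindone.transformers import CLIPVisionModelWithProjection

    # Load the CLIP image encoder directly in fp16 rather than casting after the fact;
    # the repo id and subfolder below are illustrative, not taken from this commit.
    image_encoder = CLIPVisionModelWithProjection.from_pretrained(
        "h94/IP-Adapter",
        subfolder="models/image_encoder",
        mindspore_dtype=ms.float16,  # replaces the removed .to(dtype=...) cast
    ).set_train(False)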
