
Commit c34ce42

Merge remote-tracking branch 'upstream/main' into dduf

2 parents: f62527f + f7cb595

File tree: 10 files changed, +115 −12 lines


examples/research_projects/sd3_lora_colab/train_dreambooth_lora_sd3_miniature.py

Lines changed: 1 addition & 1 deletion
@@ -765,7 +765,7 @@ def load_model_hook(models, input_dir):
         lora_state_dict = StableDiffusion3Pipeline.lora_state_dict(input_dir)
 
         transformer_state_dict = {
-            f'{k.replace("transformer.", "")}': v for k, v in lora_state_dict.items() if k.startswith("unet.")
+            f'{k.replace("transformer.", "")}': v for k, v in lora_state_dict.items() if k.startswith("transformer.")
         }
         transformer_state_dict = convert_unet_state_dict_to_peft(transformer_state_dict)
         incompatible_keys = set_peft_model_state_dict(transformer_, transformer_state_dict, adapter_name="default")
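This fixes a copy-paste bug: SD3 LoRA checkpoints key their transformer weights under the `transformer.` prefix, so filtering on `unet.` would match nothing and yield an empty state dict when resuming. A minimal sketch of the remapping, using hypothetical keys rather than real tensors:

# hypothetical LoRA state dict illustrating the prefix filter (not from the commit)
lora_state_dict = {
    "transformer.x_embedder.lora_A.weight": "tensor-a",
    "text_encoder.q_proj.lora_A.weight": "tensor-b",
}

transformer_state_dict = {
    k.replace("transformer.", ""): v
    for k, v in lora_state_dict.items()
    if k.startswith("transformer.")  # the old filter used "unet." and matched nothing
}
assert transformer_state_dict == {"x_embedder.lora_A.weight": "tensor-a"}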

setup.py

Lines changed: 2 additions & 0 deletions
@@ -135,6 +135,7 @@
     "transformers>=4.41.2",
     "urllib3<=2.0.0",
     "black",
+    "phonemizer",
 ]
 
 # this is a lookup table with items like:
@@ -227,6 +228,7 @@ def run(self):
     "scipy",
     "torchvision",
     "transformers",
+    "phonemizer",
 )
 extras["torch"] = deps_list("torch", "accelerate")
 
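The "lookup table" mentioned in the context comment maps bare package names to pinned requirement strings, and `deps_list` resolves names through it; adding `phonemizer` to `_deps` is what makes it available to the test extras (presumably for the text-to-speech paths exercised by the AudioLDM2 tests). A sketch of that pattern, assuming the shape the comment describes:

import re

# assumed shape of the lookup: bare package name -> pinned requirement string
_deps = ["transformers>=4.41.2", "urllib3<=2.0.0", "black", "phonemizer"]
deps = {re.match(r"^[^!=<>~ ]+", d).group(0): d for d in _deps}

def deps_list(*pkgs):
    # resolve bare names back to their pinned requirement strings
    return [deps[pkg] for pkg in pkgs]

assert deps_list("phonemizer", "urllib3") == ["phonemizer", "urllib3<=2.0.0"]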

src/diffusers/dependency_versions_table.py

Lines changed: 1 addition & 0 deletions
@@ -43,4 +43,5 @@
     "transformers": "transformers>=4.41.2",
     "urllib3": "urllib3<=2.0.0",
     "black": "black",
+    "phonemizer": "phonemizer",
 }
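This table mirrors the `_deps` entries in setup.py; if memory serves it is autogenerated, so the usual workflow is to edit setup.py and regenerate the table (e.g. via `make deps_table_update`) rather than editing both by hand, which is why the two changes appear together in this commit.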

src/diffusers/loaders/single_file_utils.py

Lines changed: 8 additions & 3 deletions
@@ -186,6 +186,7 @@
     "inpainting": 512,
     "inpainting_v2": 512,
     "controlnet": 512,
+    "instruct-pix2pix": 512,
     "v2": 768,
     "v1": 512,
 }
@@ -605,10 +606,14 @@ def infer_diffusers_model_type(checkpoint):
     if any(
         g in checkpoint for g in ["guidance_in.in_layer.bias", "model.diffusion_model.guidance_in.in_layer.bias"]
     ):
-        if checkpoint["img_in.weight"].shape[1] == 384:
-            model_type = "flux-fill"
+        if "model.diffusion_model.img_in.weight" in checkpoint:
+            key = "model.diffusion_model.img_in.weight"
+        else:
+            key = "img_in.weight"
 
-        elif checkpoint["img_in.weight"].shape[1] == 128:
+        if checkpoint[key].shape[1] == 384:
+            model_type = "flux-fill"
+        elif checkpoint[key].shape[1] == 128:
             model_type = "flux-depth"
         else:
             model_type = "flux-dev"

src/diffusers/pipelines/audioldm2/pipeline_audioldm2.py

Lines changed: 15 additions & 3 deletions
@@ -237,7 +237,7 @@ def disable_vae_slicing(self):
         """
         self.vae.disable_slicing()
 
-    def enable_model_cpu_offload(self, gpu_id=0):
+    def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = "cuda"):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
         to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
@@ -249,11 +249,23 @@ def enable_model_cpu_offload(self, gpu_id=0):
         else:
             raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
 
-        device = torch.device(f"cuda:{gpu_id}")
+        torch_device = torch.device(device)
+        device_index = torch_device.index
+
+        if gpu_id is not None and device_index is not None:
+            raise ValueError(
+                f"You have passed both `gpu_id`={gpu_id} and an index as part of the passed device `device`={device}"
+                f"Cannot pass both. Please make sure to either not define `gpu_id` or not pass the index as part of the device: `device`={torch_device.type}"
+            )
+
+        device_type = torch_device.type
+        device = torch.device(f"{device_type}:{gpu_id or torch_device.index}")
 
         if self.device.type != "cpu":
             self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
+            device_mod = getattr(torch, device.type, None)
+            if hasattr(device_mod, "empty_cache") and device_mod.is_available():
+                device_mod.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
 
         model_sequence = [
             self.text_encoder.text_model,
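A usage sketch of the widened signature, assuming an instantiated AudioLDM2 pipeline as `pipe`: either the legacy `gpu_id` or a `device` argument selects the accelerator, and `empty_cache` is now dispatched through the matching torch backend module instead of being hard-coded to CUDA.

pipe.enable_model_cpu_offload(gpu_id=1)         # legacy form -> offloads to cuda:1
pipe.enable_model_cpu_offload(device="cuda:1")  # new form, equivalent
pipe.enable_model_cpu_offload(gpu_id=1, device="cuda:0")  # ValueError: ambiguous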

src/diffusers/pipelines/latte/pipeline_latte.py

Lines changed: 9 additions & 1 deletion
@@ -30,6 +30,7 @@
 from ...utils import (
     BACKENDS_MAPPING,
     BaseOutput,
+    deprecate,
     is_bs4_available,
     is_ftfy_available,
     is_torch_xla_available,
@@ -848,7 +849,14 @@ def __call__(
         if XLA_AVAILABLE:
             xm.mark_step()
 
-        if not output_type == "latents":
+        if output_type == "latents":
+            deprecation_message = (
+                "Passing `output_type='latents'` is deprecated. Please pass `output_type='latent'` instead."
+            )
+            deprecate("output_type_latents", "1.0.0", deprecation_message, standard_warn=False)
+            output_type = "latent"
+
+        if not output_type == "latent":
             video = self.decode_latents(latents, video_length, decode_chunk_size=14)
             video = self.video_processor.postprocess_video(video=video, output_type=output_type)
         else:
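A usage sketch of the shim, assuming a constructed `LattePipeline` as `pipe`: the misspelled value is rewritten before the decode branch, so old callers keep working but see a deprecation warning until it is removed in 1.0.0.

out = pipe(prompt="a cat", output_type="latents")  # warns, behaves as "latent"
out = pipe(prompt="a cat", output_type="latent")   # preferred; skips decoding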

tests/pipelines/audioldm2/test_audioldm2.py

Lines changed: 2 additions & 2 deletions
@@ -471,8 +471,8 @@ def test_xformers_attention_forwardGenerator_pass(self):
         pass
 
     def test_dict_tuple_outputs_equivalent(self):
-        # increase tolerance from 1e-4 -> 2e-4 to account for large composite model
-        super().test_dict_tuple_outputs_equivalent(expected_max_difference=2e-4)
+        # increase tolerance from 1e-4 -> 3e-4 to account for large composite model
+        super().test_dict_tuple_outputs_equivalent(expected_max_difference=3e-4)
 
     def test_inference_batch_single_identical(self):
         # increase tolerance from 1e-4 -> 2e-4 to account for large composite model

tests/single_file/single_file_testing_utils.py

Lines changed: 4 additions & 2 deletions
@@ -47,6 +47,8 @@ def download_diffusers_config(repo_id, tmpdir):
 
 
 class SDSingleFileTesterMixin:
+    single_file_kwargs = {}
+
     def _compare_component_configs(self, pipe, single_file_pipe):
         for param_name, param_value in single_file_pipe.text_encoder.config.to_dict().items():
             if param_name in ["torch_dtype", "architectures", "_name_or_path"]:
@@ -154,7 +156,7 @@ def test_single_file_components_with_original_config_local_files_only(
         self._compare_component_configs(pipe, single_file_pipe)
 
     def test_single_file_format_inference_is_same_as_pretrained(self, expected_max_diff=1e-4):
-        sf_pipe = self.pipeline_class.from_single_file(self.ckpt_path, safety_checker=None)
+        sf_pipe = self.pipeline_class.from_single_file(self.ckpt_path, safety_checker=None, **self.single_file_kwargs)
         sf_pipe.unet.set_attn_processor(AttnProcessor())
         sf_pipe.enable_model_cpu_offload(device=torch_device)
@@ -170,7 +172,7 @@ def test_single_file_format_inference_is_same_as_pretrained(self, expected_max_diff=1e-4):
 
         max_diff = numpy_cosine_similarity_distance(image.flatten(), image_single_file.flatten())
 
-        assert max_diff < expected_max_diff
+        assert max_diff < expected_max_diff, f"{image.flatten()} != {image_single_file.flatten()}"
 
     def test_single_file_components_with_diffusers_config(
         self,
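The class-level `single_file_kwargs` dict is a hook: subclasses can forward extra keyword arguments to `from_single_file` without overriding the whole test method (the instruct-pix2pix suite below uses it to pass `extract_ema=True`). A hypothetical subclass illustrating the pattern; the pipeline class and checkpoint path here are placeholders, not from the commit:

class MyPipelineSingleFileTests(SDSingleFileTesterMixin, unittest.TestCase):
    pipeline_class = StableDiffusionPipeline        # hypothetical choice
    ckpt_path = "https://huggingface.co/.../model.safetensors"  # placeholder
    single_file_kwargs = {"extract_ema": True}      # forwarded to from_single_file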
tests/single_file/test_model_flux_transformer_single_file.py (new file)

Lines changed: 72 additions & 0 deletions

@@ -0,0 +1,72 @@
+# coding=utf-8
+# Copyright 2024 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+import unittest
+
+import torch
+
+from diffusers import (
+    FluxTransformer2DModel,
+)
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    require_torch_accelerator,
+    torch_device,
+)
+
+
+enable_full_determinism()
+
+
+@require_torch_accelerator
+class FluxTransformer2DModelSingleFileTests(unittest.TestCase):
+    model_class = FluxTransformer2DModel
+    ckpt_path = "https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/flux1-dev.safetensors"
+    alternate_keys_ckpt_paths = ["https://huggingface.co/Comfy-Org/flux1-dev/blob/main/flux1-dev-fp8.safetensors"]
+
+    repo_id = "black-forest-labs/FLUX.1-dev"
+
+    def setUp(self):
+        super().setUp()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def tearDown(self):
+        super().tearDown()
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def test_single_file_components(self):
+        model = self.model_class.from_pretrained(self.repo_id, subfolder="transformer")
+        model_single_file = self.model_class.from_single_file(self.ckpt_path)
+
+        PARAMS_TO_IGNORE = ["torch_dtype", "_name_or_path", "_use_default_values", "_diffusers_version"]
+        for param_name, param_value in model_single_file.config.items():
+            if param_name in PARAMS_TO_IGNORE:
+                continue
+            assert (
+                model.config[param_name] == param_value
+            ), f"{param_name} differs between single file loading and pretrained loading"
+
+    def test_checkpoint_loading(self):
+        for ckpt_path in self.alternate_keys_ckpt_paths:
+            torch.cuda.empty_cache()
+            model = self.model_class.from_single_file(ckpt_path)
+
+            del model
+            gc.collect()
+            torch.cuda.empty_cache()
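The new test covers single-file loading of the Flux transformer directly from a checkpoint URL, including an alternate checkpoint whose keys carry the `model.diffusion_model.` prefix (see the single_file_utils change above). A minimal standalone sketch of the same call; the dtype choice is illustrative, not from the commit:

import torch
from diffusers import FluxTransformer2DModel

ckpt = "https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/flux1-dev.safetensors"
transformer = FluxTransformer2DModel.from_single_file(ckpt, torch_dtype=torch.bfloat16)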

tests/single_file/test_stable_diffusion_single_file.py

Lines changed: 1 addition & 0 deletions
@@ -132,6 +132,7 @@ class StableDiffusionInstructPix2PixPipelineSingleFileSlowTests(unittest.TestCase):
         "https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/refs/heads/main/configs/generate.yaml"
     )
     repo_id = "timbrooks/instruct-pix2pix"
+    single_file_kwargs = {"extract_ema": True}
 
     def setUp(self):
         super().setUp()
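The original instruct-pix2pix checkpoint ships both EMA and non-EMA UNet weights; `extract_ema=True` presumably selects the EMA set so the single-file load matches the converted `timbrooks/instruct-pix2pix` repo. Equivalent direct call as a sketch:

# ckpt_path is the class attribute defined earlier in this test file (not shown in this hunk)
pipe = StableDiffusionInstructPix2PixPipeline.from_single_file(ckpt_path, extract_ema=True)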
