Commit 0319326

Merge branch 'main' into patch-1

2 parents 2a6729d + b73c738, commit 0319326

File tree: 9 files changed, +46 -108 lines

  examples/controlnet/train_controlnet_sd3.py (+1 -1)
  examples/server/requirements.txt (+9 -6)
  src/diffusers/loaders/single_file_model.py (+1 -4)
  src/diffusers/loaders/single_file_utils.py (+1 -7)
  src/diffusers/loaders/transformer_flux.py (+1 -3)
  src/diffusers/loaders/transformer_sd3.py (+1 -3)
  src/diffusers/loaders/unet.py (+1 -3)
  src/diffusers/models/modeling_utils.py (+1 -4)
  tests/pipelines/flux/test_pipeline_flux.py (+30 -77)
examples/controlnet/train_controlnet_sd3.py

Lines changed: 1 addition & 1 deletion

@@ -1330,7 +1330,7 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
                 # controlnet(s) inference
                 controlnet_image = batch["conditioning_pixel_values"].to(dtype=weight_dtype)
                 controlnet_image = vae.encode(controlnet_image).latent_dist.sample()
-                controlnet_image = controlnet_image * vae.config.scaling_factor
+                controlnet_image = (controlnet_image - vae.config.shift_factor) * vae.config.scaling_factor

                 control_block_res_samples = controlnet(
                     hidden_states=noisy_model_input,
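
Note on the change above: the SD3 VAE defines both a shift_factor and a scaling_factor, so the conditioning latents are now normalized as (latents - shift_factor) * scaling_factor rather than scaled only. A minimal sketch of that normalization and its inverse, assuming an AutoencoderKL-style vae exposing those two config attributes (the helper names are illustrative, not part of the training script):

import torch

def encode_to_normalized_latents(vae, pixel_values: torch.Tensor) -> torch.Tensor:
    # Encode images, then normalize: subtract the shift factor before scaling.
    latents = vae.encode(pixel_values).latent_dist.sample()
    return (latents - vae.config.shift_factor) * vae.config.scaling_factor

def decode_from_normalized_latents(vae, latents: torch.Tensor) -> torch.Tensor:
    # Inverse transform: unscale first, then add the shift factor back.
    latents = latents / vae.config.scaling_factor + vae.config.shift_factor
    return vae.decode(latents).sample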

examples/server/requirements.txt

Lines changed: 9 additions & 6 deletions

@@ -1,10 +1,10 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile requirements.in -o requirements.txt
-aiohappyeyeballs==2.4.3
+aiohappyeyeballs==2.6.1
     # via aiohttp
-aiohttp==3.10.10
+aiohttp==3.12.14
     # via -r requirements.in
-aiosignal==1.3.1
+aiosignal==1.4.0
     # via aiohttp
 annotated-types==0.7.0
     # via pydantic
@@ -29,7 +29,6 @@ filelock==3.16.1
     #   huggingface-hub
     #   torch
     #   transformers
-    #   triton
 frozenlist==1.5.0
     # via
     #   aiohttp
@@ -111,7 +110,9 @@ prometheus-client==0.21.0
 prometheus-fastapi-instrumentator==7.0.0
     # via -r requirements.in
 propcache==0.2.0
-    # via yarl
+    # via
+    #   aiohttp
+    #   yarl
 py-consul==1.5.3
     # via -r requirements.in
 pydantic==2.9.2
@@ -155,7 +156,9 @@ triton==3.3.0
     # via torch
 typing-extensions==4.12.2
     # via
+    #   aiosignal
     #   anyio
+    #   exceptiongroup
     #   fastapi
     #   huggingface-hub
     #   multidict
@@ -168,5 +171,5 @@ urllib3==2.5.0
     # via requests
 uvicorn==0.32.0
     # via -r requirements.in
-yarl==1.16.0
+yarl==1.18.3
     # via aiohttp

src/diffusers/loaders/single_file_model.py

Lines changed: 1 addition & 4 deletions

@@ -24,7 +24,7 @@
 from .. import __version__
 from ..quantizers import DiffusersAutoQuantizer
 from ..utils import deprecate, is_accelerate_available, logging
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache
 from .single_file_utils import (
     SingleFileComponentError,
     convert_animatediff_checkpoint_to_diffusers,
@@ -431,10 +431,7 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
                 keep_in_fp32_modules=keep_in_fp32_modules,
                 unexpected_keys=unexpected_keys,
             )
-            # Ensure tensors are correctly placed on device by synchronizing before returning control to user. This is
-            # required because we move tensors with non_blocking=True, which is slightly faster for model loading.
            empty_device_cache()
-            device_synchronize()
         else:
             _, unexpected_keys = model.load_state_dict(diffusers_format_checkpoint, strict=False)
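
For context on the comment deleted above: non_blocking copies return before the data has actually landed on the device, which is why an explicit synchronization used to follow the cache flush. A minimal, hypothetical sketch of that general pattern in plain PyTorch (not the diffusers loading code itself), assuming a CUDA device is available:

import torch

def copy_tensors_async(tensors, device="cuda"):
    # Pinned host memory lets .to(..., non_blocking=True) overlap the copy with other host work.
    moved = [t.pin_memory().to(device, non_blocking=True) for t in tensors]
    # Without this barrier, the caller could observe tensors whose copies are still in flight.
    torch.cuda.synchronize(device)
    return moved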

src/diffusers/loaders/single_file_utils.py

Lines changed: 1 addition & 7 deletions

@@ -46,7 +46,7 @@
 )
 from ..utils.constants import DIFFUSERS_REQUEST_TIMEOUT
 from ..utils.hub_utils import _get_model_file
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache


 if is_transformers_available():
@@ -1690,10 +1690,7 @@ def create_diffusers_clip_model_from_ldm(

     if is_accelerate_available():
         load_model_dict_into_meta(model, diffusers_format_checkpoint, dtype=torch_dtype)
-        # Ensure tensors are correctly placed on device by synchronizing before returning control to user. This is
-        # required because we move tensors with non_blocking=True, which is slightly faster for model loading.
         empty_device_cache()
-        device_synchronize()
     else:
         model.load_state_dict(diffusers_format_checkpoint, strict=False)

@@ -2153,10 +2150,7 @@ def create_diffusers_t5_model_from_checkpoint(

     if is_accelerate_available():
         load_model_dict_into_meta(model, diffusers_format_checkpoint, dtype=torch_dtype)
-        # Ensure tensors are correctly placed on device by synchronizing before returning control to user. This is
-        # required because we move tensors with non_blocking=True, which is slightly faster for model loading.
         empty_device_cache()
-        device_synchronize()
     else:
         model.load_state_dict(diffusers_format_checkpoint)

src/diffusers/loaders/transformer_flux.py

Lines changed: 1 addition & 3 deletions

@@ -19,7 +19,7 @@
 )
 from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
 from ..utils import is_accelerate_available, is_torch_version, logging
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache


 if is_accelerate_available():
@@ -82,7 +82,6 @@ def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_us
             device_map = {"": self.device}
             load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype)
         empty_device_cache()
-        device_synchronize()

         return image_projection

@@ -158,7 +157,6 @@ def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_
             key_id += 1

         empty_device_cache()
-        device_synchronize()

         return attn_procs

src/diffusers/loaders/transformer_sd3.py

Lines changed: 1 addition & 3 deletions

@@ -18,7 +18,7 @@
 from ..models.embeddings import IPAdapterTimeImageProjection
 from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
 from ..utils import is_accelerate_available, is_torch_version, logging
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache


 logger = logging.get_logger(__name__)
@@ -82,7 +82,6 @@ def _convert_ip_adapter_attn_to_diffusers(
         )

         empty_device_cache()
-        device_synchronize()

         return attn_procs

@@ -152,7 +151,6 @@ def _convert_ip_adapter_image_proj_to_diffusers(
             device_map = {"": self.device}
             load_model_dict_into_meta(image_proj, updated_state_dict, device_map=device_map, dtype=self.dtype)
         empty_device_cache()
-        device_synchronize()

         return image_proj

src/diffusers/loaders/unet.py

Lines changed: 1 addition & 3 deletions

@@ -43,7 +43,7 @@
     is_torch_version,
     logging,
 )
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache
 from .lora_base import _func_optionally_disable_offloading
 from .lora_pipeline import LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE, TEXT_ENCODER_NAME, UNET_NAME
 from .utils import AttnProcsLayers
@@ -755,7 +755,6 @@ def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_us
             device_map = {"": self.device}
             load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype)
         empty_device_cache()
-        device_synchronize()

         return image_projection

@@ -854,7 +853,6 @@ def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_
             key_id += 2

         empty_device_cache()
-        device_synchronize()

         return attn_procs

src/diffusers/models/modeling_utils.py

Lines changed: 1 addition & 4 deletions

@@ -62,7 +62,7 @@
     load_or_create_model_card,
     populate_model_card,
 )
-from ..utils.torch_utils import device_synchronize, empty_device_cache
+from ..utils.torch_utils import empty_device_cache
 from .model_loading_utils import (
     _caching_allocator_warmup,
     _determine_device_map,
@@ -1540,10 +1540,7 @@ def _load_pretrained_model(
                 assign_to_params_buffers = check_support_param_buffer_assignment(model, state_dict)
             error_msgs += _load_state_dict_into_model(model, state_dict, assign_to_params_buffers)

-        # Ensure tensors are correctly placed on device by synchronizing before returning control to user. This is
-        # required because we move tensors with non_blocking=True, which is slightly faster for model loading.
         empty_device_cache()
-        device_synchronize()

         if offload_index is not None and len(offload_index) > 0:
             save_offload_index(offload_index, offload_folder)

tests/pipelines/flux/test_pipeline_flux.py

Lines changed: 30 additions & 77 deletions

@@ -155,7 +155,7 @@ def test_flux_different_prompts(self):

         # Outputs should be different here
         # For some reasons, they don't show large differences
-        assert max_diff > 1e-6
+        self.assertGreater(max_diff, 1e-6, "Outputs should be different for different prompts.")

     def test_fused_qkv_projections(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
@@ -187,14 +187,17 @@ def test_fused_qkv_projections(self):
         image = pipe(**inputs).images
         image_slice_disabled = image[0, -3:, -3:, -1]

-        assert np.allclose(original_image_slice, image_slice_fused, atol=1e-3, rtol=1e-3), (
-            "Fusion of QKV projections shouldn't affect the outputs."
+        self.assertTrue(
+            np.allclose(original_image_slice, image_slice_fused, atol=1e-3, rtol=1e-3),
+            ("Fusion of QKV projections shouldn't affect the outputs."),
         )
-        assert np.allclose(image_slice_fused, image_slice_disabled, atol=1e-3, rtol=1e-3), (
-            "Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled."
+        self.assertTrue(
+            np.allclose(image_slice_fused, image_slice_disabled, atol=1e-3, rtol=1e-3),
+            ("Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled."),
         )
-        assert np.allclose(original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2), (
-            "Original outputs should match when fused QKV projections are disabled."
+        self.assertTrue(
+            np.allclose(original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2),
+            ("Original outputs should match when fused QKV projections are disabled."),
         )

     def test_flux_image_output_shape(self):
@@ -209,7 +212,11 @@ def test_flux_image_output_shape(self):
             inputs.update({"height": height, "width": width})
             image = pipe(**inputs).images[0]
             output_height, output_width, _ = image.shape
-            assert (output_height, output_width) == (expected_height, expected_width)
+            self.assertEqual(
+                (output_height, output_width),
+                (expected_height, expected_width),
+                f"Output shape {image.shape} does not match expected shape {(expected_height, expected_width)}",
+            )

     def test_flux_true_cfg(self):
         pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
@@ -220,7 +227,9 @@ def test_flux_true_cfg(self):
         inputs["negative_prompt"] = "bad quality"
         inputs["true_cfg_scale"] = 2.0
         true_cfg_out = pipe(**inputs, generator=torch.manual_seed(0)).images[0]
-        assert not np.allclose(no_true_cfg_out, true_cfg_out)
+        self.assertFalse(
+            np.allclose(no_true_cfg_out, true_cfg_out), "Outputs should be different when true_cfg_scale is set."
+        )


 @nightly
@@ -269,45 +278,17 @@ def test_flux_inference(self):

         image = pipe(**inputs).images[0]
         image_slice = image[0, :10, :10]
+        # fmt: off
         expected_slice = np.array(
-            [
-                0.3242,
-                0.3203,
-                0.3164,
-                0.3164,
-                0.3125,
-                0.3125,
-                0.3281,
-                0.3242,
-                0.3203,
-                0.3301,
-                0.3262,
-                0.3242,
-                0.3281,
-                0.3242,
-                0.3203,
-                0.3262,
-                0.3262,
-                0.3164,
-                0.3262,
-                0.3281,
-                0.3184,
-                0.3281,
-                0.3281,
-                0.3203,
-                0.3281,
-                0.3281,
-                0.3164,
-                0.3320,
-                0.3320,
-                0.3203,
-            ],
+            [0.3242, 0.3203, 0.3164, 0.3164, 0.3125, 0.3125, 0.3281, 0.3242, 0.3203, 0.3301, 0.3262, 0.3242, 0.3281, 0.3242, 0.3203, 0.3262, 0.3262, 0.3164, 0.3262, 0.3281, 0.3184, 0.3281, 0.3281, 0.3203, 0.3281, 0.3281, 0.3164, 0.3320, 0.3320, 0.3203],
             dtype=np.float32,
         )
+        # fmt: on

         max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())
-
-        assert max_diff < 1e-4
+        self.assertLess(
+            max_diff, 1e-4, f"Image slice is different from expected slice: {image_slice} != {expected_slice}"
+        )


 @slow
@@ -377,42 +358,14 @@ def test_flux_ip_adapter_inference(self):
         image = pipe(**inputs).images[0]
         image_slice = image[0, :10, :10]

+        # fmt: off
         expected_slice = np.array(
-            [
-                0.1855,
-                0.1680,
-                0.1406,
-                0.1953,
-                0.1699,
-                0.1465,
-                0.2012,
-                0.1738,
-                0.1484,
-                0.2051,
-                0.1797,
-                0.1523,
-                0.2012,
-                0.1719,
-                0.1445,
-                0.2070,
-                0.1777,
-                0.1465,
-                0.2090,
-                0.1836,
-                0.1484,
-                0.2129,
-                0.1875,
-                0.1523,
-                0.2090,
-                0.1816,
-                0.1484,
-                0.2110,
-                0.1836,
-                0.1543,
-            ],
+            [0.1855, 0.1680, 0.1406, 0.1953, 0.1699, 0.1465, 0.2012, 0.1738, 0.1484, 0.2051, 0.1797, 0.1523, 0.2012, 0.1719, 0.1445, 0.2070, 0.1777, 0.1465, 0.2090, 0.1836, 0.1484, 0.2129, 0.1875, 0.1523, 0.2090, 0.1816, 0.1484, 0.2110, 0.1836, 0.1543],
             dtype=np.float32,
         )
+        # fmt: on

         max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())
-
-        assert max_diff < 1e-4, f"{image_slice} != {expected_slice}"
+        self.assertLess(
+            max_diff, 1e-4, f"Image slice is different from expected slice: {image_slice} != {expected_slice}"
+        )
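
The test updates above replace bare assert statements with unittest assertions that carry failure messages, and compare image slices through a cosine-similarity distance. A rough, hypothetical sketch of that comparison style (the helper below is illustrative only; the real tests use numpy_cosine_similarity_distance from diffusers' test utilities):

import numpy as np

def cosine_distance(a: np.ndarray, b: np.ndarray) -> float:
    # 1 - cosine similarity between the flattened arrays; 0.0 means identical direction.
    a, b = a.flatten().astype(np.float64), b.flatten().astype(np.float64)
    return float(1.0 - np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Usage inside a unittest.TestCase method:
#     max_diff = cosine_distance(expected_slice, image_slice)
#     self.assertLess(max_diff, 1e-4, f"{image_slice} != {expected_slice}")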
