Skip to content

Commit 1dc755c

Browse files
committed
Merge remote-tracking branch 'upstream/main' into dtype-map
2 parents b1237f7 + e8fc8b1 commit 1dc755c

File tree

5 files changed

+45
-12
lines changed

5 files changed

+45
-12
lines changed

docs/source/en/installation.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,10 @@ Your Python environment will find the `main` version of 🤗 Diffusers on the ne
161161

162162
Model weights and files are downloaded from the Hub to a cache which is usually your home directory. You can change the cache location by specifying the `HF_HOME` or `HUGGINGFACE_HUB_CACHE` environment variables or configuring the `cache_dir` parameter in methods like [`~DiffusionPipeline.from_pretrained`].
163163

164-
Cached files allow you to run 🤗 Diffusers offline. To prevent 🤗 Diffusers from connecting to the internet, set the `HF_HUB_OFFLINE` environment variable to `True` and 🤗 Diffusers will only load previously downloaded files in the cache.
164+
Cached files allow you to run 🤗 Diffusers offline. To prevent 🤗 Diffusers from connecting to the internet, set the `HF_HUB_OFFLINE` environment variable to `1` and 🤗 Diffusers will only load previously downloaded files in the cache.
165165

166166
```shell
167-
export HF_HUB_OFFLINE=True
167+
export HF_HUB_OFFLINE=1
168168
```
169169

170170
For more details about managing and cleaning the cache, take a look at the [caching](https://huggingface.co/docs/huggingface_hub/guides/manage-cache) guide.
@@ -179,14 +179,16 @@ Telemetry is only sent when loading models and pipelines from the Hub,
179179
and it is not collected if you're loading local files.
180180

181181
We understand that not everyone wants to share additional information, and we respect your privacy.
182-
You can disable telemetry collection by setting the `DISABLE_TELEMETRY` environment variable from your terminal:
182+
You can disable telemetry collection by setting the `HF_HUB_DISABLE_TELEMETRY` environment variable from your terminal:
183183

184184
On Linux/macOS:
185+
185186
```bash
186-
export DISABLE_TELEMETRY=YES
187+
export HF_HUB_DISABLE_TELEMETRY=1
187188
```
188189

189190
On Windows:
191+
190192
```bash
191-
set DISABLE_TELEMETRY=YES
193+
set HF_HUB_DISABLE_TELEMETRY=1
192194
```

examples/community/lpw_stable_diffusion_xl.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1773,7 +1773,7 @@ def denoising_value_valid(dnv):
17731773
f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
17741774
f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
17751775
f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}"
1776-
f" = {num_channels_latents+num_channels_masked_image+num_channels_mask}. Please verify the config of"
1776+
f" = {num_channels_latents + num_channels_masked_image + num_channels_mask}. Please verify the config of"
17771777
" `pipeline.unet` or your `mask_image` or `image` input."
17781778
)
17791779
elif num_channels_unet != 4:
@@ -1924,7 +1924,22 @@ def denoising_value_valid(dnv):
19241924
self.upcast_vae()
19251925
latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
19261926

1927-
image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
1927+
# unscale/denormalize the latents
1928+
# denormalize with the mean and std if available and not None
1929+
has_latents_mean = hasattr(self.vae.config, "latents_mean") and self.vae.config.latents_mean is not None
1930+
has_latents_std = hasattr(self.vae.config, "latents_std") and self.vae.config.latents_std is not None
1931+
if has_latents_mean and has_latents_std:
1932+
latents_mean = (
1933+
torch.tensor(self.vae.config.latents_mean).view(1, 4, 1, 1).to(latents.device, latents.dtype)
1934+
)
1935+
latents_std = (
1936+
torch.tensor(self.vae.config.latents_std).view(1, 4, 1, 1).to(latents.device, latents.dtype)
1937+
)
1938+
latents = latents * latents_std / self.vae.config.scaling_factor + latents_mean
1939+
else:
1940+
latents = latents / self.vae.config.scaling_factor
1941+
1942+
image = self.vae.decode(latents, return_dict=False)[0]
19281943

19291944
# cast back to fp16 if needed
19301945
if needs_upcasting:

src/diffusers/pipelines/ltx/pipeline_ltx_image2video.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -487,19 +487,21 @@ def prepare_latents(
487487
) -> torch.Tensor:
488488
height = height // self.vae_spatial_compression_ratio
489489
width = width // self.vae_spatial_compression_ratio
490-
num_frames = (
491-
(num_frames - 1) // self.vae_temporal_compression_ratio + 1 if latents is None else latents.size(2)
492-
)
490+
num_frames = (num_frames - 1) // self.vae_temporal_compression_ratio + 1
493491

494492
shape = (batch_size, num_channels_latents, num_frames, height, width)
495493
mask_shape = (batch_size, 1, num_frames, height, width)
496494

497495
if latents is not None:
498-
conditioning_mask = latents.new_zeros(shape)
496+
conditioning_mask = latents.new_zeros(mask_shape)
499497
conditioning_mask[:, :, 0] = 1.0
500498
conditioning_mask = self._pack_latents(
501499
conditioning_mask, self.transformer_spatial_patch_size, self.transformer_temporal_patch_size
502-
)
500+
).squeeze(-1)
501+
if latents.ndim != 3 or latents.shape[:2] != conditioning_mask.shape:
502+
raise ValueError(
503+
f"Provided `latents` tensor has shape {latents.shape}, but the expected shape is {conditioning_mask.shape + (num_channels_latents,)}."
504+
)
503505
return latents.to(device=device, dtype=dtype), conditioning_mask
504506

505507
if isinstance(generator, list):

src/diffusers/pipelines/wan/pipeline_wan.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,13 @@ def __call__(
458458
callback_on_step_end_tensor_inputs,
459459
)
460460

461+
if num_frames % self.vae_scale_factor_temporal != 1:
462+
logger.warning(
463+
f"`num_frames - 1` has to be divisible by {self.vae_scale_factor_temporal}. Rounding to the nearest number."
464+
)
465+
num_frames = num_frames // self.vae_scale_factor_temporal * self.vae_scale_factor_temporal + 1
466+
num_frames = max(num_frames, 1)
467+
461468
self._guidance_scale = guidance_scale
462469
self._attention_kwargs = attention_kwargs
463470
self._current_timestep = None

src/diffusers/pipelines/wan/pipeline_wan_i2v.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,13 @@ def __call__(
559559
callback_on_step_end_tensor_inputs,
560560
)
561561

562+
if num_frames % self.vae_scale_factor_temporal != 1:
563+
logger.warning(
564+
f"`num_frames - 1` has to be divisible by {self.vae_scale_factor_temporal}. Rounding to the nearest number."
565+
)
566+
num_frames = num_frames // self.vae_scale_factor_temporal * self.vae_scale_factor_temporal + 1
567+
num_frames = max(num_frames, 1)
568+
562569
self._guidance_scale = guidance_scale
563570
self._attention_kwargs = attention_kwargs
564571
self._current_timestep = None

0 commit comments

Comments
 (0)