Skip to content

Commit aaaa341

Browse files
Merge branch 'huggingface:main' into patch-4
2 parents d5bea15 + ed4b752 commit aaaa341

File tree

7 files changed: 80 additions and 6 deletions.

.github/workflows/trufflehog.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,6 @@ jobs:
1313
fetch-depth: 0
1414
- name: Secret Scanning
1515
uses: trufflesecurity/trufflehog@main
16+
with:
17+
extra_args: --results=verified,unknown
18+

src/diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,11 @@
3636
def prepare_causal_attention_mask(
    num_frames: int, height_width: int, dtype: torch.dtype, device: torch.device, batch_size: int = None
) -> torch.Tensor:
    """Build a frame-wise (block) causal additive attention mask.

    The sequence is treated as ``num_frames`` consecutive groups of ``height_width`` tokens. A query token may
    attend to every token of its own frame and of all earlier frames; tokens of later frames are masked out.

    Args:
        num_frames: Number of frames in the sequence.
        height_width: Number of tokens per frame.
        dtype: Floating-point dtype of the returned mask.
        device: Device on which the mask is created.
        batch_size: If not ``None``, the mask is expanded (without copying) to a leading batch dimension.

    Returns:
        A tensor of shape ``(seq_len, seq_len)``, or ``(batch_size, seq_len, seq_len)`` when ``batch_size`` is
        given, where ``seq_len = num_frames * height_width``. Allowed positions hold ``0`` and masked positions
        hold ``-inf``.
    """
    # Frame index of every token in the flattened sequence, e.g. [0, 0, 1, 1, ...] for height_width == 2.
    frame_ids = torch.arange(num_frames, device=device).repeat_interleave(height_width)
    seq_len = frame_ids.shape[0]

    # blocked[q, k] is True when key token k belongs to a later frame than query token q.
    # Broadcasting a row against a column avoids materialising two (seq_len, seq_len) index grids.
    blocked = frame_ids.unsqueeze(0) > frame_ids.unsqueeze(1)

    # Create the mask directly in the requested dtype instead of building it in float32 and casting.
    mask = torch.zeros((seq_len, seq_len), dtype=dtype, device=device)
    mask.masked_fill_(blocked, float("-inf"))

    if batch_size is not None:
        mask = mask.unsqueeze(0).expand(batch_size, -1, -1)
    return mask

src/diffusers/models/modeling_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from huggingface_hub import DDUFEntry, create_repo, split_torch_state_dict_into_shards
3232
from huggingface_hub.utils import validate_hf_hub_args
3333
from torch import Tensor, nn
34+
from typing_extensions import Self
3435

3536
from .. import __version__
3637
from ..hooks import apply_layerwise_casting
@@ -605,7 +606,7 @@ def dequantize(self):
605606

606607
@classmethod
607608
@validate_hf_hub_args
608-
def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
609+
def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs) -> Self:
609610
r"""
610611
Instantiate a pretrained PyTorch model from a pretrained model configuration.
611612

tests/models/autoencoders/test_models_autoencoder_hunyuan_video.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import torch
1919

2020
from diffusers import AutoencoderKLHunyuanVideo
21+
from diffusers.models.autoencoders.autoencoder_kl_hunyuan_video import prepare_causal_attention_mask
2122
from diffusers.utils.testing_utils import (
2223
enable_full_determinism,
2324
floats_tensor,
@@ -182,3 +183,28 @@ def test_forward_with_norm_groups(self):
182183
@unittest.skip("Unsupported test.")
183184
def test_outputs_equivalence(self):
184185
pass
186+
187+
def test_prepare_causal_attention_mask(self):
    """Check the vectorised causal-mask builder against a row-by-row loop reference."""

    def prepare_causal_attention_mask_orig(
        num_frames: int, height_width: int, dtype: torch.dtype, device: torch.device, batch_size: int = None
    ) -> torch.Tensor:
        # Reference implementation: start fully masked, then open up, row by row, every key position
        # that belongs to the query's own frame or an earlier one.
        seq_len = num_frames * height_width
        mask = torch.full((seq_len, seq_len), float("-inf"), dtype=dtype, device=device)
        for i in range(seq_len):
            i_frame = i // height_width
            mask[i, : (i_frame + 1) * height_width] = 0
        if batch_size is not None:
            mask = mask.unsqueeze(0).expand(batch_size, -1, -1)
        return mask

    # test with some odd shapes
    original_mask = prepare_causal_attention_mask_orig(
        num_frames=31, height_width=111, dtype=torch.float32, device=torch_device
    )
    new_mask = prepare_causal_attention_mask(
        num_frames=31, height_width=111, dtype=torch.float32, device=torch_device
    )
    # Mask entries are exactly 0 or -inf, so compare exactly. `torch.equal` also checks the shape,
    # whereas `torch.allclose` would silently broadcast a mismatched one.
    self.assertTrue(
        torch.equal(original_mask, new_mask),
        "Causal attention mask should be the same",
    )

    # Also exercise the optional batch expansion on a small shape.
    original_batched = prepare_causal_attention_mask_orig(
        num_frames=3, height_width=5, dtype=torch.float32, device=torch_device, batch_size=2
    )
    new_batched = prepare_causal_attention_mask(
        num_frames=3, height_width=5, dtype=torch.float32, device=torch_device, batch_size=2
    )
    self.assertTrue(
        torch.equal(original_batched, new_batched),
        "Causal attention mask should be the same",
    )

tests/models/autoencoders/test_models_autoencoder_oobleck.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ def test_forward_with_norm_groups(self):
114114
def test_set_attn_processor_for_determinism(self):
115115
return
116116

117+
@unittest.skip(
118+
"Test not supported because of 'weight_norm_fwd_first_dim_kernel' not implemented for 'Float8_e4m3fn'"
119+
)
120+
def test_layerwise_casting_training(self):
121+
return super().test_layerwise_casting_training()
122+
117123
@unittest.skip(
118124
"The convolution layers of AutoencoderOobleck are wrapped with torch.nn.utils.weight_norm. This causes the hook's pre_forward to not "
119125
"cast the module weights to compute_dtype (as required by forward pass). As a result, forward pass errors out. To fix:\n"

tests/models/test_modeling_common.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,6 +1338,36 @@ def test_variant_sharded_ckpt_right_format(self):
13381338
# Example: diffusion_pytorch_model.fp16-00001-of-00002.safetensors
13391339
assert all(f.split(".")[1].split("-")[0] == variant for f in shard_files)
13401340

1341+
def test_layerwise_casting_training(self):
    """Smoke-test one forward/backward pass in train mode with layerwise casting enabled.

    The test only checks that the pass runs without raising for each storage/compute dtype pair;
    it makes no assertion about the numerical values.
    """

    def test_fn(storage_dtype, compute_dtype):
        # This combination is skipped on CPU.
        # NOTE(review): presumably because bfloat16 compute is unsupported or too slow there - confirm.
        if torch.device(torch_device).type == "cpu" and compute_dtype == torch.bfloat16:
            return
        init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()

        model = self.model_class(**init_dict)
        model = model.to(torch_device, dtype=compute_dtype)
        model.enable_layerwise_casting(storage_dtype=storage_dtype, compute_dtype=compute_dtype)
        model.train()

        inputs_dict = cast_maybe_tensor_dtype(inputs_dict, torch.float32, compute_dtype)
        # NOTE(review): forward pass and loss are placed under autocast and backward outside it;
        # the nesting was reconstructed from a flattened diff - confirm against the repository.
        with torch.amp.autocast(device_type=torch.device(torch_device).type):
            output = model(**inputs_dict)

            if isinstance(output, dict):
                output = output.to_tuple()[0]

            # Regress against random noise whose batch size comes from the main input.
            input_tensor = inputs_dict[self.main_input_name]
            noise = torch.randn((input_tensor.shape[0],) + self.output_shape).to(torch_device)
            noise = cast_maybe_tensor_dtype(noise, torch.float32, compute_dtype)
            loss = torch.nn.functional.mse_loss(output, noise)

        loss.backward()

    test_fn(torch.float16, torch.float32)
    test_fn(torch.float8_e4m3fn, torch.float32)
    test_fn(torch.float8_e5m2, torch.float32)
    test_fn(torch.float8_e4m3fn, torch.bfloat16)
1370+
13411371
def test_layerwise_casting_inference(self):
13421372
from diffusers.hooks.layerwise_casting import DEFAULT_SKIP_MODULES_PATTERN, SUPPORTED_PYTORCH_LAYERS
13431373

tests/models/unets/test_models_unet_1d.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ def test_ema_training(self):
6060
def test_training(self):
6161
pass
6262

63+
@unittest.skip("Test not supported.")
64+
def test_layerwise_casting_training(self):
65+
pass
66+
6367
def test_determinism(self):
6468
super().test_determinism()
6569

@@ -239,6 +243,10 @@ def test_ema_training(self):
239243
def test_training(self):
240244
pass
241245

246+
@unittest.skip("Test not supported.")
247+
def test_layerwise_casting_training(self):
248+
pass
249+
242250
def prepare_init_args_and_inputs_for_common(self):
243251
init_dict = {
244252
"in_channels": 14,

0 commit comments

Comments
 (0)