Commit 8eb9487

Merge branch 'main' into test-sana-lora-training
2 parents: 9918e70 + f35a387

13 files changed (+38 −22 lines)

docs/source/en/quantization/gguf.md

Lines changed: 2 additions & 2 deletions

````diff
@@ -25,9 +25,9 @@ pip install -U gguf
 
 Since GGUF is a single file format, use [`~FromSingleFileMixin.from_single_file`] to load the model and pass in the [`GGUFQuantizationConfig`].
 
-When using GGUF checkpoints, the quantized weights remain in a low memory `dtype`(typically `torch.unint8`) and are dynamically dequantized and cast to the configured `compute_dtype` during each module's forward pass through the model. The `GGUFQuantizationConfig` allows you to set the `compute_dtype`.
+When using GGUF checkpoints, the quantized weights remain in a low memory `dtype` (typically `torch.uint8`) and are dynamically dequantized and cast to the configured `compute_dtype` during each module's forward pass through the model. The `GGUFQuantizationConfig` allows you to set the `compute_dtype`.
 
-The functions used for dynamic dequantizatation are based on the great work done by [city96](https://github.com/city96/ComfyUI-GGUF), who created the Pytorch ports of the original (`numpy`)[https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/quants.py] implementation by [compilade](https://github.com/compilade).
+The functions used for dynamic dequantization are based on the great work done by [city96](https://github.com/city96/ComfyUI-GGUF), who created the PyTorch ports of the original [`numpy`](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/gguf/quants.py) implementation by [compilade](https://github.com/compilade).
 
 ```python
 import torch
````
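The hunk's trailing context opens the doc's own usage example. A minimal sketch of how that usage plausibly continues (the GGUF checkpoint URL below is illustrative, not taken from this commit):

```python
import torch

from diffusers import FluxTransformer2DModel, GGUFQuantizationConfig

# Illustrative community GGUF checkpoint; any single-file GGUF works here.
ckpt_path = "https://huggingface.co/city96/FLUX.1-dev-gguf/blob/main/flux1-dev-Q2_K.gguf"

transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path,
    # Weights stay in the quantized dtype (e.g. torch.uint8) and are
    # dequantized to compute_dtype on the fly in each forward pass.
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
)
```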

docs/source/en/quantization/overview.md

Lines changed: 3 additions & 3 deletions

```diff
@@ -33,8 +33,8 @@ If you are new to the quantization field, we recommend you to check out these be
 ## When to use what?
 
 Diffusers currently supports the following quantization methods.
-- [BitsandBytes]()
-- [TorchAO]()
-- [GGUF]()
+- [BitsandBytes](./bitsandbytes.md)
+- [TorchAO](./torchao.md)
+- [GGUF](./gguf.md)
 
 [This resource](https://huggingface.co/docs/transformers/main/en/quantization/overview#when-to-use-what) provides a good overview of the pros and cons of different quantization techniques.
```

src/diffusers/loaders/single_file_utils.py

Lines changed: 9 additions & 1 deletion

```diff
@@ -151,6 +151,8 @@
     "animatediff_scribble": {"pretrained_model_name_or_path": "guoyww/animatediff-sparsectrl-scribble"},
     "animatediff_rgb": {"pretrained_model_name_or_path": "guoyww/animatediff-sparsectrl-rgb"},
     "flux-dev": {"pretrained_model_name_or_path": "black-forest-labs/FLUX.1-dev"},
+    "flux-fill": {"pretrained_model_name_or_path": "black-forest-labs/FLUX.1-Fill-dev"},
+    "flux-depth": {"pretrained_model_name_or_path": "black-forest-labs/FLUX.1-Depth-dev"},
     "flux-schnell": {"pretrained_model_name_or_path": "black-forest-labs/FLUX.1-schnell"},
     "ltx-video": {"pretrained_model_name_or_path": "Lightricks/LTX-Video"},
     "autoencoder-dc-f128c512": {"pretrained_model_name_or_path": "mit-han-lab/dc-ae-f128c512-mix-1.0-diffusers"},
@@ -587,7 +589,13 @@ def infer_diffusers_model_type(checkpoint):
     if any(
         g in checkpoint for g in ["guidance_in.in_layer.bias", "model.diffusion_model.guidance_in.in_layer.bias"]
     ):
-        model_type = "flux-dev"
+        if checkpoint["img_in.weight"].shape[1] == 384:
+            model_type = "flux-fill"
+
+        elif checkpoint["img_in.weight"].shape[1] == 128:
+            model_type = "flux-depth"
+        else:
+            model_type = "flux-dev"
     else:
         model_type = "flux-schnell"
```
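With these entries, `infer_diffusers_model_type` tells the guidance-distilled Flux variants apart by the input width of `img_in.weight` (384 in-features for Fill, 128 for Depth, anything else falls back to Dev). A hedged sketch of what this enables (the checkpoint URL is illustrative, not taken from the commit):

```python
import torch

from diffusers import FluxTransformer2DModel

# from_single_file inspects the state dict, sees img_in.weight with 384
# input features, and maps the checkpoint to black-forest-labs/FLUX.1-Fill-dev.
transformer = FluxTransformer2DModel.from_single_file(
    "https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev/blob/main/flux1-fill-dev.safetensors",
    torch_dtype=torch.bfloat16,
)
```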

src/diffusers/models/embeddings.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -691,7 +691,7 @@ def _get_positional_embeddings(
             output_type="pt",
         )
         pos_embedding = pos_embedding.flatten(0, 1)
-        joint_pos_embedding = torch.zeros(
+        joint_pos_embedding = pos_embedding.new_zeros(
             1, self.max_text_seq_length + num_patches, self.embed_dim, requires_grad=False
         )
         joint_pos_embedding.data[:, self.max_text_seq_length :].copy_(pos_embedding)
```
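`new_zeros` builds the joint buffer with the same dtype and device as `pos_embedding`, so the `copy_` that follows never crosses dtypes or devices (plain `torch.zeros` would default to float32 on CPU). A small self-contained illustration with stand-in shapes, not the model's real dimensions:

```python
import torch

# Stand-in for a flattened positional embedding in half precision.
pos_embedding = torch.randn(16, 64, dtype=torch.float16)

# new_zeros inherits float16 (and the device) from pos_embedding.
joint_pos_embedding = pos_embedding.new_zeros(1, 4 + 16, 64, requires_grad=False)
assert joint_pos_embedding.dtype == pos_embedding.dtype

# The copy into the image-patch slice is now dtype/device-safe.
joint_pos_embedding[:, 4:].copy_(pos_embedding)
```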

src/diffusers/pipelines/mochi/pipeline_mochi.py

Lines changed: 8 additions & 2 deletions

```diff
@@ -188,6 +188,7 @@ def __init__(
         text_encoder: T5EncoderModel,
         tokenizer: T5TokenizerFast,
         transformer: MochiTransformer3DModel,
+        force_zeros_for_empty_prompt: bool = False,
     ):
         super().__init__()
 
@@ -205,10 +206,11 @@ def __init__(
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_spatial_scale_factor)
         self.tokenizer_max_length = (
-            self.tokenizer.model_max_length if hasattr(self, "tokenizer") and self.tokenizer is not None else 77
+            self.tokenizer.model_max_length if hasattr(self, "tokenizer") and self.tokenizer is not None else 256
         )
         self.default_height = 480
         self.default_width = 848
+        self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
 
     def _get_t5_prompt_embeds(
         self,
@@ -236,7 +238,11 @@ def _get_t5_prompt_embeds(
         text_input_ids = text_inputs.input_ids
         prompt_attention_mask = text_inputs.attention_mask
         prompt_attention_mask = prompt_attention_mask.bool().to(device)
-        if prompt == "" or prompt[-1] == "":
+
+        # The original Mochi implementation zeros out empty negative prompts
+        # but this can lead to overflow when placing the entire pipeline under the autocast context
+        # adding this here so that we can enable zeroing prompts if necessary
+        if self.config.force_zeros_for_empty_prompt and (prompt == "" or prompt[-1] == ""):
             text_input_ids = torch.zeros_like(text_input_ids, device=device)
             prompt_attention_mask = torch.zeros_like(prompt_attention_mask, dtype=torch.bool, device=device)
```
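The pipeline now defaults to *not* zeroing empty negative prompts (avoiding the autocast overflow noted in the diff's comment) while letting callers opt back into the original Mochi behavior. A hedged usage sketch; that `from_pretrained` forwards this optional `__init__` kwarg is an assumption here, not shown in the commit:

```python
import torch

from diffusers import MochiPipeline

pipe = MochiPipeline.from_pretrained(
    "genmo/mochi-1-preview",
    # Assumed to be forwarded to __init__ and registered to the config;
    # True restores the original behavior of zeroing empty negative prompts.
    force_zeros_for_empty_prompt=True,
    torch_dtype=torch.bfloat16,
)
```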

src/diffusers/schedulers/scheduling_deis_multistep.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -289,6 +289,7 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
             sigmas = 1.0 - alphas
             sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy()
             timesteps = (sigmas * self.config.num_train_timesteps).copy()
+            sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
         else:
             sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
             sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5
```
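This is one instance of a fix shared across the scheduler files in this commit: the `use_flow_sigmas` branch produced `num_inference_steps` sigmas, while (on the reading suggested by the surrounding branches) the rest of the scheduler consumes `num_inference_steps + 1`, the extra entry being the terminal sigma for the final step. Duplicating the last sigma pads the array; the same one-line pattern recurs in the DPM-Solver inverse and SA-Solver hunks below. A minimal sketch with stand-in values:

```python
import numpy as np

num_inference_steps = 4
# Stand-in for the flow sigmas computed above: one entry per step.
sigmas = np.linspace(1.0, 0.25, num_inference_steps)

# Pad with a duplicate of the final sigma so len(sigmas) == steps + 1.
sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
assert len(sigmas) == num_inference_steps + 1
```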

src/diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py

Lines changed: 3 additions & 0 deletions

```diff
@@ -291,14 +291,17 @@ def set_timesteps(self, num_inference_steps: int = None, device: Union[str, torch.device] = None):
         elif self.config.use_exponential_sigmas:
             sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
+            sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
         elif self.config.use_beta_sigmas:
             sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
+            sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
         elif self.config.use_flow_sigmas:
             alphas = np.linspace(1, 1 / self.config.num_train_timesteps, num_inference_steps + 1)
             sigmas = 1.0 - alphas
             sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy()
             timesteps = (sigmas * self.config.num_train_timesteps).copy()
+            sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
         else:
             sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
             sigma_max = (
```

src/diffusers/schedulers/scheduling_sasolver.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -318,6 +318,7 @@ def set_timesteps(self, num_inference_steps: int = None, device: Union[str, torch.device] = None):
             sigmas = 1.0 - alphas
             sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy()
             timesteps = (sigmas * self.config.num_train_timesteps).copy()
+            sigmas = np.concatenate([sigmas, sigmas[-1:]]).astype(np.float32)
         else:
             sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
             sigma_last = ((1 - self.alphas_cumprod[0]) / self.alphas_cumprod[0]) ** 0.5
```

src/diffusers/schedulers/scheduling_unipc_multistep.py

Lines changed: 9 additions & 0 deletions

```diff
@@ -381,6 +381,15 @@ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
             sigmas = 1.0 - alphas
             sigmas = np.flip(self.config.flow_shift * sigmas / (1 + (self.config.flow_shift - 1) * sigmas))[:-1].copy()
             timesteps = (sigmas * self.config.num_train_timesteps).copy()
+            if self.config.final_sigmas_type == "sigma_min":
+                sigma_last = sigmas[-1]
+            elif self.config.final_sigmas_type == "zero":
+                sigma_last = 0
+            else:
+                raise ValueError(
+                    f"`final_sigmas_type` must be one of 'zero', or 'sigma_min', but got {self.config.final_sigmas_type}"
+                )
+            sigmas = np.concatenate([sigmas, [sigma_last]]).astype(np.float32)
         else:
             sigmas = np.interp(timesteps, np.arange(0, len(sigmas)), sigmas)
             if self.config.final_sigmas_type == "sigma_min":
```
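Unlike the other schedulers in this commit, UniPC already exposes `final_sigmas_type`, so its flow-sigmas branch mirrors the existing `else` branch instead of blindly duplicating the last sigma. A hedged usage sketch (the `use_flow_sigmas`, `flow_shift`, and `final_sigmas_type` config entries are inferred from the diff's own references):

```python
from diffusers import UniPCMultistepScheduler

# With this change the flow-sigmas path also honors final_sigmas_type,
# terminating the schedule with either the smallest sigma or an exact zero.
scheduler = UniPCMultistepScheduler(
    use_flow_sigmas=True, flow_shift=3.0, final_sigmas_type="zero"
)
scheduler.set_timesteps(num_inference_steps=10)
print(scheduler.sigmas[-1])  # expected: 0 when final_sigmas_type="zero"
```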

tests/lora/test_lora_layers_cogvideox.py

Lines changed: 0 additions & 4 deletions

```diff
@@ -29,17 +29,13 @@
 )
 from diffusers.utils.testing_utils import (
     floats_tensor,
-    is_peft_available,
     is_torch_version,
     require_peft_backend,
     skip_mps,
     torch_device,
 )
 
 
-if is_peft_available():
-    pass
-
 sys.path.append(".")
 
 from utils import PeftLoraLoaderMixinTests, check_if_lora_correctly_set  # noqa: E402
```
