Merged
Changes from 1 commit
2 changes: 1 addition & 1 deletion examples/cogvideo/train_cogvideox_image_to_video_lora.py
@@ -555,7 +555,7 @@ def _load_dataset_from_local_path(self):

if any(not path.is_file() for path in instance_videos):
raise ValueError(
"Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found atleast one path that is not a valid file."
"Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found at least one path that is not a valid file."
)

return instance_prompts, instance_videos
2 changes: 1 addition & 1 deletion examples/cogvideo/train_cogvideox_lora.py
@@ -539,7 +539,7 @@ def _load_dataset_from_local_path(self):

if any(not path.is_file() for path in instance_videos):
raise ValueError(
"Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found atleast one path that is not a valid file."
"Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found at least one path that is not a valid file."
)

return instance_prompts, instance_videos
@@ -2,7 +2,7 @@

Please note that this project is not actively maintained. However, you can open an issue and tag @gzguevara.

- [DreamBooth](https://arxiv.org/abs/2208.12242) is a method to personalize text2image models like stable diffusion given just a few(3~5) images of a subject. This project consists of **two parts**. Training Stable Diffusion for inpainting requieres prompt-image-mask pairs. The Unet of inpainiting models have 5 additional input channels (4 for the encoded masked-image and 1 for the mask itself).
+ [DreamBooth](https://arxiv.org/abs/2208.12242) is a method to personalize text2image models like stable diffusion given just a few(3~5) images of a subject. This project consists of **two parts**. Training Stable Diffusion for inpainting requires prompt-image-mask pairs. The Unet of inpainting models have 5 additional input channels (4 for the encoded masked-image and 1 for the mask itself).

**The first part**, the `multi_inpaint_dataset.ipynb` notebook, demonstrates how make a 🤗 dataset of prompt-image-mask pairs. You can, however, skip the first part and move straight to the second part with the example datasets in this project. ([cat toy dataset masked](https://huggingface.co/datasets/gzguevara/cat_toy_masked), [mr. potato head dataset masked](https://huggingface.co/datasets/gzguevara/mr_potato_head_masked))
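For readers unfamiliar with the 5 extra channels mentioned above, here is a minimal sketch (with illustrative sizes, not taken from this project) of how an inpainting UNet input is typically assembled: the noisy latents, the downsampled mask, and the VAE-encoded masked image are concatenated along the channel dimension, giving 4 + 1 + 4 = 9 input channels.

```python
import torch

# Illustrative sizes: 4 latent channels at 64x64 for a 512x512 image.
latents = torch.randn(1, 4, 64, 64)               # noisy image latents
mask = torch.rand(1, 1, 64, 64)                   # mask, resized to latent resolution
masked_image_latents = torch.randn(1, 4, 64, 64)  # VAE encoding of the masked image

# Concatenate along channels: 4 + 1 + 4 = 9 input channels for the inpainting UNet.
unet_input = torch.cat([latents, mask, masked_image_latents], dim=1)
print(unet_input.shape)  # torch.Size([1, 9, 64, 64])
```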

@@ -73,7 +73,7 @@ accelerate launch train_multi_subject_dreambooth_inpaint.py \

## 3. Results

- A [![Weights & Biases](https://img.shields.io/badge/Weights%20&%20Biases-Report-blue)](https://wandb.ai/gzguevara/uncategorized/reports/Multi-Subject-Dreambooth-for-Inpainting--Vmlldzo2MzY5NDQ4?accessToken=y0nya2d7baguhbryxaikbfr1203amvn1jsmyl07vk122mrs7tnph037u1nqgse8t) is provided showing the training progress by every 50 steps. Note, the reported weights & baises run was performed on a A100 GPU with the following stetting:
+ A [![Weights & Biases](https://img.shields.io/badge/Weights%20&%20Biases-Report-blue)](https://wandb.ai/gzguevara/uncategorized/reports/Multi-Subject-Dreambooth-for-Inpainting--Vmlldzo2MzY5NDQ4?accessToken=y0nya2d7baguhbryxaikbfr1203amvn1jsmyl07vk122mrs7tnph037u1nqgse8t) is provided showing the training progress by every 50 steps. Note, the reported weights & biases run was performed on a A100 GPU with the following stetting:

```bash
accelerate launch train_multi_subject_dreambooth_inpaint.py \
2 changes: 1 addition & 1 deletion src/diffusers/hooks/faster_cache.py
@@ -146,7 +146,7 @@ class FasterCacheConfig:
alpha_low_frequency: float = 1.1
alpha_high_frequency: float = 1.1

- # n as described in CFG-Cache explanation in the paper - dependant on the model
+ # n as described in CFG-Cache explanation in the paper - dependent on the model
unconditional_batch_skip_range: int = 5
unconditional_batch_timestep_skip_range: Tuple[int, int] = (-1, 641)

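As a usage note, config fields like the `unconditional_batch_*` defaults shown above are passed to the transformer's caching mechanism. A hedged sketch, assuming the `FasterCacheConfig`/`enable_cache` API from recent diffusers releases; the remaining field values and callbacks are illustrative, model-dependent choices:

```python
import torch
from diffusers import CogVideoXPipeline, FasterCacheConfig

pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)

# `unconditional_batch_*` values mirror the defaults shown in the diff above;
# the other fields are illustrative ("n" in the paper is model-dependent).
config = FasterCacheConfig(
    spatial_attention_block_skip_range=2,
    spatial_attention_timestep_skip_range=(-1, 681),
    unconditional_batch_skip_range=5,
    unconditional_batch_timestep_skip_range=(-1, 641),
    current_timestep_callback=lambda: pipe.current_timestep,
    attention_weight_callback=lambda _: 0.3,
)
pipe.transformer.enable_cache(config)
```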
6 changes: 3 additions & 3 deletions src/diffusers/hooks/hooks.py
@@ -43,9 +43,9 @@ def initialize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
"""
return module

- def deinitalize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
+ def deinitialize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
r"""
- Hook that is executed when a model is deinitalized.
+ Hook that is executed when a model is deinitialized.

Args:
module (`torch.nn.Module`):
@@ -192,7 +192,7 @@ def remove_hook(self, name: str, recurse: bool = True) -> None:
else:
self._fn_refs[index + 1].forward = old_forward

- self._module_ref = hook.deinitalize_hook(self._module_ref)
+ self._module_ref = hook.deinitialize_hook(self._module_ref)
del self.hooks[name]
self._hook_order.pop(index)
self._fn_refs.pop(index)
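For context on the renamed method: hooks in this module follow a small lifecycle, with `initialize_hook` running when a hook is attached and `deinitialize_hook` running when it is removed via `remove_hook`. A minimal sketch, assuming the `ModelHook`/`HookRegistry` classes defined in `src/diffusers/hooks/hooks.py` (the registry method names here are assumptions, not confirmed by this diff):

```python
import torch
from diffusers.hooks.hooks import HookRegistry, ModelHook  # module path taken from this diff

class LoggingHook(ModelHook):
    """Toy hook sketching the initialize/deinitialize lifecycle."""

    def initialize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
        print(f"attached to {module.__class__.__name__}")
        return module

    def deinitialize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
        # Undo anything initialize_hook set up, then hand the module back.
        print(f"detached from {module.__class__.__name__}")
        return module

linear = torch.nn.Linear(8, 8)
registry = HookRegistry.check_if_exists_or_initialize(linear)
registry.register_hook(LoggingHook(), "logging")
registry.remove_hook("logging")  # this is the path that calls deinitialize_hook
```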
4 changes: 2 additions & 2 deletions src/diffusers/hooks/layerwise_casting.py
@@ -60,9 +60,9 @@ def initialize_hook(self, module: torch.nn.Module):
module.to(dtype=self.storage_dtype, non_blocking=self.non_blocking)
return module

- def deinitalize_hook(self, module: torch.nn.Module):
+ def deinitialize_hook(self, module: torch.nn.Module):
raise NotImplementedError(
"LayerwiseCastingHook does not support deinitalization. A model once enabled with layerwise casting will "
"LayerwiseCastingHook does not support deinitialization. A model once enabled with layerwise casting will "
"have casted its weights to a lower precision dtype for storage. Casting this back to the original dtype "
"will lead to precision loss, which might have an impact on the model's generation quality. The model should "
"be re-initialized and loaded in the original dtype."
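Since the error message explains that layerwise casting is a one-way operation, a usage sketch may help; this assumes the `enable_layerwise_casting` helper available on diffusers models in recent releases:

```python
import torch
from diffusers import CogVideoXTransformer3DModel

transformer = CogVideoXTransformer3DModel.from_pretrained(
    "THUDM/CogVideoX-5b", subfolder="transformer", torch_dtype=torch.bfloat16
)
# Weights are stored in float8 and upcast per-layer to bfloat16 during forward.
transformer.enable_layerwise_casting(
    storage_dtype=torch.float8_e4m3fn, compute_dtype=torch.bfloat16
)
# As the message above notes: to undo this, re-load the model in the original dtype.
```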
2 changes: 1 addition & 1 deletion src/diffusers/loaders/peft.py
@@ -250,7 +250,7 @@ def load_lora_adapter(

rank = {}
for key, val in state_dict.items():
- # Cannot figure out rank from lora layers that don't have atleast 2 dimensions.
+ # Cannot figure out rank from lora layers that don't have at least 2 dimensions.
# Bias layers in LoRA only have a single dimension
if "lora_B" in key and val.ndim > 1:
# Check out https://github.com/huggingface/peft/pull/2419 for the `^` symbol.
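The comment being fixed encodes a real invariant: for a LoRA pair, `lora_B` has shape `(out_features, rank)`, so the rank can be read off its second dimension, while 1-D bias entries carry no rank information. A self-contained sketch mirroring the loop above, with hypothetical state-dict keys:

```python
import torch

# Hypothetical LoRA state dict: `lora_B` weights are (out_features, rank);
# bias entries are 1-D and are skipped, as in the loop above.
state_dict = {
    "unet.to_q.lora_B.weight": torch.zeros(320, 4),  # rank 4
    "unet.to_q.lora_B.bias": torch.zeros(320),       # ndim == 1, no rank info
}

rank = {}
for key, val in state_dict.items():
    if "lora_B" in key and val.ndim > 1:
        rank[key] = val.shape[1]

print(rank)  # {'unet.to_q.lora_B.weight': 4}
```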
2 changes: 1 addition & 1 deletion src/diffusers/models/autoencoders/autoencoder_kl.py
@@ -63,7 +63,7 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin, PeftAdapter
Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper.
force_upcast (`bool`, *optional*, default to `True`):
If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
- can be fine-tuned / trained to a lower range without loosing too much precision in which case
+ can be fine-tuned / trained to a lower range without losing too much precision in which case
`force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
mid_block_add_attention (`bool`, *optional*, default to `True`):
If enabled, the mid_block of the Encoder and Decoder will have attention blocks. If set to false, the
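Following the docstring's pointer, the usual way to skip the float32 upcast is to swap in a VAE fine-tuned for float16, such as the linked `madebyollin/sdxl-vae-fp16-fix`. A sketch (that checkpoint already ships with `force_upcast` disabled in its config; the explicit assignment is shown only for clarity):

```python
import torch
from diffusers import AutoencoderKL, StableDiffusionXLPipeline

# This VAE was fine-tuned to stay numerically stable in float16,
# so the float32 upcast can be skipped.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
vae.config.force_upcast = False  # redundant for this checkpoint, shown for clarity

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", vae=vae, torch_dtype=torch.float16
)
```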
@@ -715,7 +715,7 @@ class AutoencoderKLAllegro(ModelMixin, ConfigMixin):
Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper.
force_upcast (`bool`, default to `True`):
If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
- can be fine-tuned / trained to a lower range without loosing too much precision in which case
+ can be fine-tuned / trained to a lower range without losing too much precision in which case
`force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
"""

@@ -983,7 +983,7 @@ class AutoencoderKLCogVideoX(ModelMixin, ConfigMixin, FromOriginalModelMixin):
Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper.
force_upcast (`bool`, *optional*, default to `True`):
If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
- can be fine-tuned / trained to a lower range without loosing too much precision in which case
+ can be fine-tuned / trained to a lower range without losing too much precision in which case
`force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
"""

@@ -161,7 +161,7 @@ class AutoencoderKLTemporalDecoder(ModelMixin, ConfigMixin):
Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper.
force_upcast (`bool`, *optional*, default to `True`):
If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
- can be fine-tuned / trained to a lower range without loosing too much precision in which case
+ can be fine-tuned / trained to a lower range without losing too much precision in which case
`force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
"""

2 changes: 1 addition & 1 deletion src/diffusers/pipelines/consisid/consisid_utils.py
@@ -166,7 +166,7 @@ def process_face_embeddings(
raise RuntimeError("facexlib align face fail")
align_face = face_helper_1.cropped_faces[0] # (512, 512, 3) # RGB

- # incase insightface didn't detect face
+ # in case insightface didn't detect face
if id_ante_embedding is None:
logger.warning("Failed to detect face using insightface. Extracting embedding with align face")
id_ante_embedding = face_helper_2.get_feat(align_face)
4 changes: 2 additions & 2 deletions tests/lora/utils.py
@@ -1090,7 +1090,7 @@ def test_simple_inference_with_text_denoiser_block_scale(self):
def test_simple_inference_with_text_denoiser_multi_adapter_block_lora(self):
"""
Tests a simple inference with lora attached to text encoder and unet, attaches
- multiple adapters and set differnt weights for different blocks (i.e. block lora)
+ multiple adapters and set different weights for different blocks (i.e. block lora)
"""
for scheduler_cls in self.scheduler_classes:
components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
@@ -1638,7 +1638,7 @@ def test_simple_inference_with_text_lora_denoiser_fused_multi(

pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, adapter_names=["adapter-1"])

- # Fusing should still keep the LoRA layers so outpout should remain the same
+ # Fusing should still keep the LoRA layers so output should remain the same
outputs_lora_1_fused = pipe(**inputs, generator=torch.manual_seed(0))[0]

self.assertTrue(
2 changes: 1 addition & 1 deletion tests/pipelines/cogvideo/test_cogvideox_image2video.py
@@ -270,7 +270,7 @@ def test_vae_tiling(self, expected_diff_max: float = 0.3):
generator_device = "cpu"
components = self.get_dummy_components()

- # The reason to modify it this way is because I2V Transformer limits the generation to resolutions used during initalization.
+ # The reason to modify it this way is because I2V Transformer limits the generation to resolutions used during initialization.
# This limitation comes from using learned positional embeddings which cannot be generated on-the-fly like sincos or RoPE embeddings.
# See the if-statement on "self.use_learned_positional_embeddings" in diffusers/models/embeddings.py
components["transformer"] = CogVideoXTransformer3DModel.from_config(
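The comment above distinguishes learned positional embeddings (a fixed-size table created at initialization) from sincos or RoPE embeddings, which can be computed for any length at runtime. A minimal sketch of the sincos case, using the generic formula rather than the exact diffusers implementation:

```python
import math
import torch

def sincos_pos_embed(embed_dim: int, num_positions: int) -> torch.Tensor:
    # Computed on the fly for any requested length, unlike a learned nn.Embedding table.
    position = torch.arange(num_positions, dtype=torch.float32).unsqueeze(1)
    div_term = torch.exp(
        torch.arange(0, embed_dim, 2, dtype=torch.float32) * (-math.log(10000.0) / embed_dim)
    )
    pe = torch.zeros(num_positions, embed_dim)
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    return pe

print(sincos_pos_embed(64, 226).shape)  # torch.Size([226, 64]) - any length works
```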
2 changes: 1 addition & 1 deletion tests/pipelines/consisid/test_consisid.py
@@ -279,7 +279,7 @@ def test_vae_tiling(self, expected_diff_max: float = 0.4):
generator_device = "cpu"
components = self.get_dummy_components()

- # The reason to modify it this way is because ConsisID Transformer limits the generation to resolutions used during initalization.
+ # The reason to modify it this way is because ConsisID Transformer limits the generation to resolutions used during initialization.
# This limitation comes from using learned positional embeddings which cannot be generated on-the-fly like sincos or RoPE embeddings.
# See the if-statement on "self.use_learned_positional_embeddings" in diffusers/models/embeddings.py
components["transformer"] = ConsisIDTransformer3DModel.from_config(
2 changes: 1 addition & 1 deletion tests/pipelines/kolors/test_kolors_img2img.py
@@ -155,6 +155,6 @@ def test_inference_batch_single_identical(self):
def test_float16_inference(self):
super().test_float16_inference(expected_max_diff=7e-2)

- @unittest.skip("Test not supported because kolors img2img doesn't take pooled embeds as inputs unline kolors t2i.")
+ @unittest.skip("Test not supported because kolors img2img doesn't take pooled embeds as inputs unlike kolors t2i.")
def test_encode_prompt_works_in_isolation(self):
pass
2 changes: 1 addition & 1 deletion tests/pipelines/pag/test_pag_pixart_sigma.py
@@ -254,7 +254,7 @@ def test_attention_slicing_forward_pass(
assert_mean_pixel_difference(to_np(output_with_slicing1[0]), to_np(output_without_slicing[0]))
assert_mean_pixel_difference(to_np(output_with_slicing2[0]), to_np(output_without_slicing[0]))

- # Because we have `pag_applied_layers` we cannot direcly apply
+ # Because we have `pag_applied_layers` we cannot directly apply
# `set_default_attn_processor`
def test_dict_tuple_outputs_equivalent(self, expected_slice=None, expected_max_difference=1e-4):
components = self.get_dummy_components()
2 changes: 1 addition & 1 deletion tests/pipelines/stable_unclip/test_stable_unclip.py
@@ -217,7 +217,7 @@ def test_stable_unclip(self):
pipe.enable_sequential_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(0)
- output = pipe("anime turle", generator=generator, output_type="np")
+ output = pipe("anime turtle", generator=generator, output_type="np")

image = output.images[0]

4 changes: 2 additions & 2 deletions tests/pipelines/stable_unclip/test_stable_unclip_img2img.py
@@ -246,7 +246,7 @@ def test_stable_unclip_l_img2img(self):
pipe.enable_sequential_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(0)
- output = pipe(input_image, "anime turle", generator=generator, output_type="np")
+ output = pipe(input_image, "anime turtle", generator=generator, output_type="np")

image = output.images[0]

@@ -273,7 +273,7 @@ def test_stable_unclip_h_img2img(self):
pipe.enable_sequential_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(0)
- output = pipe(input_image, "anime turle", generator=generator, output_type="np")
+ output = pipe(input_image, "anime turtle", generator=generator, output_type="np")

image = output.images[0]

10 changes: 5 additions & 5 deletions tests/pipelines/test_pipelines_common.py
@@ -2096,11 +2096,11 @@ def test_encode_prompt_works_in_isolation(self, extra_required_param_value_dict=
with torch.no_grad():
encoded_prompt_outputs = pipe_with_just_text_encoder.encode_prompt(**encode_prompt_inputs)

- # Programatically determine the reutrn names of `encode_prompt.`
- ast_vistor = ReturnNameVisitor()
- encode_prompt_tree = ast_vistor.get_ast_tree(cls=self.pipeline_class)
- ast_vistor.visit(encode_prompt_tree)
- prompt_embed_kwargs = ast_vistor.return_names
+ # Programmatically determine the return names of `encode_prompt.`
+ ast_visitor = ReturnNameVisitor()
+ encode_prompt_tree = ast_visitor.get_ast_tree(cls=self.pipeline_class)
+ ast_visitor.visit(encode_prompt_tree)
+ prompt_embed_kwargs = ast_visitor.return_names
prompt_embeds_kwargs = dict(zip(prompt_embed_kwargs, encoded_prompt_outputs))

# Pack the outputs of `encode_prompt`.
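For readers wondering what the (now correctly spelled) `ReturnNameVisitor` does: it walks the AST of `encode_prompt` to recover the names of the values it returns. A self-contained sketch of the idea, not the exact diffusers helper:

```python
import ast
import inspect
import textwrap

class ReturnNameVisitor(ast.NodeVisitor):
    """Collects the variable names a function returns (illustrative sketch)."""

    def __init__(self):
        self.return_names = []

    def visit_Return(self, node):
        if isinstance(node.value, ast.Tuple):  # return a, b, c
            self.return_names = [e.id for e in node.value.elts if isinstance(e, ast.Name)]
        elif isinstance(node.value, ast.Name):  # return a
            self.return_names = [node.value.id]

    def get_ast_tree(self, cls):
        source = textwrap.dedent(inspect.getsource(cls.encode_prompt))
        return ast.parse(source)
```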
2 changes: 1 addition & 1 deletion tests/quantization/bnb/test_4bit.py
@@ -205,7 +205,7 @@ def test_model_memory_usage(self):

def test_original_dtype(self):
r"""
- A simple test to check if the model succesfully stores the original dtype
+ A simple test to check if the model successfully stores the original dtype
"""
self.assertTrue("_pre_quantization_dtype" in self.model_4bit.config)
self.assertFalse("_pre_quantization_dtype" in self.model_fp16.config)
2 changes: 1 addition & 1 deletion tests/quantization/bnb/test_mixed_int8.py
@@ -195,7 +195,7 @@ def test_model_memory_usage(self):

def test_original_dtype(self):
r"""
- A simple test to check if the model succesfully stores the original dtype
+ A simple test to check if the model successfully stores the original dtype
"""
self.assertTrue("_pre_quantization_dtype" in self.model_8bit.config)
self.assertFalse("_pre_quantization_dtype" in self.model_fp16.config)
2 changes: 1 addition & 1 deletion tests/single_file/test_model_autoencoder_dc_single_file.py
@@ -95,7 +95,7 @@ def test_single_file_in_type_variant_components(self):
# `in` variant checkpoints require passing in a `config` parameter
# in order to set the scaling factor correctly.
# `in` and `mix` variants have the same keys and we cannot automatically infer a scaling factor.
- # We default to using teh `mix` config
+ # We default to using the `mix` config
repo_id = "mit-han-lab/dc-ae-f128c512-in-1.0-diffusers"
ckpt_path = "https://huggingface.co/mit-han-lab/dc-ae-f128c512-in-1.0/blob/main/model.safetensors"

2 changes: 1 addition & 1 deletion utils/custom_init_isort.py
@@ -252,7 +252,7 @@ def sort_imports(file: str, check_only: bool = True):
code, start_prompt="_import_structure = {", end_prompt="if TYPE_CHECKING:"
)

- # We ignore block 0 (everything untils start_prompt) and the last block (everything after end_prompt).
+ # We ignore block 0 (everything until start_prompt) and the last block (everything after end_prompt).
for block_idx in range(1, len(main_blocks) - 1):
# Check if the block contains some `_import_structure`s thingy to sort.
block = main_blocks[block_idx]