From 51b01942e15e8630f8ba129e4b2f9346bb327a34 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 11 Oct 2024 11:06:15 +0300 Subject: [PATCH 01/10] make lora target modules configurable and change the default --- .../dreambooth/train_dreambooth_lora_flux.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_flux.py b/examples/dreambooth/train_dreambooth_lora_flux.py index fcc11386abcf..8e1f622e6abc 100644 --- a/examples/dreambooth/train_dreambooth_lora_flux.py +++ b/examples/dreambooth/train_dreambooth_lora_flux.py @@ -554,6 +554,13 @@ def parse_args(input_args=None): "--adam_weight_decay_text_encoder", type=float, default=1e-03, help="Weight decay to use for text_encoder" ) + parser.add_argument( + "--lora_blocks", + type=str, + default=None, + help=('The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "q_proj,k_proj,v_proj,out_proj" will result in lora training of attention layers only'), + ) + parser.add_argument( "--adam_epsilon", type=float, @@ -1188,12 +1195,18 @@ def main(args): if args.train_text_encoder: text_encoder_one.gradient_checkpointing_enable() - # now we will add new LoRA weights to the attention layers + if args.lora_blocks is not None: + target_modules = [block.strip() for block in args.lora_blocks.split(",")] + else: + target_modules = ["to_k", "to_q", "to_v", "to_out.0", + "add_k_proj", "add_q_proj", "add_v_proj", "to_add_out", "ff.net.0.proj","ff.net.2", "ff_context.net.0.proj","ff_context.net.2"] + + # now we will add new LoRA weights the transformer layers transformer_lora_config = LoraConfig( r=args.rank, lora_alpha=args.rank, init_lora_weights="gaussian", - target_modules=["to_k", "to_q", "to_v", "to_out.0"], + target_modules=target_modules, ) transformer.add_adapter(transformer_lora_config) if args.train_text_encoder: From ad37cdff01c41e1b9beb55a59b94d6532c92532e Mon Sep 17 00:00:00 2001 From: Linoy Date: Fri, 11 Oct 2024 09:05:58 +0000 Subject: [PATCH 02/10] style --- .../dreambooth/train_dreambooth_lora_flux.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_flux.py b/examples/dreambooth/train_dreambooth_lora_flux.py index 8e1f622e6abc..1db05e8c71cc 100644 --- a/examples/dreambooth/train_dreambooth_lora_flux.py +++ b/examples/dreambooth/train_dreambooth_lora_flux.py @@ -558,7 +558,9 @@ def parse_args(input_args=None): "--lora_blocks", type=str, default=None, - help=('The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "q_proj,k_proj,v_proj,out_proj" will result in lora training of attention layers only'), + help=( + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. 
- "q_proj,k_proj,v_proj,out_proj" will result in lora training of attention layers only' + ), ) parser.add_argument( @@ -1198,8 +1200,20 @@ def main(args): if args.lora_blocks is not None: target_modules = [block.strip() for block in args.lora_blocks.split(",")] else: - target_modules = ["to_k", "to_q", "to_v", "to_out.0", - "add_k_proj", "add_q_proj", "add_v_proj", "to_add_out", "ff.net.0.proj","ff.net.2", "ff_context.net.0.proj","ff_context.net.2"] + target_modules = [ + "to_k", + "to_q", + "to_v", + "to_out.0", + "add_k_proj", + "add_q_proj", + "add_v_proj", + "to_add_out", + "ff.net.0.proj", + "ff.net.2", + "ff_context.net.0.proj", + "ff_context.net.2", + ] # now we will add new LoRA weights the transformer layers transformer_lora_config = LoraConfig( From ff5511c1b5eaae222bcb756c33661e0be0a8fff3 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 14 Oct 2024 21:55:42 +0300 Subject: [PATCH 03/10] make lora target modules configurable and change the default --- .../dreambooth/train_dreambooth_lora_flux.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_flux.py b/examples/dreambooth/train_dreambooth_lora_flux.py index 1db05e8c71cc..7ac6717a9f23 100644 --- a/examples/dreambooth/train_dreambooth_lora_flux.py +++ b/examples/dreambooth/train_dreambooth_lora_flux.py @@ -555,11 +555,11 @@ def parse_args(input_args=None): ) parser.add_argument( - "--lora_blocks", + "--lora_layers", type=str, default=None, help=( - 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "q_proj,k_proj,v_proj,out_proj" will result in lora training of attention layers only' + 'The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only' ), ) @@ -1197,18 +1197,18 @@ def main(args): if args.train_text_encoder: text_encoder_one.gradient_checkpointing_enable() - if args.lora_blocks is not None: - target_modules = [block.strip() for block in args.lora_blocks.split(",")] + if args.lora_layers is not None: + target_modules = [layer.strip() for layer in args.lora_layers.split(",")] else: target_modules = [ - "to_k", - "to_q", - "to_v", - "to_out.0", - "add_k_proj", - "add_q_proj", - "add_v_proj", - "to_add_out", + "attn.to_k", + "attn.to_q", + "attn.to_v", + "attn.to_out.0", + "attn.add_k_proj", + "attn.add_q_proj", + "attn.add_v_proj", + "attn.to_add_out", "ff.net.0.proj", "ff.net.2", "ff_context.net.0.proj", From faa95afdae6c419d57d7d80b386b7c6eb1550234 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Tue, 15 Oct 2024 12:02:49 +0300 Subject: [PATCH 04/10] fix bug when using prodigy and training te --- examples/dreambooth/train_dreambooth_lora_flux.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_flux.py b/examples/dreambooth/train_dreambooth_lora_flux.py index 7ac6717a9f23..c5f9a4c3e859 100644 --- a/examples/dreambooth/train_dreambooth_lora_flux.py +++ b/examples/dreambooth/train_dreambooth_lora_flux.py @@ -1396,10 +1396,9 @@ def load_model_hook(models, input_dir): f" {args.text_encoder_lr} and learning_rate: {args.learning_rate}. " f"When using prodigy only learning_rate is used as the initial learning rate." 
             )
-            # changes the learning rate of text_encoder_parameters_one and text_encoder_parameters_two to be
+            # changes the learning rate of text_encoder_parameters_one to be
             # --learning_rate
             params_to_optimize[1]["lr"] = args.learning_rate
-            params_to_optimize[2]["lr"] = args.learning_rate
 
         optimizer = optimizer_class(
             params_to_optimize,

From faa95afdae6c419d57d7d80b386b7c6eb1550234 Mon Sep 17 00:00:00 2001
From: linoytsaban
Date: Tue, 15 Oct 2024 15:13:06 +0300
Subject: [PATCH 05/10] fix mixed precision training as proposed in
 https://github.com/huggingface/diffusers/pull/9565 for full dreambooth as
 well

---
 examples/dreambooth/train_dreambooth_flux.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/dreambooth/train_dreambooth_flux.py b/examples/dreambooth/train_dreambooth_flux.py
index 8e0f4e09a461..e5f66c970c59 100644
--- a/examples/dreambooth/train_dreambooth_flux.py
+++ b/examples/dreambooth/train_dreambooth_flux.py
@@ -161,7 +161,7 @@ def log_validation(
         f"Running validation... \n Generating {args.num_validation_images} images with prompt:"
         f" {args.validation_prompt}."
     )
-    pipeline = pipeline.to(accelerator.device, dtype=torch_dtype)
+    pipeline = pipeline.to(accelerator.device)
     pipeline.set_progress_bar_config(disable=True)
 
     # run inference
@@ -1580,7 +1580,7 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
                 )
 
                 # handle guidance
-                if transformer.config.guidance_embeds:
+                if accelerator.unwrap_model(transformer).config.guidance_embeds:
                     guidance = torch.tensor([args.guidance_scale], device=accelerator.device)
                     guidance = guidance.expand(model_input.shape[0])
                 else:
@@ -1694,6 +1694,8 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
         # create pipeline
         if not args.train_text_encoder:
             text_encoder_one, text_encoder_two = load_text_encoders(text_encoder_cls_one, text_encoder_cls_two)
+            text_encoder_one.to(weight_dtype)
+            text_encoder_two.to(weight_dtype)
         else:  # even when training the text encoder we're only training text encoder one
             text_encoder_two = text_encoder_cls_two.from_pretrained(
                 args.pretrained_model_name_or_path,

From 73b0e0f203b957f5d3f295393dbaaab115b5c1dd Mon Sep 17 00:00:00 2001
From: linoytsaban
Date: Mon, 28 Oct 2024 15:20:29 +0200
Subject: [PATCH 06/10] add test and notes

---
 examples/dreambooth/README_flux.md               | 15 ++++++++
 .../dreambooth/test_dreambooth_lora_flux.py      | 34 +++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/examples/dreambooth/README_flux.md b/examples/dreambooth/README_flux.md
index 69dfd241395b..a724ca53b927 100644
--- a/examples/dreambooth/README_flux.md
+++ b/examples/dreambooth/README_flux.md
@@ -170,6 +170,21 @@ accelerate launch train_dreambooth_lora_flux.py \
   --push_to_hub
 ```
 
+### Target Modules
+When LoRA was first adapted from language models to diffusion models, it was applied to the cross-attention layers in the UNet that relate the image representations with the prompts that describe them.
+More recently, SOTA text-to-image diffusion models replaced the UNet with a diffusion Transformer (DiT). With this change, we may also want to explore
+applying LoRA training onto different types of layers and blocks. To allow more flexibility and control over the targeted modules, we added `--lora_layers`, in which you can specify, as a comma separated string,
+the exact modules for LoRA training. Here are some examples of target modules you can provide:
+- for attention-only layers: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0"`
+- to train the same modules as in the fal trainer: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2"`
+- to train the same modules as in the ostris ai-toolkit / replicate trainer: `--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0,attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,ff.net.0.proj,ff.net.2,ff_context.net.0.proj,ff_context.net.2,norm1_context.linear,norm1.linear,norm.linear,proj_mlp,proj_out"`
+> [!NOTE]
+> `--lora_layers` can also be used to specify which **blocks** to apply LoRA training to. To do so, simply add a block prefix to each layer in the comma separated string:
+> **single DiT blocks**: to target the ith single transformer block, add the prefix `single_transformer_blocks.i`, e.g. - `single_transformer_blocks.i.attn.to_k`
+> **MMDiT blocks**: to target the ith MMDiT block, add the prefix `transformer_blocks.i`, e.g. - `transformer_blocks.i.attn.to_k`
+> [!NOTE]
+> Keep in mind that while training more layers can improve quality and expressiveness, it also increases the size of the output LoRA weights.
+
 ### Text Encoder Training
 
 Alongside the transformer, fine-tuning of the CLIP text encoder is also supported.
diff --git a/examples/dreambooth/test_dreambooth_lora_flux.py b/examples/dreambooth/test_dreambooth_lora_flux.py
index d197c8187b87..567b89d2a860 100644
--- a/examples/dreambooth/test_dreambooth_lora_flux.py
+++ b/examples/dreambooth/test_dreambooth_lora_flux.py
@@ -136,6 +136,40 @@ def test_dreambooth_lora_latent_caching(self):
         starts_with_transformer = all(key.startswith("transformer") for key in lora_state_dict.keys())
         self.assertTrue(starts_with_transformer)
 
+    def test_dreambooth_lora_layers(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            test_args = f"""
+                {self.script_path}
+                --pretrained_model_name_or_path {self.pretrained_model_name_or_path}
+                --instance_data_dir {self.instance_data_dir}
+                --instance_prompt {self.instance_prompt}
+                --resolution 64
+                --train_batch_size 1
+                --gradient_accumulation_steps 1
+                --max_train_steps 2
+                --cache_latents
+                --learning_rate 5.0e-04
+                --scale_lr
+                --lora_layers single_transformer_blocks.0.attn.to_k
+                --lr_scheduler constant
+                --lr_warmup_steps 0
+                --output_dir {tmpdir}
+                """.split()
+
+            run_command(self._launch_args + test_args)
+            # save_pretrained smoke test
+            self.assertTrue(os.path.isfile(os.path.join(tmpdir, "pytorch_lora_weights.safetensors")))
+
+            # make sure the state_dict has the correct naming in the parameters.
+            lora_state_dict = safetensors.torch.load_file(os.path.join(tmpdir, "pytorch_lora_weights.safetensors"))
+            is_lora = all("lora" in k for k in lora_state_dict.keys())
+            self.assertTrue(is_lora)
+
+            # when not training the text encoder, all the parameters in the state dict should start
+            # with `"transformer"` in their names.
+ starts_with_transformer = all(key.startswith("transformer.single_transformer_blocks.0.attn.to_k") for key in lora_state_dict.keys()) + self.assertTrue(starts_with_transformer) + def test_dreambooth_lora_flux_checkpointing_checkpoints_total_limit(self): with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" From 8c18e1e5f881d0ab823e3ea4b37df353abae33b9 Mon Sep 17 00:00:00 2001 From: Linoy Date: Mon, 28 Oct 2024 13:22:35 +0000 Subject: [PATCH 07/10] style --- examples/dreambooth/test_dreambooth_lora_flux.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/dreambooth/test_dreambooth_lora_flux.py b/examples/dreambooth/test_dreambooth_lora_flux.py index 567b89d2a860..f5660b4fd872 100644 --- a/examples/dreambooth/test_dreambooth_lora_flux.py +++ b/examples/dreambooth/test_dreambooth_lora_flux.py @@ -167,7 +167,9 @@ def test_dreambooth_lora_layers(self): # when not training the text encoder, all the parameters in the state dict should start # with `"transformer"` in their names. - starts_with_transformer = all(key.startswith("transformer.single_transformer_blocks.0.attn.to_k") for key in lora_state_dict.keys()) + starts_with_transformer = all( + key.startswith("transformer.single_transformer_blocks.0.attn.to_k") for key in lora_state_dict.keys() + ) self.assertTrue(starts_with_transformer) def test_dreambooth_lora_flux_checkpointing_checkpoints_total_limit(self): From 4f034b9b570623a7e3f9707baa7a2f95871989fd Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 28 Oct 2024 16:10:03 +0200 Subject: [PATCH 08/10] address sayaks comments --- examples/dreambooth/test_dreambooth_lora_flux.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/test_dreambooth_lora_flux.py b/examples/dreambooth/test_dreambooth_lora_flux.py index f5660b4fd872..9083885ef05a 100644 --- a/examples/dreambooth/test_dreambooth_lora_flux.py +++ b/examples/dreambooth/test_dreambooth_lora_flux.py @@ -37,7 +37,7 @@ class DreamBoothLoRAFlux(ExamplesTestsAccelerate): instance_prompt = "photo" pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-pipe" script_path = "examples/dreambooth/train_dreambooth_lora_flux.py" - + transformer_layer_type = "single_transformer_blocks.0.attn.to_k" def test_dreambooth_lora_flux(self): with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" @@ -150,7 +150,7 @@ def test_dreambooth_lora_layers(self): --cache_latents --learning_rate 5.0e-04 --scale_lr - --lora_layers single_transformer_blocks.0.attn.to_k + --lora_layers {transformer_layer_type} --lr_scheduler constant --lr_warmup_steps 0 --output_dir {tmpdir} @@ -166,7 +166,8 @@ def test_dreambooth_lora_layers(self): self.assertTrue(is_lora) # when not training the text encoder, all the parameters in the state dict should start - # with `"transformer"` in their names. + # with `"transformer"` in their names. 
In this test, only params of
+            # transformer.single_transformer_blocks.0.attn.to_k should be in the state dict
             starts_with_transformer = all(
                 key.startswith("transformer.single_transformer_blocks.0.attn.to_k") for key in lora_state_dict.keys()
             )
             self.assertTrue(starts_with_transformer)

From 2e3a7a1d8514aa27d6c3303dc995bd8d1dadee0a Mon Sep 17 00:00:00 2001
From: Linoy
Date: Mon, 28 Oct 2024 14:11:14 +0000
Subject: [PATCH 09/10] style

---
 examples/dreambooth/test_dreambooth_lora_flux.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/dreambooth/test_dreambooth_lora_flux.py b/examples/dreambooth/test_dreambooth_lora_flux.py
index 9083885ef05a..8544b53966c9 100644
--- a/examples/dreambooth/test_dreambooth_lora_flux.py
+++ b/examples/dreambooth/test_dreambooth_lora_flux.py
@@ -38,6 +38,7 @@ class DreamBoothLoRAFlux(ExamplesTestsAccelerate):
     pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-pipe"
     script_path = "examples/dreambooth/train_dreambooth_lora_flux.py"
     transformer_layer_type = "single_transformer_blocks.0.attn.to_k"
+
     def test_dreambooth_lora_flux(self):
         with tempfile.TemporaryDirectory() as tmpdir:
             test_args = f"""

From 7c533aee63b939cb379d01522dad9024f56189e7 Mon Sep 17 00:00:00 2001
From: linoytsaban
Date: Mon, 28 Oct 2024 16:29:10 +0200
Subject: [PATCH 10/10] fix test

---
 examples/dreambooth/test_dreambooth_lora_flux.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/dreambooth/test_dreambooth_lora_flux.py b/examples/dreambooth/test_dreambooth_lora_flux.py
index 8544b53966c9..a76825e29448 100644
--- a/examples/dreambooth/test_dreambooth_lora_flux.py
+++ b/examples/dreambooth/test_dreambooth_lora_flux.py
@@ -151,7 +151,7 @@ def test_dreambooth_lora_layers(self):
                 --cache_latents
                 --learning_rate 5.0e-04
                 --scale_lr
-                --lora_layers {transformer_layer_type}
+                --lora_layers {self.transformer_layer_type}
                 --lr_scheduler constant
                 --lr_warmup_steps 0
                 --output_dir {tmpdir}
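For quick reference, below is a minimal, self-contained sketch of what the `--lora_layers` handling introduced in this series does at runtime: the comma separated string is split into a list of PEFT `target_modules`, which is then passed to `LoraConfig` just as in `train_dreambooth_lora_flux.py`. The example `lora_layers` value and the rank are illustrative assumptions, not values taken from the patches.

```python
from peft import LoraConfig

# Illustrative --lora_layers value: any comma separated list of module names,
# optionally prefixed with a block path such as `single_transformer_blocks.0`.
lora_layers = "transformer_blocks.0.attn.to_k,single_transformer_blocks.0.attn.to_q,ff.net.2"

# Mirrors the parsing added in the training script: split on commas, strip whitespace.
target_modules = [layer.strip() for layer in lora_layers.split(",")]

# Build the LoRA config the same way the script does (rank 4 is an arbitrary example).
transformer_lora_config = LoraConfig(
    r=4,
    lora_alpha=4,
    init_lora_weights="gaussian",
    target_modules=target_modules,
)
print(transformer_lora_config.target_modules)
```

In the training script the resulting config is then attached with `transformer.add_adapter(transformer_lora_config)`, so only the listed modules receive LoRA weights.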