From bca516db733e3977d693c15f041153fe7ec07299 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 29 Oct 2024 19:03:03 +0700 Subject: [PATCH 1/2] use the lr when using 8bit adam. --- .../train_dreambooth_lora_flux_advanced.py | 1 + .../train_dreambooth_lora_sd15_advanced.py | 1 + .../train_dreambooth_lora_sdxl_advanced.py | 1 + examples/dreambooth/train_dreambooth_flux.py | 1 + examples/dreambooth/train_dreambooth_lora_flux.py | 1 + examples/dreambooth/train_dreambooth_lora_sd3.py | 1 + examples/dreambooth/train_dreambooth_lora_sdxl.py | 1 + examples/dreambooth/train_dreambooth_sd3.py | 1 + .../train_dreambooth_lora_flux_miniature.py | 1 + .../dreambooth/train_dreambooth_lora_sdxl.py | 1 + .../sd3_lora_colab/train_dreambooth_lora_sd3_miniature.py | 1 + 11 files changed, 11 insertions(+) diff --git a/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py b/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py index 92d296c0f1e8..9cd26ee6893f 100644 --- a/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py +++ b/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py @@ -1841,6 +1841,7 @@ def load_model_hook(models, input_dir): optimizer_class = torch.optim.AdamW optimizer = optimizer_class( params_to_optimize, + lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, diff --git a/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py b/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py index 024722536d88..d9c4ca9b15e6 100644 --- a/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py +++ b/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py @@ -1394,6 +1394,7 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, + lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, diff --git a/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py b/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py index bc06cc9213dc..5f9513210193 100644 --- a/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py +++ b/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py @@ -1764,6 +1764,7 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, + lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, diff --git a/examples/dreambooth/train_dreambooth_flux.py b/examples/dreambooth/train_dreambooth_flux.py index f720afef6542..80a0af2436a2 100644 --- a/examples/dreambooth/train_dreambooth_flux.py +++ b/examples/dreambooth/train_dreambooth_flux.py @@ -1262,6 +1262,7 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, + lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, diff --git a/examples/dreambooth/train_dreambooth_lora_flux.py b/examples/dreambooth/train_dreambooth_lora_flux.py index b6e657234850..472bda5fea72 100644 --- a/examples/dreambooth/train_dreambooth_lora_flux.py +++ b/examples/dreambooth/train_dreambooth_lora_flux.py @@ -1371,6 +1371,7 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, + lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index fc3c69b8901f..547c7948da93 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -1438,6 +1438,7 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, + lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index bf8c8f7d0578..a3be9f7cacfb 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -1372,6 +1372,7 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, + lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, diff --git a/examples/dreambooth/train_dreambooth_sd3.py b/examples/dreambooth/train_dreambooth_sd3.py index 5d10345304ab..4c1244104fd4 100644 --- a/examples/dreambooth/train_dreambooth_sd3.py +++ b/examples/dreambooth/train_dreambooth_sd3.py @@ -1297,6 +1297,7 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, + lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, diff --git a/examples/research_projects/flux_lora_quantization/train_dreambooth_lora_flux_miniature.py b/examples/research_projects/flux_lora_quantization/train_dreambooth_lora_flux_miniature.py index fd2b5568d6d8..fd774feacbf3 100644 --- a/examples/research_projects/flux_lora_quantization/train_dreambooth_lora_flux_miniature.py +++ b/examples/research_projects/flux_lora_quantization/train_dreambooth_lora_flux_miniature.py @@ -848,6 +848,7 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, + lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, diff --git a/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py b/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py index d16780131139..33d72417082c 100644 --- a/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py @@ -1445,6 +1445,7 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, + lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, diff --git a/examples/research_projects/sd3_lora_colab/train_dreambooth_lora_sd3_miniature.py b/examples/research_projects/sd3_lora_colab/train_dreambooth_lora_sd3_miniature.py index 163ff8f08931..970e329a628d 100644 --- a/examples/research_projects/sd3_lora_colab/train_dreambooth_lora_sd3_miniature.py +++ b/examples/research_projects/sd3_lora_colab/train_dreambooth_lora_sd3_miniature.py @@ -839,6 +839,7 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, + lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, From 100b2cc838b030f5aedccffe5a92defafab66bb2 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 30 Oct 2024 10:28:50 +0530 Subject: [PATCH 2/2] remove lr as we pack it in params_to_optimize. --- .../train_dreambooth_lora_flux_advanced.py | 15 +++------------ .../train_dreambooth_lora_sd15_advanced.py | 7 +------ .../train_dreambooth_lora_sdxl_advanced.py | 2 -- .../train_cogvideox_image_to_video_lora.py | 1 - examples/cogvideo/train_cogvideox_lora.py | 1 - examples/dreambooth/train_dreambooth_flux.py | 7 +------ examples/dreambooth/train_dreambooth_lora_flux.py | 7 +------ examples/dreambooth/train_dreambooth_lora_sd3.py | 2 -- examples/dreambooth/train_dreambooth_lora_sdxl.py | 2 -- examples/dreambooth/train_dreambooth_sd3.py | 2 -- .../train_dreambooth_lora_flux_miniature.py | 2 -- .../dreambooth/train_dreambooth_lora_sdxl.py | 2 -- .../train_dreambooth_lora_sd3_miniature.py | 1 - 13 files changed, 6 insertions(+), 45 deletions(-) diff --git a/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py b/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py index 9cd26ee6893f..bf726e65c94b 100644 --- a/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py +++ b/examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py @@ -1778,15 +1778,10 @@ def load_model_hook(models, input_dir): if not args.enable_t5_ti: # pure textual inversion - only clip if pure_textual_inversion: - params_to_optimize = [ - text_parameters_one_with_lr, - ] + params_to_optimize = [text_parameters_one_with_lr] te_idx = 0 else: # regular te training or regular pivotal for clip - params_to_optimize = [ - transformer_parameters_with_lr, - text_parameters_one_with_lr, - ] + params_to_optimize = [transformer_parameters_with_lr, text_parameters_one_with_lr] te_idx = 1 elif args.enable_t5_ti: # pivotal tuning of clip & t5 @@ -1809,9 +1804,7 @@ def load_model_hook(models, input_dir): ] te_idx = 1 else: - params_to_optimize = [ - transformer_parameters_with_lr, - ] + params_to_optimize = [transformer_parameters_with_lr] # Optimizer creation if not (args.optimizer.lower() == "prodigy" or args.optimizer.lower() == "adamw"): @@ -1841,7 +1834,6 @@ def load_model_hook(models, input_dir): optimizer_class = torch.optim.AdamW optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, @@ -1872,7 +1864,6 @@ def load_model_hook(models, input_dir): params_to_optimize[-1]["lr"] = args.learning_rate optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py b/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py index d9c4ca9b15e6..7fdea56dc5cb 100644 --- a/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py +++ b/examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py @@ -1358,10 +1358,7 @@ def load_model_hook(models, input_dir): else args.adam_weight_decay, "lr": args.text_encoder_lr if args.text_encoder_lr else args.learning_rate, } - params_to_optimize = [ - unet_lora_parameters_with_lr, - text_lora_parameters_one_with_lr, - ] + params_to_optimize = [unet_lora_parameters_with_lr, text_lora_parameters_one_with_lr] else: params_to_optimize = [unet_lora_parameters_with_lr] @@ -1394,7 +1391,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, @@ -1424,7 +1420,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py b/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py index 5f9513210193..74d52186dd81 100644 --- a/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py +++ b/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py @@ -1764,7 +1764,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, @@ -1795,7 +1794,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/cogvideo/train_cogvideox_image_to_video_lora.py b/examples/cogvideo/train_cogvideox_image_to_video_lora.py index 4ef392baa2b5..1f055bcecbed 100644 --- a/examples/cogvideo/train_cogvideox_image_to_video_lora.py +++ b/examples/cogvideo/train_cogvideox_image_to_video_lora.py @@ -947,7 +947,6 @@ def get_optimizer(args, params_to_optimize, use_deepspeed: bool = False): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/cogvideo/train_cogvideox_lora.py b/examples/cogvideo/train_cogvideox_lora.py index 011466bc7d58..e591e0ee5900 100644 --- a/examples/cogvideo/train_cogvideox_lora.py +++ b/examples/cogvideo/train_cogvideox_lora.py @@ -969,7 +969,6 @@ def get_optimizer(args, params_to_optimize, use_deepspeed: bool = False): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/dreambooth/train_dreambooth_flux.py b/examples/dreambooth/train_dreambooth_flux.py index 80a0af2436a2..d23d05f7e38b 100644 --- a/examples/dreambooth/train_dreambooth_flux.py +++ b/examples/dreambooth/train_dreambooth_flux.py @@ -1226,10 +1226,7 @@ def load_model_hook(models, input_dir): "weight_decay": args.adam_weight_decay_text_encoder, "lr": args.text_encoder_lr if args.text_encoder_lr else args.learning_rate, } - params_to_optimize = [ - transformer_parameters_with_lr, - text_parameters_one_with_lr, - ] + params_to_optimize = [transformer_parameters_with_lr, text_parameters_one_with_lr] else: params_to_optimize = [transformer_parameters_with_lr] @@ -1262,7 +1259,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, @@ -1292,7 +1288,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/dreambooth/train_dreambooth_lora_flux.py b/examples/dreambooth/train_dreambooth_lora_flux.py index 472bda5fea72..a0a197b1b2ee 100644 --- a/examples/dreambooth/train_dreambooth_lora_flux.py +++ b/examples/dreambooth/train_dreambooth_lora_flux.py @@ -1335,10 +1335,7 @@ def load_model_hook(models, input_dir): "weight_decay": args.adam_weight_decay_text_encoder, "lr": args.text_encoder_lr if args.text_encoder_lr else args.learning_rate, } - params_to_optimize = [ - transformer_parameters_with_lr, - text_parameters_one_with_lr, - ] + params_to_optimize = [transformer_parameters_with_lr, text_parameters_one_with_lr] else: params_to_optimize = [transformer_parameters_with_lr] @@ -1371,7 +1368,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, @@ -1401,7 +1397,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 547c7948da93..dcf093a94c5a 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -1438,7 +1438,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, @@ -1469,7 +1468,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index a3be9f7cacfb..6e621b3caee3 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -1372,7 +1372,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, @@ -1403,7 +1402,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/dreambooth/train_dreambooth_sd3.py b/examples/dreambooth/train_dreambooth_sd3.py index 4c1244104fd4..525a4cc906e9 100644 --- a/examples/dreambooth/train_dreambooth_sd3.py +++ b/examples/dreambooth/train_dreambooth_sd3.py @@ -1297,7 +1297,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, @@ -1329,7 +1328,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/research_projects/flux_lora_quantization/train_dreambooth_lora_flux_miniature.py b/examples/research_projects/flux_lora_quantization/train_dreambooth_lora_flux_miniature.py index fd774feacbf3..37bbcb050a5c 100644 --- a/examples/research_projects/flux_lora_quantization/train_dreambooth_lora_flux_miniature.py +++ b/examples/research_projects/flux_lora_quantization/train_dreambooth_lora_flux_miniature.py @@ -848,7 +848,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, @@ -869,7 +868,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py b/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py index 33d72417082c..2a9801038999 100644 --- a/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/research_projects/scheduled_huber_loss_training/dreambooth/train_dreambooth_lora_sdxl.py @@ -1445,7 +1445,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon, @@ -1476,7 +1475,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), beta3=args.prodigy_beta3, weight_decay=args.adam_weight_decay, diff --git a/examples/research_projects/sd3_lora_colab/train_dreambooth_lora_sd3_miniature.py b/examples/research_projects/sd3_lora_colab/train_dreambooth_lora_sd3_miniature.py index 970e329a628d..163ff8f08931 100644 --- a/examples/research_projects/sd3_lora_colab/train_dreambooth_lora_sd3_miniature.py +++ b/examples/research_projects/sd3_lora_colab/train_dreambooth_lora_sd3_miniature.py @@ -839,7 +839,6 @@ def load_model_hook(models, input_dir): optimizer = optimizer_class( params_to_optimize, - lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, eps=args.adam_epsilon,