From ee477638fe7db5ec2fdceedb7cd14911d43cb2d1 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Sep 2024 17:23:06 +0530 Subject: [PATCH 1/4] support kohya flux loras that have tes. --- .../loaders/lora_conversion_utils.py | 41 ++++++++++++++++++- tests/lora/test_lora_layers_flux.py | 24 ++++++++++- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/src/diffusers/loaders/lora_conversion_utils.py b/src/diffusers/loaders/lora_conversion_utils.py index f6dea33e8e82..d829cc3a844b 100644 --- a/src/diffusers/loaders/lora_conversion_utils.py +++ b/src/diffusers/loaders/lora_conversion_utils.py @@ -516,10 +516,47 @@ def _convert_sd_scripts_to_ai_toolkit(sds_sd): f"transformer.single_transformer_blocks.{i}.norm.linear", ) + remaining_keys = list(sds_sd.keys()) + te_state_dict = {} + if remaining_keys: + if not all(k.startswith("lora_te1") for k in remaining_keys): + raise ValueError(f"Incompatible keys detected: \n\n {', '.join(remaining_keys)}") + for key in remaining_keys: + if not key.endswith("lora_down.weight"): + continue + + lora_name = key.split(".")[0] + lora_name_up = f"{lora_name}.lora_up.weight" + lora_name_alpha = f"{lora_name}.alpha" + diffusers_name = _convert_text_encoder_lora_key(key, lora_name) + + if lora_name.startswith(("lora_te_", "lora_te1_")): + down_weight = sds_sd.pop(key) + sd_lora_rank = down_weight.shape[0] + te_state_dict[diffusers_name] = down_weight + te_state_dict[diffusers_name.replace(".down.", ".up.")] = sds_sd.pop(lora_name_up) + + if lora_name_alpha in sds_sd: + alpha = sds_sd.pop(lora_name_alpha).item() + scale = alpha / sd_lora_rank + + scale_down = scale + scale_up = 1.0 + while scale_down * 2 < scale_up: + scale_down *= 2 + scale_up /= 2 + + te_state_dict[diffusers_name] *= scale_down + te_state_dict[diffusers_name.replace(".down.", ".up.")] *= scale_up + if len(sds_sd) > 0: - logger.warning(f"Unsuppored keys for ai-toolkit: {sds_sd.keys()}") + logger.warning(f"Unsupported keys for ai-toolkit: {sds_sd.keys()}") + + if te_state_dict: + te_state_dict = {f"text_encoder.{module_name}": params for module_name, params in te_state_dict.items()} - return ait_sd + new_state_dict = {**ait_sd, **te_state_dict} + return new_state_dict return _convert_sd_scripts_to_ai_toolkit(state_dict) diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index 0c336ebc3cbf..5af8c04ac410 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -27,6 +27,7 @@ from diffusers.utils.testing_utils import ( floats_tensor, is_peft_available, + print_tensor_test, require_peft_backend, require_torch_gpu, slow, @@ -166,7 +167,7 @@ def test_modify_padding_mode(self): @slow @require_torch_gpu @require_peft_backend -@unittest.skip("We cannot run inference on this model with the current CI hardware") +# @unittest.skip("We cannot run inference on this model with the current CI hardware") # TODO (DN6, sayakpaul): move these tests to a beefier GPU class FluxLoRAIntegrationTests(unittest.TestCase): """internal note: The integration slices were obtained on audace.""" @@ -228,6 +229,27 @@ def test_flux_kohya(self): assert np.allclose(out_slice, expected_slice, atol=1e-4, rtol=1e-4) + def test_flux_kohya_with_text_encoder(self): + self.pipeline.load_lora_weights("cocktailpeanut/optimus", weight_name="optimus.safetensors") + self.pipeline.fuse_lora() + self.pipeline.unload_lora_weights() + self.pipeline.enable_model_cpu_offload() + + prompt = "optimus is cleaning the house with broomstick" + out = self.pipeline( + prompt, 
+ num_inference_steps=self.num_inference_steps, + guidance_scale=4.5, + output_type="np", + generator=torch.manual_seed(self.seed), + ).images + + out_slice = out[0, -3:, -3:, -1].flatten() + print_tensor_test(out_slice) + expected_slice = np.array([0.6367, 0.6367, 0.6328, 0.6367, 0.6328, 0.6289, 0.6367, 0.6328, 0.6484]) + + assert np.allclose(out_slice, expected_slice, atol=1e-4, rtol=1e-4) + def test_flux_xlabs(self): self.pipeline.load_lora_weights("XLabs-AI/flux-lora-collection", weight_name="disney_lora.safetensors") self.pipeline.fuse_lora() From 8e22af26c4f4614c12f4b009ab22b2cfab126597 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Sep 2024 17:33:50 +0530 Subject: [PATCH 2/4] tests --- tests/lora/test_lora_layers_flux.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index 5af8c04ac410..dd617b4bead0 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -246,7 +246,7 @@ def test_flux_kohya_with_text_encoder(self): out_slice = out[0, -3:, -3:, -1].flatten() print_tensor_test(out_slice) - expected_slice = np.array([0.6367, 0.6367, 0.6328, 0.6367, 0.6328, 0.6289, 0.6367, 0.6328, 0.6484]) + expected_slice = np.array([0.4023, 0.4043, 0.4023, 0.3965, 0.3984, 0.3984, 0.3906, 0.3906, 0.4219]) assert np.allclose(out_slice, expected_slice, atol=1e-4, rtol=1e-4) From 21f34e352b278150c5841e72e2b9aadedf6ecb4c Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Sep 2024 17:34:56 +0530 Subject: [PATCH 3/4] skip for now. --- tests/lora/test_lora_layers_flux.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index dd617b4bead0..5b65c39761b3 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -167,7 +167,7 @@ def test_modify_padding_mode(self): @slow @require_torch_gpu @require_peft_backend -# @unittest.skip("We cannot run inference on this model with the current CI hardware") +@unittest.skip("We cannot run inference on this model with the current CI hardware") # TODO (DN6, sayakpaul): move these tests to a beefier GPU class FluxLoRAIntegrationTests(unittest.TestCase): """internal note: The integration slices were obtained on audace.""" From ba83b6e7cac6b65d8d6702eeae7d53de5e807f13 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Fri, 27 Sep 2024 18:01:53 +0530 Subject: [PATCH 4/4] remove print. --- tests/lora/test_lora_layers_flux.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/lora/test_lora_layers_flux.py b/tests/lora/test_lora_layers_flux.py index 5b65c39761b3..a75f9df91047 100644 --- a/tests/lora/test_lora_layers_flux.py +++ b/tests/lora/test_lora_layers_flux.py @@ -27,7 +27,6 @@ from diffusers.utils.testing_utils import ( floats_tensor, is_peft_available, - print_tensor_test, require_peft_backend, require_torch_gpu, slow, @@ -245,7 +244,6 @@ def test_flux_kohya_with_text_encoder(self): ).images out_slice = out[0, -3:, -3:, -1].flatten() - print_tensor_test(out_slice) expected_slice = np.array([0.4023, 0.4043, 0.4023, 0.3965, 0.3984, 0.3984, 0.3906, 0.3906, 0.4219]) assert np.allclose(out_slice, expected_slice, atol=1e-4, rtol=1e-4)
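
A note on the alpha handling in PATCH 1/4: instead of carrying the Kohya alpha key through to the converted state dict, the converter folds the net LoRA scale (alpha / rank) directly into the weights, splitting it into balanced power-of-two factors for the down and up matrices. The following is a minimal, self-contained sketch of that scheme; the helper name split_scale and the toy tensors are illustrative only and are not part of the diffusers API.

    import torch

    def split_scale(alpha: float, rank: int) -> tuple[float, float]:
        """Split the net LoRA scale (alpha / rank) into balanced factors.

        Mirrors the while-loop in _convert_sd_scripts_to_ai_toolkit: factors
        of two are shifted from the up scale to the down scale until the two
        are within 2x of each other, which keeps both weight tensors in a
        similar numeric range. Because every adjustment is a power of two,
        the product scale_down * scale_up remains exactly alpha / rank.
        """
        scale = alpha / rank
        scale_down, scale_up = scale, 1.0
        while scale_down * 2 < scale_up:
            scale_down *= 2
            scale_up /= 2
        return scale_down, scale_up

    # Toy LoRA pair: down projects 64 -> 4, up projects 4 -> 64.
    rank, features = 4, 64
    down = torch.randn(rank, features)
    up = torch.randn(features, rank)

    scale_down, scale_up = split_scale(alpha=1.0, rank=rank)  # (0.5, 0.5)

    # Scaling the two matrices separately leaves the fused update unchanged:
    # (up * scale_up) @ (down * scale_down) == (up @ down) * (alpha / rank).
    delta = (up * scale_up) @ (down * scale_down)
    reference = (up @ down) * (1.0 / rank)
    print(torch.allclose(delta, reference))  # True

Because the alpha keys are consumed during conversion, the resulting ai-toolkit-style text encoder state dict needs no separate alpha bookkeeping when it is loaded.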