Merge pull request #5 from asomoza/main

asomoza · web-flow · commit ece48a6008fe · 2023-11-10T05:22:22.000-03:00
LCM and SSD-1B support added
diff --git a/CHANGELOG.MD b/CHANGELOG.MD
@@ -0,0 +1,13 @@
+# Changelog
+
+## 0.1.0 - 2023-11-09
+
+- initial release
+- Windows installer
+
+## 0.1.1 - 2023-11-10
+
+- Support for LCM models
+- Support for SSD-1B models
+- Support for LCM LoRAs
+- Diffusers 0.23 dependency required
diff --git a/README.md b/README.md
@@ -38,11 +38,13 @@ It is highly recommended to use the included VAE with the FP16 fix, since the VA
 - Will completely run offline after the first installation.
 - Powerful features only available as a desktop application.
 - Easy sharing of models and Lora's metadata since the information is stored in each model, including sample image, sample generation, triggers and tags for filtering.
+- Latent Consistency Models (LCM) and LoRAs for fast inference.
+- Segmind Stable Diffusion (SSD-1B) models for VRAM savings.
 
 ## Limitations
 
 - Only runs with Stable Diffusion XL models.
-- It has the default 75 CLIP token limitation for the prompts.
+- It has the default CLIP 75 token limitation for the prompts.
 
 You can read why [here](https://github.com/ZCode-opensource/image-artisan-xl/blob/main/EXPLANATIONS.MD).
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,15 +4,15 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "iartisanxl"
-version = "0.0.1"
+version = "0.1.1"
 authors = [
   { name="Alvaro Somoza", email="somoza.alvaro@gmail.com" },
 ]
 description = "Dekstop application for generating images using Stable Diffusion."
 requires-python = ">=3.11"
 dependencies = [
     "accelerate>=0.24.1 ",
-    "diffusers>=0.22.0",
+    "diffusers>=0.23.0",
     "Pillow>=9.3.0",
     "PyOpenGL",
     "PyOpenGL_accelerate",
diff --git a/src/iartisanxl/convert_model/convert_functions.py b/src/iartisanxl/convert_model/convert_functions.py
@@ -763,6 +763,10 @@ def convert_ldm_unet_checkpoint(
             "label_emb.0.2.bias"
         ]
 
+    # Relevant to StableDiffusionUpscalePipeline
+    if "num_class_embeds" in config:
+        new_checkpoint["class_embedding.weight"] = unet_state_dict["label_emb.weight"]
+
     new_checkpoint["conv_in.weight"] = unet_state_dict["input_blocks.0.0.weight"]
     new_checkpoint["conv_in.bias"] = unet_state_dict["input_blocks.0.0.bias"]
 
@@ -845,6 +849,7 @@ def convert_ldm_unet_checkpoint(
 
         if len(attentions):
             paths = renew_attention_paths(attentions)
+
             meta_path = {
                 "old": f"input_blocks.{i}.1",
                 "new": f"down_blocks.{block_id}.attentions.{layer_in_block_id}",
diff --git a/src/iartisanxl/generation/schedulers/schedulers.py b/src/iartisanxl/generation/schedulers/schedulers.py
@@ -15,6 +15,7 @@
     DEISMultistepScheduler,
     DDPMScheduler,
     DPMSolverSDEScheduler,
+    LCMScheduler,
 )
 
 
@@ -68,4 +69,5 @@ class Scheduler:
     Scheduler("LMS Karras", LMSDiscreteScheduler, dict(use_karras_sigmas=True)),
     Scheduler("Euler Ancestral", EulerAncestralDiscreteScheduler, dict()),
     Scheduler("KDPM 2 Ancestral", KDPM2AncestralDiscreteScheduler, dict()),
+    Scheduler("LCM", LCMScheduler, dict()),
 ]
diff --git a/src/iartisanxl/pipelines/txt_pipeline.py b/src/iartisanxl/pipelines/txt_pipeline.py
@@ -358,6 +358,20 @@ def _get_add_time_ids(
         add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
         return add_time_ids
 
+    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+        assert len(w.shape) == 1
+        w = w * 1000.0
+
+        half_dim = embedding_dim // 2
+        emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
+        emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
+        emb = w.to(dtype)[:, None] * emb[None, :]
+        emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
+        if embedding_dim % 2 == 1:  # zero pad
+            emb = torch.nn.functional.pad(emb, (0, 1))  # pylint: disable=not-callable
+        assert emb.shape == (w.shape[0], embedding_dim)
+        return emb
+
     @torch.no_grad()
     def __call__(
         self,
@@ -387,6 +401,10 @@ def __call__(
         device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         self.logger.debug("Using device: %s", device)
 
+        do_classifier_free_guidance = False
+        if guidance_scale > 1:
+            do_classifier_free_guidance = True
+
         status_update("Encoding the prompt...")
         text_encoder_lora_scale = (
             cross_attention_kwargs.get("scale", None)
@@ -455,16 +473,25 @@ def __call__(
             negative_add_time_ids = add_time_ids
 
         status_update("Preparing emdeddings...")
-        prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
-        add_text_embeds = torch.cat(
-            [negative_pooled_prompt_embeds, add_text_embeds], dim=0
-        )
-        add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
+
+        if do_classifier_free_guidance:
+            prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+            add_text_embeds = torch.cat(
+                [negative_pooled_prompt_embeds, add_text_embeds], dim=0
+            )
+            add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
 
         prompt_embeds = prompt_embeds.to(device)
         add_text_embeds = add_text_embeds.to(device)
         add_time_ids = add_time_ids.to(device)
 
+        timestep_cond = None
+        if self.unet.config.time_cond_proj_dim is not None:
+            guidance_scale_tensor = torch.tensor(guidance_scale - 1).repeat(1)
+            timestep_cond = self.get_guidance_scale_embedding(
+                guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
+            ).to(device=device, dtype=latents.dtype)
+
         status_update("Generating image...")
         num_warmup_steps = max(
             len(timesteps) - num_inference_steps * self.scheduler.order, 0
@@ -478,7 +505,9 @@ def __call__(
                 return
 
             # expand the latents
-            latent_model_input = torch.cat([latents] * 2)
+            latent_model_input = (
+                torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+            )
             latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
 
             # predict the noise residual
@@ -491,6 +520,7 @@ def __call__(
                 latent_model_input,
                 t,
                 encoder_hidden_states=prompt_embeds,
+                timestep_cond=timestep_cond,
                 cross_attention_kwargs=cross_attention_kwargs,
                 added_cond_kwargs=added_cond_kwargs,
                 return_dict=False,
@@ -501,10 +531,11 @@ def __call__(
                 return
 
             # perform guidance
-            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-            noise_pred = noise_pred_uncond + guidance_scale * (
-                noise_pred_text - noise_pred_uncond
-            )
+            if do_classifier_free_guidance:
+                noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+                noise_pred = noise_pred_uncond + guidance_scale * (
+                    noise_pred_text - noise_pred_uncond
+                )
 
             # compute the previous noisy sample x_t -> x_t-1
             latents = self.scheduler.step(

Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,7 @@`
`15`	`15`	`DEISMultistepScheduler,`
`16`	`16`	`DDPMScheduler,`
`17`	`17`	`DPMSolverSDEScheduler,`
	`18`	`+ LCMScheduler,`
`18`	`19`	`)`
`19`	`20`
`20`	`21`
`@@ -68,4 +69,5 @@ class Scheduler:`
`68`	`69`	`Scheduler("LMS Karras", LMSDiscreteScheduler, dict(use_karras_sigmas=True)),`
`69`	`70`	`Scheduler("Euler Ancestral", EulerAncestralDiscreteScheduler, dict()),`
`70`	`71`	`Scheduler("KDPM 2 Ancestral", KDPM2AncestralDiscreteScheduler, dict()),`
	`72`	`+ Scheduler("LCM", LCMScheduler, dict()),`
`71`	`73`	`]`