Skip to content

Commit ece48a6

Browse files
authored
Merge pull request #5 from asomoza/main
LCM and SSD-1B support added
2 parents 628a019 + 5ab2fee commit ece48a6

File tree

6 files changed

+66
-13
lines changed

6 files changed

+66
-13
lines changed

CHANGELOG.MD

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Changelog
2+
3+
## 0.1.0 - 2023-11-09
4+
5+
- Initial release
6+
- Windows installer
7+
8+
## 0.1.1 - 2023-11-10
9+
10+
- Support for LCM models
11+
- Support for SSD-1B models
12+
- Support for LCM LoRAs
13+
- Diffusers 0.23 dependency required

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@ It is highly recommended to use the included VAE with the FP16 fix, since the VA
3838
- Will completely run offline after the first installation.
3939
- Powerful features only available as a desktop application.
4040
- Easy sharing of model and LoRA metadata, since the information is stored in each model, including sample image, sample generation, triggers and tags for filtering.
41+
- Latent Consistency Models (LCM) and LoRAs for fast inference.
42+
- Segmind Stable Diffusion (SSD-1B) models for VRAM savings.
4143

4244
## Limitations
4345

4446
- Only runs with Stable Diffusion XL models.
45-
- It has the default 75 CLIP token limitation for the prompts.
47+
- It has the default CLIP 75 token limitation for the prompts.
4648

4749
You can read why [here](https://github.com/ZCode-opensource/image-artisan-xl/blob/main/EXPLANATIONS.MD).
4850

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "iartisanxl"
7-
version = "0.0.1"
7+
version = "0.1.1"
88
authors = [
99
{ name="Alvaro Somoza", email="[email protected]" },
1010
]
1111
description = "Dekstop application for generating images using Stable Diffusion."
1212
requires-python = ">=3.11"
1313
dependencies = [
1414
"accelerate>=0.24.1 ",
15-
"diffusers>=0.22.0",
15+
"diffusers>=0.23.0",
1616
"Pillow>=9.3.0",
1717
"PyOpenGL",
1818
"PyOpenGL_accelerate",

src/iartisanxl/convert_model/convert_functions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,10 @@ def convert_ldm_unet_checkpoint(
763763
"label_emb.0.2.bias"
764764
]
765765

766+
# Relevant to StableDiffusionUpscalePipeline
767+
if "num_class_embeds" in config:
768+
new_checkpoint["class_embedding.weight"] = unet_state_dict["label_emb.weight"]
769+
766770
new_checkpoint["conv_in.weight"] = unet_state_dict["input_blocks.0.0.weight"]
767771
new_checkpoint["conv_in.bias"] = unet_state_dict["input_blocks.0.0.bias"]
768772

@@ -845,6 +849,7 @@ def convert_ldm_unet_checkpoint(
845849

846850
if len(attentions):
847851
paths = renew_attention_paths(attentions)
852+
848853
meta_path = {
849854
"old": f"input_blocks.{i}.1",
850855
"new": f"down_blocks.{block_id}.attentions.{layer_in_block_id}",

src/iartisanxl/generation/schedulers/schedulers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
DEISMultistepScheduler,
1616
DDPMScheduler,
1717
DPMSolverSDEScheduler,
18+
LCMScheduler,
1819
)
1920

2021

@@ -68,4 +69,5 @@ class Scheduler:
6869
Scheduler("LMS Karras", LMSDiscreteScheduler, dict(use_karras_sigmas=True)),
6970
Scheduler("Euler Ancestral", EulerAncestralDiscreteScheduler, dict()),
7071
Scheduler("KDPM 2 Ancestral", KDPM2AncestralDiscreteScheduler, dict()),
72+
Scheduler("LCM", LCMScheduler, dict()),
7173
]

src/iartisanxl/pipelines/txt_pipeline.py

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,20 @@ def _get_add_time_ids(
358358
add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
359359
return add_time_ids
360360

361+
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
362+
assert len(w.shape) == 1
363+
w = w * 1000.0
364+
365+
half_dim = embedding_dim // 2
366+
emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
367+
emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
368+
emb = w.to(dtype)[:, None] * emb[None, :]
369+
emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
370+
if embedding_dim % 2 == 1: # zero pad
371+
emb = torch.nn.functional.pad(emb, (0, 1)) # pylint: disable=not-callable
372+
assert emb.shape == (w.shape[0], embedding_dim)
373+
return emb
374+
361375
@torch.no_grad()
362376
def __call__(
363377
self,
@@ -387,6 +401,10 @@ def __call__(
387401
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
388402
self.logger.debug("Using device: %s", device)
389403

404+
do_classifier_free_guidance = False
405+
if guidance_scale > 1:
406+
do_classifier_free_guidance = True
407+
390408
status_update("Encoding the prompt...")
391409
text_encoder_lora_scale = (
392410
cross_attention_kwargs.get("scale", None)
@@ -455,16 +473,25 @@ def __call__(
455473
negative_add_time_ids = add_time_ids
456474

457475
status_update("Preparing emdeddings...")
458-
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
459-
add_text_embeds = torch.cat(
460-
[negative_pooled_prompt_embeds, add_text_embeds], dim=0
461-
)
462-
add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
476+
477+
if do_classifier_free_guidance:
478+
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
479+
add_text_embeds = torch.cat(
480+
[negative_pooled_prompt_embeds, add_text_embeds], dim=0
481+
)
482+
add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
463483

464484
prompt_embeds = prompt_embeds.to(device)
465485
add_text_embeds = add_text_embeds.to(device)
466486
add_time_ids = add_time_ids.to(device)
467487

488+
timestep_cond = None
489+
if self.unet.config.time_cond_proj_dim is not None:
490+
guidance_scale_tensor = torch.tensor(guidance_scale - 1).repeat(1)
491+
timestep_cond = self.get_guidance_scale_embedding(
492+
guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
493+
).to(device=device, dtype=latents.dtype)
494+
468495
status_update("Generating image...")
469496
num_warmup_steps = max(
470497
len(timesteps) - num_inference_steps * self.scheduler.order, 0
@@ -478,7 +505,9 @@ def __call__(
478505
return
479506

480507
# expand the latents
481-
latent_model_input = torch.cat([latents] * 2)
508+
latent_model_input = (
509+
torch.cat([latents] * 2) if do_classifier_free_guidance else latents
510+
)
482511
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
483512

484513
# predict the noise residual
@@ -491,6 +520,7 @@ def __call__(
491520
latent_model_input,
492521
t,
493522
encoder_hidden_states=prompt_embeds,
523+
timestep_cond=timestep_cond,
494524
cross_attention_kwargs=cross_attention_kwargs,
495525
added_cond_kwargs=added_cond_kwargs,
496526
return_dict=False,
@@ -501,10 +531,11 @@ def __call__(
501531
return
502532

503533
# perform guidance
504-
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
505-
noise_pred = noise_pred_uncond + guidance_scale * (
506-
noise_pred_text - noise_pred_uncond
507-
)
534+
if do_classifier_free_guidance:
535+
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
536+
noise_pred = noise_pred_uncond + guidance_scale * (
537+
noise_pred_text - noise_pred_uncond
538+
)
508539

509540
# compute the previous noisy sample x_t -> x_t-1
510541
latents = self.scheduler.step(

0 commit comments

Comments
 (0)