diff --git a/.gitignore b/.gitignore index aa25096..17d80df 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ wandb exps* .vscode build -lora_diffusion.egg-info \ No newline at end of file +lora_diffusion.egg-info +training_batch_preview \ No newline at end of file diff --git a/lora_diffusion/cli_lora_add.py b/lora_diffusion/cli_lora_add.py index fc7f7e4..df9303f 100644 --- a/lora_diffusion/cli_lora_add.py +++ b/lora_diffusion/cli_lora_add.py @@ -1,4 +1,9 @@ -from typing import Literal, Union, Dict +import sys +if sys.version_info >= (3,8): + from typing import Literal +else : + from typing_extensions import Literal +from typing import Union, Dict import os import shutil import fire @@ -6,14 +11,28 @@ from safetensors.torch import safe_open, save_file import torch -from .lora import ( - tune_lora_scale, - patch_pipe, - collapse_lora, - monkeypatch_remove_lora, -) -from .lora_manager import lora_join -from .to_ckpt_v2 import convert_to_ckpt + +try: + from .lora import ( + tune_lora_scale, + patch_pipe, + collapse_lora, + monkeypatch_remove_lora, + ) + + from .lora_manager import lora_join + from .to_ckpt_v2 import convert_to_ckpt + +except: # allows running the repo without installing it (can mess up existing dependencies) + from lora_diffusion import ( + tune_lora_scale, + patch_pipe, + collapse_lora, + monkeypatch_remove_lora, + ) + + from lora_diffusion.lora_manager import lora_join + from lora_diffusion.to_ckpt_v2 import convert_to_ckpt def _text_lora_path(path: str) -> str: @@ -185,3 +204,7 @@ def add( def main(): fire.Fire(add) + + +if __name__ == "__main__": + main() diff --git a/lora_diffusion/cli_lora_pti.py b/lora_diffusion/cli_lora_pti.py index 7de4bae..6538b4f 100644 --- a/lora_diffusion/cli_lora_pti.py +++ b/lora_diffusion/cli_lora_pti.py @@ -1,15 +1,16 @@ # Bootstrapped from: # https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth.py -import argparse -import hashlib -import inspect import itertools import math import os + +import json +import time import random import re from pathlib import Path +import numpy as np from typing import Optional, List, Literal import torch @@ -32,6 +33,14 @@ import wandb import fire +import sys +if sys.version_info >= (3,8): + from typing import Literal +else : + from typing_extensions import Literal + +from typing import Optional, List + from lora_diffusion import ( PivotalTuningDatasetCapation, extract_lora_ups_down, @@ -46,6 +55,33 @@ ) +def preview_training_batch(train_dataloader, mode, n_imgs=40): + outdir = f"training_batch_preview/{mode}" + os.makedirs(outdir, exist_ok=True) + imgs_saved = 0 + + while True: + for batch_i, batch in enumerate(train_dataloader): + imgs = batch["pixel_values"] + for i, img_torch in enumerate(imgs): + img_torch = (img_torch + 1) / 2 + # convert to pil and save to disk: + img = Image.fromarray( + (255.0 * img_torch) + .permute(1, 2, 0) + .detach() + .cpu() + .numpy() + .astype(np.uint8) + ).convert("RGB") + img.save(f"{outdir}/preview_{imgs_saved}.jpg") + imgs_saved += 1 + + if imgs_saved > n_imgs: + print(f"\nSaved {imgs_saved} preview training imgs to {outdir}") + return + + def get_models( pretrained_model_name_or_path, pretrained_vae_name_or_path, @@ -108,6 +144,12 @@ def get_models( initializer_token_id = token_ids[0] token_embeds[placeholder_token_id] = token_embeds[initializer_token_id] + # print some stats about the token embedding: + t = token_embeds[placeholder_token_id] + print( + f"init_token {init_tok} --> mean: {t.mean().item():.3f}, std: 
{t.std().item():.3f}, norm: {t.norm():.4f}" + ) + vae = AutoencoderKL.from_pretrained( pretrained_vae_name_or_path or pretrained_model_name_or_path, subfolder=None if pretrained_vae_name_or_path else "vae", @@ -188,6 +230,7 @@ def collate_fn(examples): train_dataloader = torch.utils.data.DataLoader( train_dataset, batch_size=train_batch_size, + num_workers=4, shuffle=True, collate_fn=collate_fn, ) @@ -249,6 +292,7 @@ def collate_fn(examples): train_dataloader = torch.utils.data.DataLoader( train_dataset, + num_workers=4, batch_size=train_batch_size, shuffle=True, collate_fn=collate_fn, @@ -263,6 +307,7 @@ def loss_step( vae, text_encoder, scheduler, + optimized_embeddings=None, train_inpainting=False, t_mutliplier=1.0, mixed_precision=False, @@ -286,11 +331,11 @@ def loss_step( scale_factor=1 / 8, ) else: - latents = batch["pixel_values"] + latents = batch["pixel_values"].to(dtype=weight_dtype).to(unet.device) if train_inpainting: masked_image_latents = batch["masked_image_latents"] - mask = batch["mask_values"] + mask = batch["mask_values"].to(dtype=weight_dtype).to(unet.device) noise = torch.randn_like(latents) bsz = latents.shape[0] @@ -367,6 +412,12 @@ def loss_step( .mean() ) + if optimized_embeddings is not None: + embedding_norm = optimized_embeddings.norm(dim=1).mean() + target_norm = 0.39 + embedding_norm_loss = (embedding_norm - target_norm) ** 2 + loss += 0.005 * embedding_norm_loss + return loss @@ -396,6 +447,7 @@ def train_inversion( clip_ti_decay: bool = True, ): + print("Performing Inversion....") progress_bar = tqdm(range(num_steps)) progress_bar.set_description("Steps") global_step = 0 @@ -408,6 +460,7 @@ def train_inversion( index_updates = ~index_no_updates loss_sum = 0.0 + losses = [] for epoch in range(math.ceil(num_steps / len(dataloader))): unet.eval() @@ -424,6 +477,7 @@ def train_inversion( vae, text_encoder, scheduler, + optimized_embeddings=None, train_inpainting=train_inpainting, mixed_precision=mixed_precision, cached_latents=cached_latents, @@ -431,6 +485,7 @@ def train_inversion( / accum_iter ) + losses.append(loss.detach().mean().item()) loss.backward() loss_sum += loss.detach().item() @@ -466,19 +521,22 @@ def train_inversion( ) * ( pre_norm + lambda_ * (0.4 - pre_norm) ) - print(pre_norm) + # print(pre_norm) - current_norm = ( - text_encoder.get_input_embeddings() - .weight[index_updates, :] - .norm(dim=-1) - ) + optimizing_embeds = text_encoder.get_input_embeddings().weight[ + index_updates, : + ] + current_norm = optimizing_embeds.norm(dim=-1) + # reset original embeddings (we're only optimizing the new token ones) text_encoder.get_input_embeddings().weight[ index_no_updates ] = orig_embeds_params[index_no_updates] - print(f"Current Norm : {current_norm}") + for i, t in enumerate(optimizing_embeds): + print( + f"token {i} --> mean: {t.mean().item():.3f}, std: {t.std().item():.3f}, norm: {t.norm():.4f}" + ) global_step += 1 progress_bar.update(1) @@ -490,6 +548,7 @@ def train_inversion( progress_bar.set_postfix(**logs) if global_step % save_steps == 0: + plot_loss_curve(losses, "textual_inversion") save_all( unet=unet, text_encoder=text_encoder, @@ -542,6 +601,20 @@ def train_inversion( return +import matplotlib.pyplot as plt + + +def plot_loss_curve(losses, name, moving_avg=20): + losses = np.array(losses) + losses = np.convolve(losses, np.ones(moving_avg) / moving_avg, mode="valid") + plt.plot(losses) + plt.xlabel("Step") + plt.ylabel("Loss") + plt.title(f"Losses during {name} phase:") + plt.savefig(f"{name}.png") + plt.clf() + + def perform_tuning( 
unet, vae, @@ -562,12 +635,13 @@ def perform_tuning( tokenizer, test_image_path: str, cached_latents: bool, + index_no_updates=None, log_wandb: bool = False, wandb_log_prompt_cnt: int = 10, class_token: str = "person", train_inpainting: bool = False, ): - + print("Performing Tuning....") progress_bar = tqdm(range(num_steps)) progress_bar.set_description("Steps") global_step = 0 @@ -577,12 +651,22 @@ def perform_tuning( unet.train() text_encoder.train() + # Save the current token embeddings: + orig_embeds_params = text_encoder.get_input_embeddings().weight.data.clone() + if log_wandb: preped_clip = prepare_clip_model_sets() + print(f"Performing {math.ceil(num_steps / len(dataloader))} epochs of training!") loss_sum = 0.0 + losses = [] for epoch in range(math.ceil(num_steps / len(dataloader))): + if not cached_latents: + dataloader.dataset.tune_h_flip_prob( + epoch / math.ceil(num_steps / len(dataloader)) + ) + for batch in dataloader: lr_scheduler_lora.step() @@ -594,6 +678,7 @@ def perform_tuning( vae, text_encoder, scheduler, + optimized_embeddings=text_encoder.get_input_embeddings().weight[:, :], train_inpainting=train_inpainting, t_mutliplier=0.8, mixed_precision=True, @@ -613,10 +698,21 @@ def perform_tuning( "lr": lr_scheduler_lora.get_last_lr()[0], } progress_bar.set_postfix(**logs) + losses.append(loss.detach().item()) + + if index_no_updates is not None: + with torch.no_grad(): + # reset original embeddings (we're only optimizing the new tokens) + text_encoder.get_input_embeddings().weight[ + index_no_updates + ] = orig_embeds_params[index_no_updates] global_step += 1 if global_step % save_steps == 0: + # plot the loss curve: + plot_loss_curve(losses, "tuning") + save_all( unet, text_encoder, @@ -701,7 +797,7 @@ def train( pretrained_vae_name_or_path: str = None, revision: Optional[str] = None, perform_inversion: bool = True, - use_template: Literal[None, "object", "style"] = None, + use_template: Literal[None, "object", "style", "person"] = None, train_inpainting: bool = False, placeholder_tokens: str = "", placeholder_token_at_data: Optional[str] = None, @@ -750,8 +846,12 @@ def train( enable_xformers_memory_efficient_attention: bool = False, out_name: str = "final_lora", ): + script_start_time = time.time() torch.manual_seed(seed) + # Get a dict with all the arguments: + args_dict = locals() + if log_wandb: wandb.init( project=wandb_project_name, @@ -771,7 +871,6 @@ def train( print("PTI : Placeholder Tokens not given, using null token") else: placeholder_tokens = placeholder_tokens.split("|") - assert ( sorted(placeholder_tokens) == placeholder_tokens ), f"Placeholder tokens should be sorted. 
Use something like {'|'.join(sorted(placeholder_tokens))}'" @@ -886,8 +985,13 @@ def train( if cached_latents: vae = None + # STEP 1 : Perform Inversion if perform_inversion: + if not cached_latents: + preview_training_batch(train_dataloader, "inversion") + + print("PTI : Performing Inversion") ti_optimizer = optim.AdamW( text_encoder.get_input_embeddings().parameters(), lr=ti_lr, @@ -896,6 +1000,14 @@ def train( weight_decay=weight_decay_ti, ) + token_ids_positions_to_update = np.where(index_no_updates.cpu().numpy() == 0) + print( + "Training embedding of size", + text_encoder.get_input_embeddings() + .weight[token_ids_positions_to_update] + .shape, + ) + lr_scheduler = get_scheduler( lr_scheduler, optimizer=ti_optimizer, @@ -930,6 +1042,7 @@ def train( ) del ti_optimizer + print("############### Inversion Done ###############") # Next perform Tuning with LoRA: if not use_extended_lora: @@ -940,17 +1053,16 @@ def train( dropout_p=lora_dropout_p, scale=lora_scale, ) + print("PTI : not use_extended_lora...") else: print("PTI : USING EXTENDED UNET!!!") lora_unet_target_modules = ( lora_unet_target_modules | UNET_EXTENDED_TARGET_REPLACE ) print("PTI : Will replace modules: ", lora_unet_target_modules) - unet_lora_params, _ = inject_trainable_lora_extended( unet, r=lora_rank, target_replace_module=lora_unet_target_modules ) - print(f"PTI : has {len(unet_lora_params)} lora") print("PTI : Before training:") inspect_lora(unet) @@ -980,6 +1092,7 @@ def train( param.requires_grad = False else: text_encoder.requires_grad_(False) + if train_text_encoder: text_encoder_lora_params, _ = inject_trainable_lora( text_encoder, @@ -995,9 +1108,18 @@ def train( inspect_lora(text_encoder) lora_optimizers = optim.AdamW(params_to_optimize, weight_decay=weight_decay_lora) + with torch.no_grad(): + n_optimizable_unet_params = sum( + p.numel() for p in unet.parameters() if p.requires_grad + ) + n_optimizable_unet_params += sum(p.numel() for p in text_encoder.parameters() if p.requires_grad) + + print("PTI : n_optimizable_unet_params: ", n_optimizable_unet_params) + print(f"PTI : has {len(unet_lora_params)} lora") unet.train() if train_text_encoder: + print("Training text encoder!") text_encoder.train() train_dataset.blur_amount = 70 @@ -1008,6 +1130,8 @@ def train( num_warmup_steps=lr_warmup_steps_lora, num_training_steps=max_train_steps_tuning, ) + if not cached_latents: + preview_training_batch(train_dataloader, "tuning") perform_tuning( unet, @@ -1015,6 +1139,7 @@ def train( text_encoder, train_dataloader, max_train_steps_tuning, + index_no_updates=index_no_updates, cached_latents=cached_latents, scheduler=noise_scheduler, optimizer=lora_optimizers, @@ -1035,6 +1160,19 @@ def train( train_inpainting=train_inpainting, ) + print("############### Tuning Done ###############") + training_time = time.time() - script_start_time + print(f"Training time: {training_time/60:.1f} minutes") + args_dict["training_time_s"] = int(training_time) + + # Save the args_dict to the output directory as a json file: + with open(os.path.join(output_dir, "lora_training_args.json"), "w") as f: + json.dump(args_dict, f, default=lambda o: "", indent=2) + def main(): fire.Fire(train) + + +if __name__ == "__main__": + main() diff --git a/lora_diffusion/cli_pt_to_safetensors.py b/lora_diffusion/cli_pt_to_safetensors.py index 9a4be40..aefac92 100644 --- a/lora_diffusion/cli_pt_to_safetensors.py +++ b/lora_diffusion/cli_pt_to_safetensors.py @@ -62,9 +62,11 @@ def convert(*paths, outpath, overwrite=False, **settings): } prefix = f"{name}." - - arg_settings = { k[len(prefix) :]: v for k, v in settings.items() if k.startswith(prefix) } - model_settings = { **model_settings, **arg_settings } + + arg_settings = { + k[len(prefix) :]: v for k, v in settings.items() if k.startswith(prefix) + } + model_settings = {**model_settings, **arg_settings} print(f"Loading Lora for {name} from {path} with settings {model_settings}")
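# A minimal, self-contained sketch of the trainable-parameter bookkeeping done in cli_lora_pti.train()
# above: sum the elements of every requires_grad parameter over both the LoRA-injected unet and the
# text_encoder. The helper name count_trainable_params is illustrative only and is not part of this patch.
import torch

def count_trainable_params(*modules: torch.nn.Module) -> int:
    # Count every parameter element that will receive gradients during tuning.
    return sum(p.numel() for m in modules for p in m.parameters() if p.requires_grad)

# e.g. after injecting LoRA:
#   n_optimizable = count_trainable_params(unet, text_encoder)
#   print("PTI : n_optimizable params:", n_optimizable)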
diff --git a/lora_diffusion/dataset.py b/lora_diffusion/dataset.py index f1c28fd..01a22fb 100644 --- a/lora_diffusion/dataset.py +++ b/lora_diffusion/dataset.py @@ -1,7 +1,7 @@ import random from pathlib import Path from typing import Dict, List, Optional, Tuple, Union - +import numpy as np from PIL import Image from torch import zeros_like from torch.utils.data import Dataset @@ -39,7 +39,38 @@ "a photo of a small {}", ] -STYLE_TEMPLATE = [ +PERSON_TEMPLATE = [ + "{}", + "{}", + "a picture of {}", + "a closeup of {}", + "a closeup photo of {}", + "a close-up picture of {}", + "a photo of {}", + "a photo of {}", + "the photo of {}", + "a cropped photo of {}", + "a funny photo of {}", + "a selfie of {}", + "a photo of the handsome {}", + "a photo of the beautiful {}", + "a selfie taken by the handsome {}", + "a selfie taken by {}", + "{} taking a selfie", + "{} is having fun, 4k photograph", + "{} wearing a plaid shirt standing next to another person", + "smiling {} in a hoodie and sweater", + "a photo of the cool {}", + "a close-up photo of {}", + "a bright photo of {}", + "a cropped photo of {}", + "a brilliant HD photo of {}", + "a beautiful picture of {}", + "a photo showing {}", + "a great photo of {}", +] + +STYLE_TEMPLATE_ORIG = [ "a painting in the style of {}", "a rendering in the style of {}", "a cropped painting in the style of {}", @@ -61,10 +92,28 @@ "a large painting in the style of {}", ] +STYLE_TEMPLATE = [ + "a painting in the style of {}", + "a rendering in the style of {}", + "an artwork in the style of {}", + "a magnificent painting in the style of {}", + "a picture in the style of {}", + "a photograph, {} style", + "{} style painting", + "a {}-styled artwork", + "a nice painting in the style of {}", + "a gorgeous example of {} style", + "image in the style of {}", + "{}, painting", + "{} artwork", +] + + NULL_TEMPLATE = ["{}"] TEMPLATE_MAP = { "object": OBJECT_TEMPLATE, + "person": PERSON_TEMPLATE, "style": STYLE_TEMPLATE, "null": NULL_TEMPLATE, } @@ -116,6 +165,35 @@ def _generate_random_mask(image): return mask, masked_image +def expand_rectangle(mask, f): + rows, cols = np.where(mask == 255) + top_row, bottom_row = np.min(rows), np.max(rows) + left_col, right_col = np.min(cols), np.max(cols) + + rect_height, rect_width = bottom_row - top_row + 1, right_col - left_col + 1 + new_height, new_width = np.round(rect_height * f), np.round(rect_width * f) + + center_row, center_col = top_row + rect_height // 2, left_col + rect_width // 2 + top_row, bottom_row = np.round(center_row - new_height / 2), np.round( + center_row + new_height / 2 + ) + left_col, right_col = np.round(center_col - new_width / 2), np.round( + center_col + new_width / 2 + ) + + top_row, bottom_row = int(np.clip(top_row, 0, mask.shape[0] - 1)), int( + np.clip(bottom_row, 0, mask.shape[0] - 1) + ) + left_col, right_col = int(np.clip(left_col, 0, mask.shape[1] - 1)), int( + np.clip(right_col, 0, mask.shape[1] - 1) + ) + + expanded_mask = np.ones_like(mask) + expanded_mask[top_row : bottom_row + 1, left_col : right_col + 1] = 255 + + return expanded_mask + + class PivotalTuningDatasetCapation(Dataset): """ A dataset to prepare
the instance and class images with the prompts for fine-tuning the model. @@ -141,6 +219,8 @@ def __init__( self.tokenizer = tokenizer self.resize = resize self.train_inpainting = train_inpainting + self.h_flip_prob = 0.5 + self.final_flip_prob = 0.33 if use_template == "person" else 0.5 instance_data_root = Path(instance_data_root) if not instance_data_root.exists(): @@ -156,6 +236,10 @@ def __init__( # Prepare the instance images if use_mask_captioned_data: src_imgs = glob.glob(str(instance_data_root) + "/*src.jpg") + src_imgs = sorted( + src_imgs, key=lambda x: int(str(Path(x).stem).split(".")[0]) + ) + for f in src_imgs: idx = int(str(Path(f).stem).split(".")[0]) mask_path = f"{instance_data_root}/{idx}.mask.png" @@ -218,6 +302,18 @@ def __init__( ] ) for idx, mask in enumerate(masks): + avg_pixel_value = np.array(mask.getdata()).mean() + if avg_pixel_value == 1.0: + print(f"No mask detected for {idx}..") + else: + if 1: + # convert to numpy array: + mask = np.array(mask) + # Make the rectangular mask region bigger: + mask = expand_rectangle(mask, 1.25) + # convert back to PIL image: + mask = Image.fromarray(mask).convert("L") + mask.save(f"{instance_data_root}/{idx}.mask.png") break @@ -237,12 +333,13 @@ def __init__( self.h_flip = h_flip self.image_transforms = transforms.Compose( [ + transforms.RandomAffine(degrees=0, translate=(0, 0), scale=(1.0, 1.2)), transforms.Resize( size, interpolation=transforms.InterpolationMode.BILINEAR ) if resize else transforms.Lambda(lambda x: x), - transforms.ColorJitter(0.1, 0.1) + transforms.ColorJitter(0.1, 0.1, 0.02, 0.02) if color_jitter else transforms.Lambda(lambda x: x), transforms.CenterCrop(size), @@ -253,6 +350,15 @@ def __init__( self.blur_amount = blur_amount + print("Captions:") + print(self.captions) + + def tune_h_flip_prob(self, training_progress): + if self.h_flip: + # Tune the h_flip probability to be 0.5 training_progress is 0 and end_prob when training_progress is 1 + self.h_flip_prob = 0.5 + (self.final_flip_prob - 0.5) * training_progress + print(f"h_flip_prob: {self.h_flip_prob:.3f}") + def __len__(self): return self._length @@ -283,18 +389,14 @@ def __getitem__(self, index): for token, value in self.token_map.items(): text = text.replace(token, value) - print(text) + if random.random() < 0.1: + print(text) if self.use_mask: - example["mask"] = ( - self.image_transforms( - Image.open(self.mask_path[index % self.num_instance_images]) - ) - * 0.5 - + 1.0 - ) + img_mask = Image.open(self.mask_path[index % self.num_instance_images]) + example["mask"] = self.image_transforms(img_mask) * 0.5 + 1.0 - if self.h_flip and random.random() > 0.5: + if self.h_flip and random.random() < self.h_flip_prob: hflip = transforms.RandomHorizontalFlip(p=1) example["instance_images"] = hflip(example["instance_images"]) diff --git a/lora_diffusion/lora.py b/lora_diffusion/lora.py index 8753f15..aae1d8b 100644 --- a/lora_diffusion/lora.py +++ b/lora_diffusion/lora.py @@ -1,7 +1,12 @@ import json import math from itertools import groupby -from typing import Callable, Dict, List, Optional, Set, Tuple, Type, Union +import sys +if sys.version_info >= (3,9): + from typing import Type +else : + from typing_extensions import Type +from typing import Callable, Dict, List, Optional, Set, Tuple, Union import numpy as np import PIL @@ -914,7 +919,7 @@ def apply_learned_embed_in_clip( trained_tokens = list(learned_embeds.keys()) for token in trained_tokens: - print(token) + print("Adding new token: ", token) embeds = learned_embeds[token] # cast to dtype of 
text_encoder diff --git a/lora_diffusion/preprocess_files.py b/lora_diffusion/preprocess_files.py index bedb89f..315765a 100644 --- a/lora_diffusion/preprocess_files.py +++ b/lora_diffusion/preprocess_files.py @@ -2,7 +2,12 @@ # Have BLIP auto caption # Have CLIPSeg auto mask concept -from typing import List, Literal, Union, Optional, Tuple +import sys +if sys.version_info >= (3,8): + from typing import Literal +else : + from typing_extensions import Literal +from typing import List, Union, Optional, Tuple import os from PIL import Image, ImageFilter import torch @@ -244,7 +249,7 @@ def _center_of_mass(mask: Image.Image): def load_and_save_masks_and_captions( files: Union[str, List[str]], output_dir: str, - caption_text: Optional[str] = None, + caption_text: Optional[Union[List[str], str]] = None, target_prompts: Optional[Union[List[str], str]] = None, target_size: int = 512, crop_based_on_salience: bool = True, @@ -263,8 +268,10 @@ def load_and_save_masks_and_captions( # check if it is a directory if os.path.isdir(files): # get all the .png .jpg in the directory - files = glob.glob(os.path.join(files, "*.png")) + glob.glob( - os.path.join(files, "*.jpg") + files = ( + glob.glob(os.path.join(files, "*.png")) + + glob.glob(os.path.join(files, "*.jpg")) + + glob.glob(os.path.join(files, "*.jpeg")) ) if len(files) == 0: @@ -278,8 +285,10 @@ def load_and_save_masks_and_captions( images = [Image.open(file) for file in files] # captions - print(f"Generating {len(images)} captions...") - captions = blip_captioning_dataset(images, text=caption_text) + captions = caption_text + if not isinstance(caption_text, list): + print(f"Generating {len(images)} captions...") + captions = blip_captioning_dataset(images, text=caption_text) if target_prompts is None: target_prompts = captions @@ -325,3 +334,7 @@ def main(): fire.Fire(load_and_save_masks_and_captions) + + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt index 89eebcd..f05192c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ safetensors opencv-python torchvision mediapipe +typing_extensions; python_version < '3.9' \ No newline at end of file diff --git a/setup.py b/setup.py index 6d286b3..2b5e609 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="lora_diffusion", py_modules=["lora_diffusion"], - version="0.1.7", + version="0.1.8", description="Low Rank Adaptation for Diffusion Models. Works with Stable Diffusion out-of-the-box.", author="Simo Ryu", packages=find_packages(), diff --git a/textual_inversion.png b/textual_inversion.png new file mode 100644 index 0000000..8c39982 Binary files /dev/null and b/textual_inversion.png differ
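# Usage sketch for the caption changes in preprocess_files.py above: caption_text now accepts either a
# guiding string (BLIP auto-captioning) or a pre-written list of captions, in which case BLIP is skipped.
# The paths and caption strings below are illustrative assumptions, not part of this patch; a supplied
# list is assumed to provide one caption per image found.
from lora_diffusion.preprocess_files import load_and_save_masks_and_captions

# Auto-caption every image in the folder with BLIP, then save masks/captions for training:
load_and_save_masks_and_captions("raw_imgs/", output_dir="training_data/")

# Or supply explicit captions and skip BLIP entirely:
load_and_save_masks_and_captions(
    "raw_imgs/",
    output_dir="training_data/",
    caption_text=[
        "a photo of a person smiling",
        "a close-up photo of a person",
    ],
)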