support external legacy config files with no personalization section

lstein · lstein · commit 3d4f4b677f47 · 2023-03-30T21:39:05.000-04:00
diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py
@@ -19,7 +19,7 @@
 from enum import Enum
 from pathlib import Path
 from shutil import move, rmtree
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable, Optional, Union, List
 
 import safetensors
 import safetensors.torch
@@ -368,11 +368,19 @@ def _load_ckpt_model(self, model_name, mconfig):
         # check whether this is a v2 file and force conversion
         convert = Globals.ckpt_convert or self.is_v2_config(config)
 
+        if matching_config := self._scan_for_matching_file(Path(weights),suffixes=['.yaml']):
+            print(f'   | Using external config file {matching_config}')
+            config = matching_config
+
         # get the path to the custom vae, if any
         vae_path = None
+        # first we use whatever is in the config file
         if vae:
             path = Path(vae if os.path.isabs(vae) else os.path.normpath(os.path.join(Globals.root, vae)))
-            vae_path = path if path.exists() else None
+            if path.exists():
+                vae_path = path
+        # then we look for a file with the same basename
+        vae_path = vae_path or self._scan_for_matching_file(Path(weights))
             
         # if converting automatically to diffusers, then we do the conversion and return
         # a diffusers pipeline
@@ -449,7 +457,7 @@ def _load_ckpt_model(self, model_name, mconfig):
 
         # look and load a matching vae file. Code borrowed from AUTOMATIC1111 modules/sd_models.py
         if vae_path:
-            print(f"   | Loading VAE weights from: {vae}")
+            print(f"   | Loading VAE weights from: {vae_path}")
             if vae_path.suffix in [".ckpt", ".pt"]:
                 self.scan_model(vae_path.name, vae_path)
                 vae_ckpt = torch.load(vae_path, map_location="cpu")
@@ -458,7 +466,7 @@ def _load_ckpt_model(self, model_name, mconfig):
             vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss"}
             model.first_stage_model.load_state_dict(vae_dict, strict=False)
         else:
-            print(f"   | VAE file {vae} not found. Skipping.")
+            print("   | Using VAE built into model.")
 
         model.to(self.device)
         # model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
@@ -915,12 +923,9 @@ def heuristic_import(
             convert = True
             print("   | This SD-v2 model will be converted to diffusers format for use")
 
-        # look for a custom vae
-        vae_path = None
-        for suffix in ["pt", "ckpt", "safetensors"]:
-            if (model_path.with_suffix(f".vae.{suffix}")).exists():
-                vae_path = model_path.with_suffix(f".vae.{suffix}")
-                print(f"   | Using VAE file {vae_path.name}")
+        if (vae_path := self._scan_for_matching_file(model_path)):
+            print(f"   | Using VAE file {vae_path.name}")
+        
         if convert:
             diffuser_path = Path(
                 Globals.root, "models", Globals.converted_ckpts_dir, model_path.stem
@@ -1316,6 +1321,22 @@ def _cached_sha256(self, path, data) -> Union[str, bytes]:
             f.write(hash)
         return hash
 
+    @classmethod
+    def _scan_for_matching_file(
+            cls, model_path: Path,
+            suffixes: List[str]=['.vae.pt','.vae.ckpt','.vae.safetensors']
+    )->Optional[Path]:
+        """
+        Return the file sharing model_path's basename with one of the given
+        suffixes (last match wins), or None when no such file exists.
+        """
+        found = None
+        for suffix in suffixes:
+            candidate = model_path.with_suffix(suffix)
+            if candidate.exists():
+                found = candidate  # no break: later suffixes take precedence
+        return found
+                               
     def _load_vae(self, vae_config) -> AutoencoderKL:
         vae_args = {}
         try:
diff --git a/ldm/models/diffusion/ddpm.py b/ldm/models/diffusion/ddpm.py
@@ -19,7 +19,7 @@
 from tqdm import tqdm
 from torchvision.utils import make_grid
 from pytorch_lightning.utilities.distributed import rank_zero_only
-from omegaconf import ListConfig
+from omegaconf import ListConfig, OmegaConf
 import urllib
 
 from ldm.modules.textual_inversion_manager import TextualInversionManager
@@ -617,7 +617,7 @@ def __init__(
         self,
         first_stage_config,
         cond_stage_config,
-        personalization_config,
+        personalization_config=None,
         num_timesteps_cond=None,
         cond_stage_key='image',
         cond_stage_trainable=False,
@@ -676,6 +676,8 @@ def __init__(
         for param in self.model.parameters():
             param.requires_grad = False
 
+        personalization_config = personalization_config or self._fallback_personalization_config()
+
         self.embedding_manager = self.instantiate_embedding_manager(
             personalization_config, self.cond_stage_model
         )
@@ -800,6 +802,24 @@ def instantiate_embedding_manager(self, config, embedder):
 
         return model
 
+    def _fallback_personalization_config(self)->dict:
+        """
+        Build the default embedding-manager config (an OmegaConf object) for
+        custom legacy config files that lack a personalization_config section.
+        """
+        return OmegaConf.create(
+            dict(
+                target='ldm.modules.embedding_manager.EmbeddingManager',
+                params=dict(
+                    placeholder_strings=['*'],
+                    initializer_words=['sculpture'],  # one word; list('sculpture') would split it into characters
+                    per_image_tokens=False,
+                    num_vectors_per_token=1,
+                    progressive_words=False,
+                )
+            )
+        )
+
     def _get_denoise_row_from_list(
         self, samples, desc='', force_no_decoder_quantization=False
     ):
diff --git a/ldm/modules/encoders/modules.py b/ldm/modules/encoders/modules.py
@@ -463,6 +463,10 @@ def forward(self, text, **kwargs):
     def encode(self, text, **kwargs):
         return self(text, **kwargs)
 
+    def set_textual_inversion_manager(self, manager): # manager: presumably a TextualInversionManager (left unannotated)
+        # Store the manager on this embedder. TODO: all of the weighting and expanding stuff needs to be moved out of this class
+        self.textual_inversion_manager = manager
+
     @property
     def device(self):
         return self.transformer.device
@@ -476,10 +480,6 @@ class WeightedFrozenCLIPEmbedder(FrozenCLIPEmbedder):
     fragment_weights_key = "fragment_weights"
     return_tokens_key = "return_tokens"
 
-    def set_textual_inversion_manager(self, manager): #TextualInversionManager):
-        # TODO all of the weighting and expanding stuff needs be moved out of this class
-        self.textual_inversion_manager = manager
-
     def forward(self, text: list, **kwargs):
         # TODO all of the weighting and expanding stuff needs be moved out of this class
         '''