
Commit 4b12864

supports loading LoRA with parallel Wan model (#30)
* fix cfg parallel
* ParallelModel supports load/unload LoRA
* fix param name
* fix split tensor
* update assert message
* add example
1 parent ea6fde6 commit 4b12864
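
A minimal usage sketch of the pipeline-level LoRA API this commit introduces. The checkpoint path, LoRA path, and the from_pretrained arguments below are placeholders; the Wan-specific example added by this commit is not shown on this page.

import torch
from diffsynth_engine.pipelines import WanVideoPipeline

# Placeholder path and constructor arguments; the real Wan example lives in the
# repository's examples directory, which is not part of this diff view.
pipe = WanVideoPipeline.from_pretrained("models/wan_video.safetensors", device="cuda:0", dtype=torch.bfloat16)

# load_lora() converts the LoRA state dict with the pipeline's lora_converter,
# then hands per-module (rank, alpha, up, down) args to each target model.
# fused=True folds the update into the frozen weights via add_frozen_lora.
pipe.load_lora("loras/my_style.safetensors", scale=1.0, fused=True)

# ... run inference ...

# Restores the original weights on every LoRA-wrapped layer
# (assuming WanVideoPipeline overrides unload_loras like the other pipelines).
pipe.unload_loras()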

File tree: 10 files changed (+188, -188 lines)

Lines changed: 7 additions & 0 deletions

@@ -0,0 +1,7 @@
+from .base import PreTrainedModel, StateDictConverter
+
+
+__all__ = [
+    "PreTrainedModel",
+    "StateDictConverter",
+]

diffsynth_engine/models/base.py

Lines changed: 19 additions & 10 deletions

@@ -1,22 +1,15 @@
 import os
 import torch
 import torch.nn as nn
-from typing import Dict, Union
+from typing import Dict, List, Union
 from safetensors.torch import load_file
 
+from diffsynth_engine.models.basic.lora import LoRALinear, LoRAConv2d
 from diffsynth_engine.models.utils import no_init_weights
 
 
-class LoRAStateDictConverter:
-    def convert(self, lora_state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
-        return {"lora": lora_state_dict}
-
-
-StateDictType = Dict[str, torch.Tensor]
-
-
 class StateDictConverter:
-    def convert(self, state_dict: StateDictType) -> StateDictType:
+    def convert(self, state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
         return state_dict
 
 
@@ -40,6 +33,22 @@ def from_state_dict(cls, state_dict: Dict[str, torch.Tensor], device: str, dtype
         model.to(device=device, dtype=dtype, non_blocking=True)
         return model
 
+    def load_loras(self, lora_args: List[Dict[str, any]], fused: bool = True):
+        for args in lora_args:
+            key = args["name"]
+            module = self.get_submodule(key)
+            if not isinstance(module, (LoRALinear, LoRAConv2d)):
+                raise ValueError(f"Unsupported lora key: {key}")
+            if fused:
+                module.add_frozen_lora(**args)
+            else:
+                module.add_lora(**args)
+
+    def unload_loras(self):
+        for module in self.modules():
+            if isinstance(module, (LoRALinear, LoRAConv2d)):
+                module.clear()
+
 
 def split_suffix(name: str):
     suffix_list = [
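
For context, a self-contained sketch of the LoRA arithmetic these new helpers drive. This is standard LoRA math, not the actual LoRALinear implementation in diffsynth_engine/models/basic/lora.py, and the shapes are toy values.

import torch

# Toy shapes; real values come from the converted LoRA state dict that
# BasePipeline.load_loras passes down as {"rank", "alpha", "up", "down", ...}.
out_features, in_features, rank, alpha, scale = 64, 64, 4, 4.0, 1.0
weight = torch.randn(out_features, in_features)   # frozen base weight
up = torch.randn(out_features, rank)              # "up" factor
down = torch.randn(rank, in_features)             # "down" factor

# fused path (add_frozen_lora-style): merge the low-rank delta into the weight once
fused_weight = weight + scale * (alpha / rank) * (up @ down)

# unfused path (add_lora-style): keep the delta as an extra branch at forward time
x = torch.randn(2, in_features)
y = x @ weight.T + scale * (alpha / rank) * (x @ down.T @ up.T)

# unload_loras() above simply walks self.modules() and calls clear() on every
# LoRALinear / LoRAConv2d, dropping these deltas again.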

diffsynth_engine/pipelines/__init__.py

Lines changed: 2 additions & 1 deletion

@@ -1,11 +1,12 @@
-from .base import BasePipeline
+from .base import BasePipeline, LoRAStateDictConverter
 from .flux_image import FluxImagePipeline, FluxModelConfig
 from .sdxl_image import SDXLImagePipeline, SDXLModelConfig
 from .sd_image import SDImagePipeline, SDModelConfig
 from .wan_video import WanVideoPipeline, WanModelConfig
 
 __all__ = [
     "BasePipeline",
+    "LoRAStateDictConverter",
     "FluxImagePipeline",
     "FluxModelConfig",
     "SDXLImagePipeline",

diffsynth_engine/pipelines/base.py

Lines changed: 38 additions & 1 deletion

@@ -1,7 +1,7 @@
 import os
 import torch
 import numpy as np
-from typing import Dict, List
+from typing import Dict, List, Tuple
 from PIL import Image, ImageOps
 from einops import repeat
 from dataclasses import dataclass
@@ -19,7 +19,14 @@ class ModelConfig:
     pass
 
 
+class LoRAStateDictConverter:
+    def convert(self, lora_state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
+        return {"lora": lora_state_dict}
+
+
 class BasePipeline:
+    lora_converter = LoRAStateDictConverter()
+
     def __init__(self, device="cuda:0", dtype=torch.float16):
         super().__init__()
         self.device = device
@@ -43,6 +50,36 @@ def from_state_dict(
     ) -> "BasePipeline":
         raise NotImplementedError()
 
+    def load_loras(self, lora_list: List[Tuple[str, float]], fused: bool = True, save_original_weight: bool = False):
+        for lora_path, lora_scale in lora_list:
+            logger.info(f"loading lora from {lora_path} with scale {lora_scale}")
+            state_dict = load_file(lora_path, device="cpu")
+            lora_state_dict = self.lora_converter.convert(state_dict)
+            for model_name, state_dict in lora_state_dict.items():
+                model = getattr(self, model_name)
+                lora_args = []
+                for key, param in state_dict.items():
+                    lora_args.append(
+                        {
+                            "name": key,
+                            "scale": lora_scale,
+                            "rank": param["rank"],
+                            "alpha": param["alpha"],
+                            "up": param["up"],
+                            "down": param["down"],
+                            "device": self.device,
+                            "dtype": self.dtype,
+                            "save_original_weight": save_original_weight,
+                        }
+                    )
+                model.load_loras(lora_args, fused=fused)
+
+    def load_lora(self, path: str, scale: float, fused: bool = True, save_original_weight: bool = False):
+        self.load_loras([(path, scale)], fused, save_original_weight)
+
+    def unload_loras(self):
+        raise NotImplementedError()
+
     @staticmethod
     def load_model_checkpoint(
         checkpoint_path: str, device: str = "cpu", dtype: torch.dtype = torch.float16
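
The refactor moves LoRAStateDictConverter next to BasePipeline so each pipeline can override lora_converter and route checkpoint keys to its own submodels. Below is a toy converter sketch, assuming a "dit."-prefixed key layout and ".lora_up"/".lora_down" tensor naming; the real Wan/Flux converters are not shown in this diff.

from typing import Dict

import torch

from diffsynth_engine.pipelines import LoRAStateDictConverter


class ToyLoRAConverter(LoRAStateDictConverter):
    # Maps raw checkpoint tensors to {model_name: {module_name: lora_params}},
    # the shape BasePipeline.load_loras expects before calling model.load_loras().
    def convert(self, lora_state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, dict]]:
        dit_params = {}
        for key, up in lora_state_dict.items():
            if not (key.startswith("dit.") and key.endswith(".lora_up.weight")):
                continue
            down = lora_state_dict[key.replace(".lora_up.", ".lora_down.")]
            name = key[len("dit."):-len(".lora_up.weight")]
            dit_params[name] = {
                "rank": down.shape[0],
                "alpha": float(down.shape[0]),  # assume alpha == rank when not stored
                "up": up,
                "down": down,
            }
        return {"dit": dit_params}


# A pipeline subclass would then set:  lora_converter = ToyLoRAConverter()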

diffsynth_engine/pipelines/flux_image.py

Lines changed: 6 additions & 40 deletions

@@ -2,8 +2,7 @@
 import os
 import torch
 import math
-from typing import Callable, Dict, List, Tuple, Optional
-from safetensors.torch import load_file
+from typing import Callable, Dict, Optional
 from tqdm import tqdm
 from PIL import Image
 from dataclasses import dataclass
@@ -16,9 +15,8 @@
     flux_dit_config,
     flux_text_encoder_config,
 )
-from diffsynth_engine.models.basic.lora import LoRAContext, LoRALinear, LoRAConv2d
-from diffsynth_engine.models.base import LoRAStateDictConverter
-from diffsynth_engine.pipelines import BasePipeline
+from diffsynth_engine.models.basic.lora import LoRAContext
+from diffsynth_engine.pipelines import BasePipeline, LoRAStateDictConverter
 from diffsynth_engine.tokenizers import CLIPTokenizer, T5TokenizerFast
 from diffsynth_engine.algorithm.noise_scheduler import RecifitedFlowScheduler
 from diffsynth_engine.algorithm.sampler import FlowMatchEulerSampler
@@ -298,42 +296,10 @@ def from_pretrained(
         pipe.enable_sequential_cpu_offload()
         return pipe
 
-    def load_lora(self, path: str, scale: float, fused: bool = False, save_original_weight: bool = True):
-        self.load_loras([(path, scale)], fused, save_original_weight)
-
-    def load_loras(self, lora_list: List[Tuple[str, float]], fused: bool = False, save_original_weight: bool = True):
-        for lora_path, lora_scale in lora_list:
-            state_dict = load_file(lora_path, device="cpu")
-            lora_state_dict = self.lora_converter.convert(state_dict)
-            for model_name, state_dict in lora_state_dict.items():
-                model = getattr(self, model_name)
-                for key, param in state_dict.items():
-                    module = model.get_submodule(key)
-                    if not isinstance(module, (LoRALinear, LoRAConv2d)):
-                        raise ValueError(f"Unsupported lora key: {key}")
-                    lora_args = {
-                        "name": key,
-                        "scale": lora_scale,
-                        "rank": param["rank"],
-                        "alpha": param["alpha"],
-                        "up": param["up"],
-                        "down": param["down"],
-                        "device": self.device,
-                        "dtype": self.dtype,
-                        "save_original_weight": save_original_weight,
-                    }
-                    if fused:
-                        module.add_frozen_lora(**lora_args)
-                    else:
-                        module.add_lora(**lora_args)
-
     def unload_loras(self):
-        for key, module in self.dit.named_modules():
-            if isinstance(module, (LoRALinear, LoRAConv2d)):
-                module.clear()
-        for key, module in self.text_encoder_1.named_modules():
-            if isinstance(module, (LoRALinear, LoRAConv2d)):
-                module.clear()
+        self.dit.unload_loras()
+        self.text_encoder_1.unload_loras()
+        self.text_encoder_2.unload_loras()
 
     @classmethod
     def from_state_dict(

diffsynth_engine/pipelines/sd_image.py

Lines changed: 6 additions & 40 deletions

@@ -2,15 +2,14 @@
 import os
 import torch
 from dataclasses import dataclass
-from typing import Callable, Dict, Optional, List, Tuple
-from safetensors.torch import load_file
+from typing import Callable, Dict, Optional
 from tqdm import tqdm
 from PIL import Image
 
-from diffsynth_engine.models.base import LoRAStateDictConverter, split_suffix
-from diffsynth_engine.models.basic.lora import LoRAContext, LoRALinear, LoRAConv2d
+from diffsynth_engine.models.base import split_suffix
+from diffsynth_engine.models.basic.lora import LoRAContext
 from diffsynth_engine.models.sd import SDTextEncoder, SDVAEDecoder, SDVAEEncoder, SDUNet, sd_unet_config
-from diffsynth_engine.pipelines import BasePipeline
+from diffsynth_engine.pipelines import BasePipeline, LoRAStateDictConverter
 from diffsynth_engine.tokenizers import CLIPTokenizer
 from diffsynth_engine.algorithm.noise_scheduler import ScaledLinearScheduler
 from diffsynth_engine.algorithm.sampler import EulerSampler
@@ -275,42 +274,9 @@ def predict_noise(self, latents, timestep, prompt_emb):
         )
         return noise_pred
 
-    def load_lora(self, path: str, scale: float, fused: bool = False, save_original_weight: bool = True):
-        self.load_loras([(path, scale)], fused, save_original_weight)
-
-    def load_loras(self, lora_list: List[Tuple[str, float]], fused: bool = False, save_original_weight: bool = True):
-        for lora_path, lora_scale in lora_list:
-            state_dict = load_file(lora_path, device="cpu")
-            lora_state_dict = self.lora_converter.convert(state_dict)
-            for model_name, state_dict in lora_state_dict.items():
-                model = getattr(self, model_name)
-                for key, param in state_dict.items():
-                    module = model.get_submodule(key)
-                    if not isinstance(module, (LoRALinear, LoRAConv2d)):
-                        raise ValueError(f"Unsupported lora key: {key}")
-                    lora_args = {
-                        "name": key,
-                        "scale": lora_scale,
-                        "rank": param["rank"],
-                        "alpha": param["alpha"],
-                        "up": param["up"],
-                        "down": param["down"],
-                        "device": self.device,
-                        "dtype": self.dtype,
-                        "save_original_weight": save_original_weight,
-                    }
-                    if fused:
-                        module.add_frozen_lora(**lora_args)
-                    else:
-                        module.add_lora(**lora_args)
-
     def unload_loras(self):
-        for key, module in self.unet.named_modules():
-            if isinstance(module, (LoRALinear, LoRAConv2d)):
-                module.clear()
-        for key, module in self.text_encoder.named_modules():
-            if isinstance(module, (LoRALinear, LoRAConv2d)):
-                module.clear()
+        self.unet.unload_loras()
+        self.text_encoder.unload_loras()
 
     @torch.no_grad()
     def __call__(

diffsynth_engine/pipelines/sdxl_image.py

Lines changed: 8 additions & 43 deletions

@@ -1,13 +1,13 @@
 import os
 import re
 import torch
-from typing import Callable, Dict, List, Tuple, Optional
-from safetensors.torch import load_file
+from typing import Callable, Dict, Optional
 from tqdm import tqdm
 from PIL import Image
 from dataclasses import dataclass
-from diffsynth_engine.models.base import LoRAStateDictConverter, split_suffix
-from diffsynth_engine.models.basic.lora import LoRAContext, LoRALinear, LoRAConv2d
+
+from diffsynth_engine.models.base import split_suffix
+from diffsynth_engine.models.basic.lora import LoRAContext
 from diffsynth_engine.models.basic.timestep import TemporalTimesteps
 from diffsynth_engine.models.sdxl import (
     SDXLTextEncoder,
@@ -17,7 +17,7 @@
     SDXLUNet,
     sdxl_unet_config,
 )
-from diffsynth_engine.pipelines import BasePipeline
+from diffsynth_engine.pipelines import BasePipeline, LoRAStateDictConverter
 from diffsynth_engine.tokenizers import CLIPTokenizer
 from diffsynth_engine.algorithm.noise_scheduler import ScaledLinearScheduler
 from diffsynth_engine.algorithm.sampler import EulerSampler
@@ -305,45 +305,10 @@ def predict_noise(self, latents, timestep, prompt_emb, add_text_embeds, add_time
         )
         return noise_pred
 
-    def load_lora(self, path: str, scale: float, fused: bool = False, save_original_weight: bool = True):
-        self.load_loras([(path, scale)], fused, save_original_weight)
-
-    def load_loras(self, lora_list: List[Tuple[str, float]], fused: bool = False, save_original_weight: bool = True):
-        for lora_path, lora_scale in lora_list:
-            state_dict = load_file(lora_path, device="cpu")
-            lora_state_dict = self.lora_converter.convert(state_dict)
-            for model_name, state_dict in lora_state_dict.items():
-                model = getattr(self, model_name)
-                for key, param in state_dict.items():
-                    module = model.get_submodule(key)
-                    if not isinstance(module, (LoRALinear, LoRAConv2d)):
-                        raise ValueError(f"Unsupported lora key: {key}")
-                    lora_args = {
-                        "name": key,
-                        "scale": lora_scale,
-                        "rank": param["rank"],
-                        "alpha": param["alpha"],
-                        "up": param["up"],
-                        "down": param["down"],
-                        "device": self.device,
-                        "dtype": self.dtype,
-                        "save_original_weight": save_original_weight,
-                    }
-                    if fused:
-                        module.add_frozen_lora(**lora_args)
-                    else:
-                        module.add_lora(**lora_args)
-
     def unload_loras(self):
-        for key, module in self.unet.named_modules():
-            if isinstance(module, (LoRALinear, LoRAConv2d)):
-                module.clear()
-        for key, module in self.text_encoder.named_modules():
-            if isinstance(module, (LoRALinear, LoRAConv2d)):
-                module.clear()
-        for key, module in self.text_encoder_2.named_modules():
-            if isinstance(module, (LoRALinear, LoRAConv2d)):
-                module.clear()
+        self.unet.unload_loras()
+        self.text_encoder.unload_loras()
+        self.text_encoder_2.unload_loras()
 
     @torch.no_grad()
     def __call__(
