Commit 43ff55f

Merge branch 'main' into reuse-attn-mixin
2 parents 45ef48b + 152f7ca

26 files changed: +844 −153 lines

docs/source/en/modular_diffusers/guiders.md

Lines changed: 1 addition & 1 deletion
@@ -159,7 +159,7 @@ Change the [`~ComponentSpec.default_creation_method`] to `from_pretrained` and u
 ```py
 guider_spec = t2i_pipeline.get_component_spec("guider")
 guider_spec.default_creation_method="from_pretrained"
-guider_spec.repo="YiYiXu/modular-loader-t2i-guider"
+guider_spec.pretrained_model_name_or_path="YiYiXu/modular-loader-t2i-guider"
 guider_spec.subfolder="pag_guider"
 pag_guider = guider_spec.load()
 t2i_pipeline.update_components(guider=pag_guider)

docs/source/en/modular_diffusers/modular_pipeline.md

Lines changed: 2 additions & 2 deletions
@@ -313,14 +313,14 @@ unet_spec
 ComponentSpec(
     name='unet',
     type_hint=<class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'>,
-    repo='RunDiffusion/Juggernaut-XL-v9',
+    pretrained_model_name_or_path='RunDiffusion/Juggernaut-XL-v9',
     subfolder='unet',
     variant='fp16',
     default_creation_method='from_pretrained'
 )

 # modify to load from a different repository
-unet_spec.repo = "stabilityai/stable-diffusion-xl-base-1.0"
+unet_spec.pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"

 # load component with modified spec
 unet = unet_spec.load(torch_dtype=torch.float16)
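
Note: the renamed loading field works the same way when a spec is built by hand rather than pulled from a pipeline. A minimal sketch, assuming the field names shown in the spec repr above (the repo id and dtype are illustrative):

```py
import torch
from diffusers import ComponentSpec, UNet2DConditionModel

# Build a spec directly with the renamed loading field; repo id and
# subfolder mirror the example in the changed docs.
unet_spec = ComponentSpec(
    name="unet",
    type_hint=UNet2DConditionModel,
    pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0",
    subfolder="unet",
    default_creation_method="from_pretrained",
)
unet = unet_spec.load(torch_dtype=torch.float16)
```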

docs/source/zh/modular_diffusers/guiders.md

Lines changed: 1 addition & 1 deletion
@@ -157,7 +157,7 @@ guider.push_to_hub("YiYiXu/modular-loader-t2i-guider", subfolder="pag_guider")
 ```py
 guider_spec = t2i_pipeline.get_component_spec("guider")
 guider_spec.default_creation_method="from_pretrained"
-guider_spec.repo="YiYiXu/modular-loader-t2i-guider"
+guider_spec.pretrained_model_name_or_path="YiYiXu/modular-loader-t2i-guider"
 guider_spec.subfolder="pag_guider"
 pag_guider = guider_spec.load()
 t2i_pipeline.update_components(guider=pag_guider)

docs/source/zh/modular_diffusers/modular_pipeline.md

Lines changed: 2 additions & 2 deletions
@@ -313,14 +313,14 @@ unet_spec
 ComponentSpec(
     name='unet',
     type_hint=<class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'>,
-    repo='RunDiffusion/Juggernaut-XL-v9',
+    pretrained_model_name_or_path='RunDiffusion/Juggernaut-XL-v9',
     subfolder='unet',
     variant='fp16',
     default_creation_method='from_pretrained'
 )

 # modify to load from a different repository
-unet_spec.repo = "stabilityai/stable-diffusion-xl-base-1.0"
+unet_spec.pretrained_model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0"

 # load the component with the modified spec
 unet = unet_spec.load(torch_dtype=torch.float16)

examples/text_to_image/train_text_to_image_lora.py

Lines changed: 61 additions & 13 deletions
@@ -37,7 +37,7 @@
 from huggingface_hub import create_repo, upload_folder
 from packaging import version
 from peft import LoraConfig
-from peft.utils import get_peft_model_state_dict
+from peft.utils import get_peft_model_state_dict, set_peft_model_state_dict
 from torchvision import transforms
 from tqdm.auto import tqdm
 from transformers import CLIPTextModel, CLIPTokenizer
@@ -46,7 +46,12 @@
 from diffusers import AutoencoderKL, DDPMScheduler, DiffusionPipeline, StableDiffusionPipeline, UNet2DConditionModel
 from diffusers.optimization import get_scheduler
 from diffusers.training_utils import cast_training_params, compute_snr
-from diffusers.utils import check_min_version, convert_state_dict_to_diffusers, is_wandb_available
+from diffusers.utils import (
+    check_min_version,
+    convert_state_dict_to_diffusers,
+    convert_unet_state_dict_to_peft,
+    is_wandb_available,
+)
 from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.torch_utils import is_compiled_module
@@ -708,6 +713,56 @@ def collate_fn(examples):
     num_workers=args.dataloader_num_workers,
 )

+def save_model_hook(models, weights, output_dir):
+    if accelerator.is_main_process:
+        unet_lora_layers_to_save = None
+
+        for model in models:
+            if isinstance(model, type(unwrap_model(unet))):
+                unet_lora_layers_to_save = get_peft_model_state_dict(model)
+            else:
+                raise ValueError(f"Unexpected save model: {model.__class__}")
+
+            # make sure to pop weight so that corresponding model is not saved again
+            weights.pop()
+
+        StableDiffusionPipeline.save_lora_weights(
+            save_directory=output_dir,
+            unet_lora_layers=unet_lora_layers_to_save,
+            safe_serialization=True,
+        )
+
+def load_model_hook(models, input_dir):
+    unet_ = None
+
+    while len(models) > 0:
+        model = models.pop()
+        if isinstance(model, type(unwrap_model(unet))):
+            unet_ = model
+        else:
+            raise ValueError(f"unexpected save model: {model.__class__}")
+
+    # returns a tuple of state dictionary and network alphas
+    lora_state_dict, network_alphas = StableDiffusionPipeline.lora_state_dict(input_dir)
+
+    unet_state_dict = {f"{k.replace('unet.', '')}": v for k, v in lora_state_dict.items() if k.startswith("unet.")}
+    unet_state_dict = convert_unet_state_dict_to_peft(unet_state_dict)
+    incompatible_keys = set_peft_model_state_dict(unet_, unet_state_dict, adapter_name="default")
+
+    if incompatible_keys is not None:
+        # check only for unexpected keys
+        unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
+        # throw warning if some unexpected keys are found and continue loading
+        if unexpected_keys:
+            logger.warning(
+                f"Loading adapter weights from state_dict led to unexpected keys not found in the model: "
+                f" {unexpected_keys}. "
+            )
+
+    # Make sure the trainable params are in float32
+    if args.mixed_precision in ["fp16"]:
+        cast_training_params([unet_], dtype=torch.float32)
+
 # Scheduler and math around the number of training steps.
 # Check the PR https://github.com/huggingface/diffusers/pull/8312 for detailed explanation.
 num_warmup_steps_for_scheduler = args.lr_warmup_steps * accelerator.num_processes
@@ -732,6 +787,10 @@ def collate_fn(examples):
     unet, optimizer, train_dataloader, lr_scheduler
 )

+# Register the hooks for efficient saving and loading of LoRA weights
+accelerator.register_save_state_pre_hook(save_model_hook)
+accelerator.register_load_state_pre_hook(load_model_hook)
+
 # We need to recalculate our total training steps as the size of the training dataloader may have changed.
 num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
 if args.max_train_steps is None:
@@ -906,17 +965,6 @@ def collate_fn(examples):
 save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}")
 accelerator.save_state(save_path)

-unwrapped_unet = unwrap_model(unet)
-unet_lora_state_dict = convert_state_dict_to_diffusers(
-    get_peft_model_state_dict(unwrapped_unet)
-)
-
-StableDiffusionPipeline.save_lora_weights(
-    save_directory=save_path,
-    unet_lora_layers=unet_lora_state_dict,
-    safe_serialization=True,
-)
-
 logger.info(f"Saved state to {save_path}")

 logs = {"step_loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]}
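
Note: the change moves LoRA serialization out of the manual checkpointing branch and into Accelerate's hook mechanism, so `save_state`/`load_state` persist and restore only the adapter weights. A minimal sketch of the hook contract as Accelerate defines it, with placeholder bodies standing in for the LoRA logic above:

```py
from accelerate import Accelerator

accelerator = Accelerator()

def save_hook(models, weights, output_dir):
    # invoked by accelerator.save_state() before model weights are written;
    # popping an entry from `weights` tells Accelerate not to serialize that
    # model itself (the hook saves it in its own format instead)
    if weights:
        weights.pop()

def load_hook(models, input_dir):
    # invoked by accelerator.load_state(); models popped from the list are
    # loaded by the hook instead of Accelerate's default loader
    while models:
        models.pop()

accelerator.register_save_state_pre_hook(save_hook)
accelerator.register_load_state_pre_hook(load_hook)

accelerator.save_state("checkpoint-500")  # routes through save_hook
accelerator.load_state("checkpoint-500")  # routes through load_hook
```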

src/diffusers/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -991,6 +991,7 @@
     WanAnimateTransformer3DModel,
     WanTransformer3DModel,
     WanVACETransformer3DModel,
+    ZImageTransformer2DModel,
     attention_backend,
 )
 from .modular_pipelines import ComponentsManager, ComponentSpec, ModularPipeline, ModularPipelineBlocks
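
With this export in place, the new transformer is importable from the package root (assuming a diffusers build that includes it):

```py
from diffusers import ZImageTransformer2DModel

print(ZImageTransformer2DModel.__name__)  # ZImageTransformer2DModel
```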

src/diffusers/loaders/single_file_utils.py

Lines changed: 8 additions & 1 deletion
@@ -389,6 +389,14 @@ def is_valid_url(url):
     return False


+def _is_single_file_path_or_url(pretrained_model_name_or_path):
+    if not os.path.isfile(pretrained_model_name_or_path) and not is_valid_url(pretrained_model_name_or_path):
+        return False
+
+    repo_id, weight_name = _extract_repo_id_and_weights_name(pretrained_model_name_or_path)
+    return bool(repo_id and weight_name)
+
+
 def _extract_repo_id_and_weights_name(pretrained_model_name_or_path):
     if not is_valid_url(pretrained_model_name_or_path):
         raise ValueError("Invalid `pretrained_model_name_or_path` provided. Please set it to a valid URL.")
@@ -400,7 +408,6 @@ def _extract_repo_id_and_weights_name(pretrained_model_name_or_path):
     pretrained_model_name_or_path = pretrained_model_name_or_path.replace(prefix, "")
     match = re.match(pattern, pretrained_model_name_or_path)
     if not match:
-        logger.warning("Unable to identify the repo_id and weights_name from the provided URL.")
         return repo_id, weights_name

     repo_id = f"{match.group(1)}/{match.group(2)}"
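
For context, the new predicate leans on `_extract_repo_id_and_weights_name`, which splits a Hub file URL into a repo id and a weight filename. A minimal sketch with an illustrative URL:

```py
from diffusers.loaders.single_file_utils import _extract_repo_id_and_weights_name

# Illustrative URL; any https://huggingface.co/<org>/<repo>/blob/main/<file> works
url = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0.safetensors"
repo_id, weights_name = _extract_repo_id_and_weights_name(url)
print(repo_id)       # stabilityai/stable-diffusion-xl-base-1.0
print(weights_name)  # sd_xl_base_1.0.safetensors
```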

src/diffusers/models/transformers/transformer_z_image.py

Lines changed: 17 additions & 3 deletions
@@ -69,7 +69,10 @@ def timestep_embedding(t, dim, max_period=10000):

 def forward(self, t):
     t_freq = self.timestep_embedding(t, self.frequency_embedding_size)
-    t_emb = self.mlp(t_freq.to(self.mlp[0].weight.dtype))
+    weight_dtype = self.mlp[0].weight.dtype
+    if weight_dtype.is_floating_point:
+        t_freq = t_freq.to(weight_dtype)
+    t_emb = self.mlp(t_freq)
     return t_emb

@@ -126,6 +129,10 @@ def apply_rotary_emb(x_in: torch.Tensor, freqs_cis: torch.Tensor) -> torch.Tensor:
 dtype = query.dtype
 query, key = query.to(dtype), key.to(dtype)

+# From [batch, seq_len] to [batch, 1, 1, seq_len] -> broadcast to [batch, heads, seq_len, seq_len]
+if attention_mask is not None and attention_mask.ndim == 2:
+    attention_mask = attention_mask[:, None, None, :]
+
 # Compute joint attention
 hidden_states = dispatch_attention_fn(
     query,
@@ -306,6 +313,10 @@ def __call__(self, ids: torch.Tensor):
 if self.freqs_cis is None:
     self.freqs_cis = self.precompute_freqs_cis(self.axes_dims, self.axes_lens, theta=self.theta)
     self.freqs_cis = [freqs_cis.to(device) for freqs_cis in self.freqs_cis]
+else:
+    # Ensure freqs_cis are on the same device as ids
+    if self.freqs_cis[0].device != device:
+        self.freqs_cis = [freqs_cis.to(device) for freqs_cis in self.freqs_cis]

 result = []
 for i in range(len(self.axes_dims)):
@@ -317,6 +328,8 @@
 class ZImageTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
     _supports_gradient_checkpointing = True
     _no_split_modules = ["ZImageTransformerBlock"]
+    _repeated_blocks = ["ZImageTransformerBlock"]
+    _skip_layerwise_casting_patterns = ["t_embedder", "cap_embedder"]  # precision sensitive layers

     @register_to_config
     def __init__(
@@ -553,8 +566,6 @@ def forward(
 t = t * self.t_scale
 t = self.t_embedder(t)

-adaln_input = t
-
 (
     x,
     cap_feats,
@@ -572,6 +583,9 @@

 x = torch.cat(x, dim=0)
 x = self.all_x_embedder[f"{patch_size}-{f_patch_size}"](x)
+
+# Match t_embedder output dtype to x for layerwise casting compatibility
+adaln_input = t.type_as(x)
 x[torch.cat(x_inner_pad_mask)] = self.x_pad_token
 x = list(x.split(x_item_seqlens, dim=0))
 x_freqs_cis = list(self.rope_embedder(torch.cat(x_pos_ids, dim=0)).split(x_item_seqlens, dim=0))
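
To make the mask-shape handling concrete, the same broadcast can be reproduced with plain tensors; a self-contained sketch (shapes are illustrative):

```py
import torch

batch, heads, seq_len = 2, 8, 16
attention_mask = torch.ones(batch, seq_len, dtype=torch.bool)  # [batch, seq_len]
attention_mask[0, 10:] = False  # mask out the padded tail of the first sample

# [batch, seq_len] -> [batch, 1, 1, seq_len]; broadcasts against
# attention scores shaped [batch, heads, query_len, key_len]
mask_4d = attention_mask[:, None, None, :]

scores = torch.randn(batch, heads, seq_len, seq_len)
scores = scores.masked_fill(~mask_4d, float("-inf"))
print(scores.shape)  # torch.Size([2, 8, 16, 16])
```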

src/diffusers/modular_pipelines/modular_pipeline.py

Lines changed: 33 additions & 20 deletions
@@ -360,7 +360,7 @@ def init_pipeline(
     collection: Optional[str] = None,
 ) -> "ModularPipeline":
     """
-    create a ModularPipeline, optionally accept modular_repo to load from hub.
+    create a ModularPipeline, optionally accept pretrained_model_name_or_path to load from hub.
     """
     pipeline_class_name = MODULAR_PIPELINE_MAPPING.get(self.model_name, ModularPipeline.__name__)
     diffusers_module = importlib.import_module("diffusers")
@@ -1645,8 +1645,8 @@ def from_pretrained(
 pretrained_model_name_or_path (`str` or `os.PathLike`, optional):
     Path to a pretrained pipeline configuration. It will first try to load config from
     `modular_model_index.json`, then fallback to `model_index.json` for compatibility with standard
-    non-modular repositories. If the repo does not contain any pipeline config, it will be set to None
-    during initialization.
+    non-modular repositories. If the pretrained_model_name_or_path does not contain any pipeline config, it
+    will be set to None during initialization.
 trust_remote_code (`bool`, optional):
     Whether to trust remote code when loading the pipeline, need to be set to True if you want to create
     pipeline blocks based on the custom code in `pretrained_model_name_or_path`
@@ -1807,7 +1807,7 @@ def register_components(self, **kwargs):
 library, class_name = None, None

 # extract the loading spec from the updated component spec that'll be used as part of modular_model_index.json config
-# e.g. {"repo": "stabilityai/stable-diffusion-2-1",
+# e.g. {"pretrained_model_name_or_path": "stabilityai/stable-diffusion-2-1",
 #       "type_hint": ("diffusers", "UNet2DConditionModel"),
 #       "subfolder": "unet",
 #       "variant": None,
@@ -2111,8 +2111,10 @@ def load_components(self, names: Optional[Union[List[str], str]] = None, **kwargs):
 **kwargs: additional kwargs to be passed to `from_pretrained()`. Can be:
     - a single value to be applied to all components to be loaded, e.g. torch_dtype=torch.bfloat16
     - a dict, e.g. torch_dtype={"unet": torch.bfloat16, "default": torch.float32}
-    - if potentially override ComponentSpec if passed a different loading field in kwargs, e.g. `repo`,
-      `variant`, `revision`, etc.
+    - can potentially override ComponentSpec fields if passed a different loading field in kwargs, e.g.
+      `pretrained_model_name_or_path`, `variant`, `revision`, etc.
 """

 if names is None:
@@ -2378,10 +2380,10 @@ def _component_spec_to_dict(component_spec: ComponentSpec) -> Any:
 - "type_hint": Tuple[str, str]
     Library name and class name of the component. (e.g. ("diffusers", "UNet2DConditionModel"))
 - All loading fields defined by `component_spec.loading_fields()`, typically:
-    - "repo": Optional[str]
-        The model repository (e.g., "stabilityai/stable-diffusion-xl").
+    - "pretrained_model_name_or_path": Optional[str]
+        The model repository (e.g., "stabilityai/stable-diffusion-xl").
 - "subfolder": Optional[str]
-    A subfolder within the repo where this component lives.
+    A subfolder within the repository where this component lives.
 - "variant": Optional[str]
     An optional variant identifier for the model.
 - "revision": Optional[str]
@@ -2398,11 +2400,13 @@ def _component_spec_to_dict(component_spec: ComponentSpec) -> Any:
 Example:
     >>> from diffusers.pipelines.modular_pipeline_utils import ComponentSpec
     >>> from diffusers import UNet2DConditionModel
     >>> spec = ComponentSpec(
-    ...     name="unet", type_hint=UNet2DConditionModel, config=None, repo="path/to/repo",
-    ...     subfolder="subfolder", variant=None, revision=None,
-    ...     default_creation_method="from_pretrained",
+    ...     name="unet", type_hint=UNet2DConditionModel, config=None,
+    ...     pretrained_model_name_or_path="path/to/repo", subfolder="subfolder",
+    ...     variant=None, revision=None, default_creation_method="from_pretrained",
     ... )
     >>> ModularPipeline._component_spec_to_dict(spec)
-    {"type_hint": ("diffusers", "UNet2DConditionModel"), "repo": "path/to/repo", "subfolder": "subfolder",
-     "variant": None, "revision": None}
+    {"type_hint": ("diffusers", "UNet2DConditionModel"), "pretrained_model_name_or_path": "path/to/repo",
+     "subfolder": "subfolder", "variant": None, "revision": None}
 """
@@ -2432,10 +2436,10 @@ def _dict_to_component_spec(
 - "type_hint": Tuple[str, str]
     Library name and class name of the component. (e.g. ("diffusers", "UNet2DConditionModel"))
 - All loading fields defined by `component_spec.loading_fields()`, typically:
-    - "repo": Optional[str]
+    - "pretrained_model_name_or_path": Optional[str]
         The model repository (e.g., "stabilityai/stable-diffusion-xl").
 - "subfolder": Optional[str]
-    A subfolder within the repo where this component lives.
+    A subfolder within the pretrained_model_name_or_path where this component lives.
 - "variant": Optional[str]
     An optional variant identifier for the model.
 - "revision": Optional[str]
@@ -2452,11 +2456,20 @@ def _dict_to_component_spec(
     ComponentSpec: A reconstructed ComponentSpec object.

 Example:
-    >>> spec_dict = {
-    ...     "type_hint": ("diffusers", "UNet2DConditionModel"),
-    ...     "repo": "stabilityai/stable-diffusion-xl",
-    ...     "subfolder": "unet", "variant": None, "revision": None,
-    ... }
-    >>> ModularPipeline._dict_to_component_spec("unet", spec_dict)
-    ComponentSpec(
-        name="unet", type_hint=UNet2DConditionModel, config=None, repo="stabilityai/stable-diffusion-xl",
-        subfolder="unet", variant=None, revision=None, default_creation_method="from_pretrained"
+    >>> spec_dict = {
+    ...     "type_hint": ("diffusers", "UNet2DConditionModel"),
+    ...     "pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl",
+    ...     "subfolder": "unet", "variant": None, "revision": None,
+    ... }
+    >>> ModularPipeline._dict_to_component_spec("unet", spec_dict)
+    ComponentSpec(
+        name="unet", type_hint=UNet2DConditionModel, config=None,
+        pretrained_model_name_or_path="stabilityai/stable-diffusion-xl",
+        subfolder="unet", variant=None, revision=None, default_creation_method="from_pretrained"
     )
 """
 # make a shallow copy so we can pop() safely
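
These two helpers round-trip a spec through its `modular_model_index.json` representation. A sketch of the round trip under the renamed field (the repo id is illustrative, and both methods are private API):

```py
from diffusers import ComponentSpec, ModularPipeline, UNet2DConditionModel

spec = ComponentSpec(
    name="unet",
    type_hint=UNet2DConditionModel,
    pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0",
    subfolder="unet",
    default_creation_method="from_pretrained",
)

# serialize to the modular_model_index.json representation and back
spec_dict = ModularPipeline._component_spec_to_dict(spec)
print(spec_dict["pretrained_model_name_or_path"])  # stabilityai/stable-diffusion-xl-base-1.0

restored = ModularPipeline._dict_to_component_spec("unet", spec_dict)
print(restored.pretrained_model_name_or_path)  # same value, round-tripped
```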
