huggingface
diff --git a/‎docs/source/en/api/pipelines/pag.md‎
Lines changed: 4 additions & 0 deletions b/‎docs/source/en/api/pipelines/pag.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/source/en/tutorials/basic_training.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/source/en/tutorials/basic_training.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/source/ko/tutorials/basic_training.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/source/ko/tutorials/basic_training.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/diffusers/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎src/diffusers/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/diffusers/configuration_utils.py‎
Lines changed: 1 addition & 1 deletion b/‎src/diffusers/configuration_utils.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/diffusers/loaders/single_file_model.py‎
Lines changed: 1 addition & 0 deletions b/‎src/diffusers/loaders/single_file_model.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/diffusers/loaders/single_file_utils.py‎
Lines changed: 7 additions & 1 deletion b/‎src/diffusers/loaders/single_file_utils.py‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎src/diffusers/models/attention_processor.py‎
Lines changed: 1 addition & 0 deletions b/‎src/diffusers/models/attention_processor.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/diffusers/models/autoencoders/autoencoder_kl.py‎
Lines changed: 2 additions & 1 deletion b/‎src/diffusers/models/autoencoders/autoencoder_kl.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/diffusers/models/controlnets/controlnet_sd3.py‎
Lines changed: 14 additions & 0 deletions b/‎src/diffusers/models/controlnets/controlnet_sd3.py‎
Lines changed: 14 additions & 0 deletions
@@ -96,6 +96,10 @@ Since RegEx is supported as a way for matching layer identifiers, it is crucial
 	- all
 	- __call__
 
+## StableDiffusion3PAGImg2ImgPipeline
+[[autodoc]] StableDiffusion3PAGImg2ImgPipeline
+	- all
+	- __call__
 
 ## PixArtSigmaPAGPipeline
 [[autodoc]] PixArtSigmaPAGPipeline
 
@@ -75,7 +75,7 @@ For convenience, create a `TrainingConfig` class containing the training hyperpa
 
 ...     push_to_hub = True  # whether to upload the saved model to the HF Hub
 ...     hub_model_id = "<your-username>/<my-awesome-model>"  # the name of the repository to create on the HF Hub
-...     hub_private_repo = False
+...     hub_private_repo = None
 ...     overwrite_output_dir = True  # overwrite the old model when re-running the notebook
 ...     seed = 0
 
 
@@ -76,7 +76,7 @@ huggingface-cli login
 ...     output_dir = "ddpm-butterflies-128"  # 로컬 및 HF Hub에 저장되는 모델명
 
 ...     push_to_hub = True  # 저장된 모델을 HF Hub에 업로드할지 여부
-...     hub_private_repo = False
+...     hub_private_repo = None
 ...     overwrite_output_dir = True  # 노트북을 다시 실행할 때 이전 모델에 덮어씌울지
 ...     seed = 0
 
 
@@ -339,6 +339,7 @@
             "StableDiffusion3Img2ImgPipeline",
             "StableDiffusion3InpaintPipeline",
             "StableDiffusion3PAGPipeline",
+            "StableDiffusion3PAGImg2ImgPipeline",
             "StableDiffusion3Pipeline",
             "StableDiffusionAdapterPipeline",
             "StableDiffusionAttendAndExcitePipeline",
@@ -807,6 +808,7 @@
             StableDiffusion3ControlNetPipeline,
             StableDiffusion3Img2ImgPipeline,
             StableDiffusion3InpaintPipeline,
+            StableDiffusion3PAGImg2ImgPipeline,
             StableDiffusion3PAGPipeline,
             StableDiffusion3Pipeline,
             StableDiffusionAdapterPipeline,
 
@@ -170,7 +170,7 @@ def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool
 
         if push_to_hub:
             commit_message = kwargs.pop("commit_message", None)
-            private = kwargs.pop("private", False)
+            private = kwargs.pop("private", None)
             create_pr = kwargs.pop("create_pr", False)
             token = kwargs.pop("token", None)
             repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
 
@@ -269,6 +269,7 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =
                 pretrained_model_name_or_path=default_pretrained_model_config_name,
                 subfolder=subfolder,
                 local_files_only=local_files_only,
+                token=token,
             )
             expected_kwargs, optional_kwargs = cls._get_signature_keys(cls)
 
 
@@ -127,6 +127,9 @@
     "sd35_large": {
         "pretrained_model_name_or_path": "stabilityai/stable-diffusion-3.5-large",
     },
+    "sd35_medium": {
+        "pretrained_model_name_or_path": "stabilityai/stable-diffusion-3.5-medium",
+    },
     "animatediff_v1": {"pretrained_model_name_or_path": "guoyww/animatediff-motion-adapter-v1-5"},
     "animatediff_v2": {"pretrained_model_name_or_path": "guoyww/animatediff-motion-adapter-v1-5-2"},
     "animatediff_v3": {"pretrained_model_name_or_path": "guoyww/animatediff-motion-adapter-v1-5-3"},
@@ -527,7 +530,10 @@ def infer_diffusers_model_type(checkpoint):
         model_type = "stable_cascade_stage_b"
 
     elif CHECKPOINT_KEY_NAMES["sd3"] in checkpoint and checkpoint[CHECKPOINT_KEY_NAMES["sd3"]].shape[-1] == 9216:
-        model_type = "sd3"
+        if checkpoint["model.diffusion_model.pos_embed"].shape[1] == 36864:
+            model_type = "sd3"
+        elif checkpoint["model.diffusion_model.pos_embed"].shape[1] == 147456:
+            model_type = "sd35_medium"
 
     elif CHECKPOINT_KEY_NAMES["sd35_large"] in checkpoint:
         model_type = "sd35_large"
 
@@ -1171,6 +1171,7 @@ def __call__(
         attn: Attention,
         hidden_states: torch.FloatTensor,
         encoder_hidden_states: torch.FloatTensor = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
     ) -> torch.FloatTensor:
         residual = hidden_states
 
 
@@ -17,6 +17,7 @@
 import torch.nn as nn
 
 from ...configuration_utils import ConfigMixin, register_to_config
+from ...loaders import PeftAdapterMixin
 from ...loaders.single_file_model import FromOriginalModelMixin
 from ...utils import deprecate
 from ...utils.accelerate_utils import apply_forward_hook
@@ -34,7 +35,7 @@
 from .vae import Decoder, DecoderOutput, DiagonalGaussianDistribution, Encoder
 
 
-class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin):
+class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin, PeftAdapterMixin):
     r"""
     A VAE model with KL loss for encoding images into latents and decoding latent representations into images.
 
 
@@ -266,6 +266,20 @@ def _set_gradient_checkpointing(self, module, value=False):
         if hasattr(module, "gradient_checkpointing"):
             module.gradient_checkpointing = value
 
+    # NOTE: This is for the SD3.5 8b controlnet, which shares its pos_embed with the transformer.
+    # Ideally this should have been handled in the conversion script.
+    def _get_pos_embed_from_transformer(self, transformer):
+        """
+        Build a new `PatchEmbed` module that mirrors the `pos_embed` of `transformer`.
+
+        A fresh `PatchEmbed` is constructed from the transformer's config values (`sample_size` for both
+        height and width, `patch_size`, `in_channels`, `pos_embed_max_size`) and its `inner_dim`, and is then
+        populated from `transformer.pos_embed.state_dict()`.
+
+        Args:
+            transformer: Object exposing `config`, `inner_dim` and a `pos_embed` module (presumably the SD3
+                transformer this controlnet is paired with -- confirm against callers).
+
+        Returns:
+            The newly created `PatchEmbed` loaded with the transformer's `pos_embed` state.
+        """
+        pos_embed = PatchEmbed(
+            height=transformer.config.sample_size,
+            width=transformer.config.sample_size,
+            patch_size=transformer.config.patch_size,
+            in_channels=transformer.config.in_channels,
+            embed_dim=transformer.inner_dim,
+            pos_embed_max_size=transformer.config.pos_embed_max_size,
+        )
+        # strict=True: loading fails loudly if the keys of the two modules do not match exactly.
+        pos_embed.load_state_dict(transformer.pos_embed.state_dict(), strict=True)
+        return pos_embed
+
     @classmethod
     def from_transformer(
         cls, transformer, num_layers=12, num_extra_conditioning_channels=1, load_weights_from_transformer=True
Original file line number	Diff line number	Diff line change
`@@ -269,6 +269,7 @@ def from_single_file(cls, pretrained_model_link_or_path_or_dict: Optional[str] =`
`269`	`269`	`pretrained_model_name_or_path=default_pretrained_model_config_name,`
`270`	`270`	`subfolder=subfolder,`
`271`	`271`	`local_files_only=local_files_only,`
	`272`	`+ token=token,`
`272`	`273`	`)`
`273`	`274`	`expected_kwargs, optional_kwargs = cls._get_signature_keys(cls)`
`274`	`275`