
add expected parameters to controlnet_sd3 #9974

@sarahahtee

Description


Describe the bug

The transformer model introduced in SD3 expects the parameters below (transformer_sd3.py). Two of these parameters remain undefined in the SD3ControlNetModel class (controlnet_sd3.py): dual_attention_layers and qk_norm.

# transformer_sd3.py — SD3Transformer2DModel.__init__
# (requires `from typing import Optional, Tuple` at module level)
@register_to_config
def __init__(
    self,
    sample_size: int = 128,
    patch_size: int = 2,
    in_channels: int = 16,
    num_layers: int = 18,
    attention_head_dim: int = 64,
    num_attention_heads: int = 18,
    joint_attention_dim: int = 4096,
    caption_projection_dim: int = 1152,
    pooled_projection_dim: int = 2048,
    out_channels: int = 16,
    pos_embed_max_size: int = 96,
    dual_attention_layers: Tuple[
        int, ...
    ] = (),  # () for sd3.0; (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) for sd3.5
    qk_norm: Optional[str] = None,
):
    super().__init__()
    default_out_channels = in_channels
    self.out_channels = out_channels if out_channels is not None else default_out_channels
    self.inner_dim = self.config.num_attention_heads * self.config.attention_head_dim
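For context, these two arguments are not optional plumbing: in transformer_sd3.py they configure every transformer block, roughly as below (paraphrased from the diffusers source; exact wiring abridged):

# Paraphrased sketch of SD3Transformer2DModel.__init__ (diffusers 0.32):
# qk_norm and dual_attention_layers feed into every block, so SD3.5
# checkpoints cannot simply drop them from the config.
self.transformer_blocks = nn.ModuleList(
    [
        JointTransformerBlock(
            dim=self.inner_dim,
            num_attention_heads=self.config.num_attention_heads,
            attention_head_dim=self.config.attention_head_dim,
            context_pre_only=i == num_layers - 1,
            qk_norm=qk_norm,
            use_dual_attention=i in dual_attention_layers,
        )
        for i in range(num_layers)
    ]
)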

Reproduction

# Proposed fix in controlnet_sd3.py: accept the two extra arguments so that
# an SD3.5 transformer config can be forwarded unchanged.
class SD3ControlNetModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
    _supports_gradient_checkpointing = True

    @register_to_config
    def __init__(
        self,
        sample_size: int = 128,
        patch_size: int = 2,
        in_channels: int = 16,
        num_layers: int = 18,
        attention_head_dim: int = 64,
        num_attention_heads: int = 18,
        joint_attention_dim: int = 4096,
        caption_projection_dim: int = 1152,
        pooled_projection_dim: int = 2048,
        out_channels: int = 16,
        pos_embed_max_size: int = 96,
        extra_conditioning_channels: int = 0,
        dual_attention_layers: Tuple[int, ...] = (),  # new: matches transformer_sd3.py
        qk_norm: Optional[str] = None,  # new: matches transformer_sd3.py
    ):
        super().__init__()
        default_out_channels = in_channels
        self.out_channels = out_channels if out_channels is not None else default_out_channels
        self.inner_dim = num_attention_heads * attention_head_dim
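A minimal way to hit the error without the training script, as a sketch (assumes access to the gated SD3.5 medium checkpoint, whose transformer config sets both new keys):

# Sketch: reproduces the TypeError with any SD3.5 transformer config
# (assumes access to stabilityai/stable-diffusion-3.5-medium on the Hub).
from diffusers import SD3ControlNetModel, SD3Transformer2DModel

transformer = SD3Transformer2DModel.from_pretrained(
    "stabilityai/stable-diffusion-3.5-medium", subfolder="transformer"
)
# from_transformer forwards the transformer config into cls(**config);
# the unknown dual_attention_layers / qk_norm keys raise TypeError.
controlnet = SD3ControlNetModel.from_transformer(transformer)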

Logs

Traceback (most recent call last):
  File "/data/user/user/project/controlnet_huggingface/diffusers/examples/controlnet/train_controlnet_sd3.py", line 1412, in <module>
    main(args)
  File "/data/user/user/project/controlnet_huggingface/diffusers/examples/controlnet/train_controlnet_sd3.py", line 989, in main
    controlnet = SD3ControlNetModel.from_transformer(transformer)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/data/conda/envs/diffusers/lib/python3.11/site-packages/diffusers/models/controlnets/controlnet_sd3.py", line 251, in from_transformer
    controlnet = cls(**config)
                 ^^^^^^^^^^^^^
  File "/home/user/data/conda/envs/diffusers/lib/python3.11/site-packages/diffusers/configuration_utils.py", line 665, in inner_init
    init(self, *args, **init_kwargs)
TypeError: SD3ControlNetModel.__init__() got an unexpected keyword argument 'dual_attention_layers'
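The traceback shows the mechanism: from_transformer replays the transformer's full registered config into cls(**config), so any config key that SD3ControlNetModel.__init__ does not declare becomes an unexpected keyword argument. A rough paraphrase of controlnet_sd3.py around the line 251 named above (abridged; belongs in the class body):

@classmethod
def from_transformer(cls, transformer, num_layers=12, num_extra_conditioning_channels=1):
    config = dict(transformer.config)
    config["num_layers"] = num_layers or config["num_layers"]
    config["extra_conditioning_channels"] = num_extra_conditioning_channels
    # Every remaining transformer config key, including the SD3.5-only
    # dual_attention_layers and qk_norm, is replayed into __init__:
    controlnet = cls(**config)  # TypeError: unexpected keyword 'dual_attention_layers'
    return controlnet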

System Info

diffusers: 0.32.0.dev0

Who can help?

@yiyixuxu @sayakpaul @DN6 @asomoza
