@@ -2095,6 +2095,8 @@ class UNetMidBlockFlat(nn.Module):
         attention_head_dim (`int`, *optional*, defaults to 1):
             Dimension of a single attention head. The number of attention heads is determined based on this value and
             the number of input channels.
+        attention_legacy_order (`bool`, *optional*, defaults to `False`):
+            If `True`, split heads before splitting qkv, following the legacy ordering in https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py#L328
         output_scale_factor (`float`, *optional*, defaults to 1.0): The output scale factor.

     Returns:
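# Not part of the diff: a minimal sketch of what `attention_legacy_order` changes.
# Names and shapes below are illustrative assumptions, not the library's actual attention code.
import torch

bs, n_heads, ch, length = 2, 4, 8, 16
qkv = torch.randn(bs, 3 * n_heads * ch, length)  # fused q/k/v projection output

# Default ordering: split q/k/v first, then fold heads into the batch dimension.
# Assumes the channel axis is laid out as [q (all heads) | k (all heads) | v (all heads)].
q, k, v = (t.reshape(bs * n_heads, ch, length) for t in qkv.chunk(3, dim=1))

# Legacy ordering (guided-diffusion): split heads first, then split q/k/v per head.
# Assumes the channel axis is laid out as [head0: q, k, v | head1: q, k, v | ...].
q_l, k_l, v_l = qkv.reshape(bs * n_heads, 3 * ch, length).split(ch, dim=1)

# The two orderings index the fused projection weights differently, so a checkpoint trained
# with the legacy layout only stays correct when loaded with `attention_legacy_order=True`.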
@@ -2110,21 +2112,22 @@ def __init__(
         dropout: float = 0.0,
         num_layers: int = 1,
         resnet_eps: float = 1e-6,
-        resnet_time_scale_shift: str = "default",  # default, spatial
+        resnet_time_scale_shift: str = "default",  # default, spatial, scale_shift
         resnet_act_fn: str = "swish",
         resnet_groups: int = 32,
         attn_groups: Optional[int] = None,
         resnet_pre_norm: bool = True,
         add_attention: bool = True,
         attention_head_dim: int = 1,
+        attention_legacy_order: bool = False,
         output_scale_factor: float = 1.0,
     ):
         super().__init__()
         resnet_groups = resnet_groups if resnet_groups is not None else min(in_channels // 4, 32)
         self.add_attention = add_attention

         if attn_groups is None:
-            attn_groups = resnet_groups if resnet_time_scale_shift == "default" else None
+            attn_groups = None if resnet_time_scale_shift == "spatial" else resnet_groups

         # there is always at least one resnet
         if resnet_time_scale_shift == "spatial":
@@ -2163,7 +2166,6 @@ def __init__(
                 f"It is not recommend to pass `attention_head_dim=None`. Defaulting `attention_head_dim` to `in_channels`: {in_channels}."
             )
             attention_head_dim = in_channels
-
         for _ in range(num_layers):
             if self.add_attention:
                 attentions.append(
@@ -2179,6 +2181,7 @@ def __init__(
                         bias=True,
                         upcast_softmax=True,
                         _from_deprecated_attn_block=True,
+                        attention_legacy_order=attention_legacy_order,
                     )
                 )
             else:
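# Not part of the diff: a hypothetical construction of the mid block with the new flag,
# assuming `UNetMidBlockFlat` from the diffed module is in scope and the remaining
# arguments keep the signature shown above; the channel values are made up.
mid_block = UNetMidBlockFlat(
    in_channels=512,
    temb_channels=512,
    resnet_time_scale_shift="scale_shift",
    attention_head_dim=64,
    attention_legacy_order=True,  # use the guided-diffusion heads-before-qkv split
)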