1 parent c67f582 commit 7acd7da
src/diffusers/models/transformers/transformer_flux2.py
@@ -321,7 +321,8 @@ def __init__(
         self.norm = nn.LayerNorm(dim, elementwise_affine=False, eps=eps)
 
         # Note that the MLP in/out linear layers are fused with the attention QKV/out projections, respectively; this
-        # is often called a "parallel" transformer block
+        # is often called a "parallel" transformer block. See the [ViT-22B paper](https://arxiv.org/abs/2302.05442)
+        # for a visual depiction of this type of transformer block.
         self.attn = Flux2Attention(
             query_dim=dim,
             dim_head=attention_head_dim,
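
The comment edited above describes a "parallel" transformer block, in which the attention and MLP branches share fused input and output projections rather than running as two sequential sub-blocks. A minimal, hypothetical sketch of that fusion pattern is shown below; the class name, argument names, and shapes are illustrative assumptions and do not mirror the actual Flux2Attention implementation in this file.

```python
# Hypothetical sketch of a ViT-22B-style "parallel" transformer block.
# The attention QKV projection is fused with the MLP input projection, and the
# attention output projection is fused with the MLP output projection, so each
# of the two matmuls serves both branches at once. Illustrative only.
import torch
import torch.nn as nn
import torch.nn.functional as F


class ParallelTransformerBlock(nn.Module):
    def __init__(self, dim: int, num_heads: int, mlp_ratio: float = 4.0, eps: float = 1e-6):
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        self.mlp_dim = int(dim * mlp_ratio)

        self.norm = nn.LayerNorm(dim, elementwise_affine=False, eps=eps)
        # Fused input projection: [Q | K | V | MLP-in] computed in a single matmul.
        self.in_proj = nn.Linear(dim, 3 * dim + self.mlp_dim)
        # Fused output projection: concatenated [attn-out | MLP-hidden] -> dim.
        self.out_proj = nn.Linear(dim + self.mlp_dim, dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b, n, d = x.shape
        h = self.norm(x)

        # One matmul produces Q, K, V and the MLP hidden activation.
        q, k, v, mlp_h = torch.split(self.in_proj(h), [d, d, d, self.mlp_dim], dim=-1)

        # Standard multi-head attention on the Q/K/V slices.
        q = q.view(b, n, self.num_heads, self.head_dim).transpose(1, 2)
        k = k.view(b, n, self.num_heads, self.head_dim).transpose(1, 2)
        v = v.view(b, n, self.num_heads, self.head_dim).transpose(1, 2)
        attn = F.scaled_dot_product_attention(q, k, v)
        attn = attn.transpose(1, 2).reshape(b, n, d)

        # The MLP branch runs in parallel with attention; both are folded back
        # into the residual stream by the second fused matmul.
        mlp_h = F.gelu(mlp_h)
        out = self.out_proj(torch.cat([attn, mlp_h], dim=-1))

        # A single residual connection carries the combined attention + MLP update.
        return x + out
```

Compared with a sequential pre-norm block, this layout needs only one LayerNorm, one residual add, and two large matmuls per block, which is the motivation cited in the ViT-22B paper linked in the comment.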