Skip to content

Commit 4e92dc6

Browse files
committed
fix code & make style & fix-copies
1 parent 9f81035 commit 4e92dc6

File tree

13 files changed

+406
-332
lines changed

13 files changed

+406
-332
lines changed

docs/source/en/_toctree.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,8 @@
373373
title: LuminaNextDiT2DModel
374374
- local: api/models/mochi_transformer3d
375375
title: MochiTransformer3DModel
376+
- local: api/models/longcat_image_transformer2d
377+
title: LongCatImageTransformer2DModel
376378
- local: api/models/omnigen_transformer
377379
title: OmniGenTransformer2DModel
378380
- local: api/models/ovisimage_transformer2d
@@ -567,6 +569,8 @@
567569
title: Lumina 2.0
568570
- local: api/pipelines/lumina
569571
title: Lumina-T2X
572+
- local: api/pipelines/longcat_image
573+
title: LongCat-Image
570574
- local: api/pipelines/marigold
571575
title: Marigold
572576
- local: api/pipelines/panorama

src/diffusers/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -973,10 +973,10 @@
973973
Kandinsky3UNet,
974974
Kandinsky5Transformer3DModel,
975975
LatteTransformer3DModel,
976+
LongCatImageTransformer2DModel,
976977
LTXVideoTransformer3DModel,
977978
Lumina2Transformer2DModel,
978979
LuminaNextDiT2DModel,
979-
LongCatImageTransformer2DModel,
980980
MochiTransformer3DModel,
981981
ModelMixin,
982982
MotionAdapter,
@@ -1241,12 +1241,12 @@
12411241
LDMTextToImagePipeline,
12421242
LEditsPPPipelineStableDiffusion,
12431243
LEditsPPPipelineStableDiffusionXL,
1244+
LongCatImageEditPipeline,
1245+
LongCatImagePipeline,
12441246
LTXConditionPipeline,
12451247
LTXImageToVideoPipeline,
12461248
LTXLatentUpsamplePipeline,
12471249
LTXPipeline,
1248-
LongCatImagePipeline,
1249-
LongCatImageEditPipeline,
12501250
LucyEditPipeline,
12511251
Lumina2Pipeline,
12521252
Lumina2Text2ImgPipeline,

src/diffusers/models/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,10 +209,10 @@
209209
HunyuanVideoTransformer3DModel,
210210
Kandinsky5Transformer3DModel,
211211
LatteTransformer3DModel,
212+
LongCatImageTransformer2DModel,
212213
LTXVideoTransformer3DModel,
213214
Lumina2Transformer2DModel,
214215
LuminaNextDiT2DModel,
215-
LongCatImageTransformer2DModel,
216216
MochiTransformer3DModel,
217217
OmniGenTransformer2DModel,
218218
OvisImageTransformer2DModel,

src/diffusers/models/transformers/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@
3333
from .transformer_hunyuan_video_framepack import HunyuanVideoFramepackTransformer3DModel
3434
from .transformer_hunyuanimage import HunyuanImageTransformer2DModel
3535
from .transformer_kandinsky import Kandinsky5Transformer3DModel
36+
from .transformer_longcat_image import LongCatImageTransformer2DModel
3637
from .transformer_ltx import LTXVideoTransformer3DModel
3738
from .transformer_lumina2 import Lumina2Transformer2DModel
38-
from .transformer_longcat_image import LongCatImageTransformer2DModel
3939
from .transformer_mochi import MochiTransformer3DModel
4040
from .transformer_omnigen import OmniGenTransformer2DModel
4141
from .transformer_ovis_image import OvisImageTransformer2DModel
@@ -48,4 +48,4 @@
4848
from .transformer_wan import WanTransformer3DModel
4949
from .transformer_wan_animate import WanAnimateTransformer3DModel
5050
from .transformer_wan_vace import WanVACETransformer3DModel
51-
from .transformer_z_image import ZImageTransformer2DModel
51+
from .transformer_z_image import ZImageTransformer2DModel

src/diffusers/models/transformers/transformer_longcat_image.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import inspect
1616
from typing import Any, Dict, List, Optional, Tuple, Union
1717

18-
import numpy as np
1918
import torch
2019
import torch.nn as nn
2120
import torch.nn.functional as F
@@ -24,14 +23,14 @@
2423
from ...loaders import FromOriginalModelMixin, PeftAdapterMixin
2524
from ...utils import is_torch_npu_available, logging
2625
from ...utils.torch_utils import maybe_allow_in_graph
27-
from .._modeling_parallel import ContextParallelInput, ContextParallelOutput
28-
from ..attention import AttentionMixin, AttentionModuleMixin, FeedForward
26+
from ..attention import AttentionModuleMixin, FeedForward
2927
from ..attention_dispatch import dispatch_attention_fn
3028
from ..cache_utils import CacheMixin
29+
from ..embeddings import TimestepEmbedding, Timesteps, apply_rotary_emb, get_1d_rotary_pos_embed
3130
from ..modeling_outputs import Transformer2DModelOutput
3231
from ..modeling_utils import ModelMixin
3332
from ..normalization import AdaLayerNormContinuous, AdaLayerNormZero, AdaLayerNormZeroSingle
34-
from ..embeddings import TimestepEmbedding, Timesteps, apply_rotary_emb, get_1d_rotary_pos_embed
33+
3534

3635
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
3736

@@ -381,7 +380,6 @@ def forward(self, ids: torch.Tensor) -> torch.Tensor:
381380
return freqs_cos, freqs_sin
382381

383382

384-
385383
class LongCatImageTimestepEmbeddings(nn.Module):
386384
def __init__(self, embedding_dim):
387385
super().__init__()
@@ -394,14 +392,15 @@ def forward(self, timestep, hidden_dtype):
394392
timesteps_emb = self.timestep_embedder(timesteps_proj.to(dtype=hidden_dtype)) # (N, D)
395393

396394
return timesteps_emb
397-
395+
398396

399397
class LongCatImageTransformer2DModel(
400398
ModelMixin,
401399
ConfigMixin,
402400
PeftAdapterMixin,
403401
FromOriginalModelMixin,
404-
CacheMixin, ):
402+
CacheMixin,
403+
):
405404
"""
406405
The Transformer model introduced in Longcat-Image.
407406
"""
@@ -455,10 +454,8 @@ def __init__(
455454
]
456455
)
457456

458-
self.norm_out = AdaLayerNormContinuous(
459-
self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6)
460-
self.proj_out = nn.Linear(
461-
self.inner_dim, patch_size * patch_size * self.out_channels, bias=True)
457+
self.norm_out = AdaLayerNormContinuous(self.inner_dim, self.inner_dim, elementwise_affine=False, eps=1e-6)
458+
self.proj_out = nn.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True)
462459

463460
self.gradient_checkpointing = False
464461
self.use_checkpoint = [True] * num_layers
@@ -498,10 +495,9 @@ def forward(
498495

499496
timestep = timestep.to(hidden_states.dtype) * 1000
500497

501-
temb = self.time_embed( timestep, hidden_states.dtype )
498+
temb = self.time_embed(timestep, hidden_states.dtype)
502499
encoder_hidden_states = self.context_embedder(encoder_hidden_states)
503500

504-
505501
ids = torch.cat((txt_ids, img_ids), dim=0)
506502
if is_torch_npu_available():
507503
freqs_cos, freqs_sin = self.pos_embed(ids.cpu())
@@ -528,7 +524,7 @@ def forward(
528524

529525
for index_block, block in enumerate(self.single_transformer_blocks):
530526
if torch.is_grad_enabled() and self.gradient_checkpointing and self.use_single_checkpoint[index_block]:
531-
encoder_hidden_states,hidden_states = self._gradient_checkpointing_func(
527+
encoder_hidden_states, hidden_states = self._gradient_checkpointing_func(
532528
block,
533529
hidden_states,
534530
encoder_hidden_states,

src/diffusers/pipelines/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@
291291
_import_structure["lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
292292
_import_structure["lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]
293293
_import_structure["lucy"] = ["LucyEditPipeline"]
294-
_import_structure["longcat_image"] = ["LongCatImagePipeline","LongCatImageEditPipeline"]
294+
_import_structure["longcat_image"] = ["LongCatImagePipeline", "LongCatImageEditPipeline"]
295295
_import_structure["marigold"].extend(
296296
[
297297
"MarigoldDepthPipeline",
@@ -719,11 +719,11 @@
719719
LEditsPPPipelineStableDiffusion,
720720
LEditsPPPipelineStableDiffusionXL,
721721
)
722+
from .longcat_image import LongCatImageEditPipeline, LongCatImagePipeline
722723
from .ltx import LTXConditionPipeline, LTXImageToVideoPipeline, LTXLatentUpsamplePipeline, LTXPipeline
723724
from .lucy import LucyEditPipeline
724725
from .lumina import LuminaPipeline, LuminaText2ImgPipeline
725726
from .lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline
726-
from .longcat_image import LongCatImagePipeline,LongCatImageEditPipeline
727727
from .marigold import (
728728
MarigoldDepthPipeline,
729729
MarigoldIntrinsicsPipeline,

src/diffusers/pipelines/longcat_image/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@
3333
except OptionalDependencyNotAvailable:
3434
from ...utils.dummy_torch_and_transformers_objects import *
3535
else:
36-
from .pipeline_output import LongCatImagePipelineOutput
3736
from .pipeline_longcat_image import LongCatImagePipeline
3837
from .pipeline_longcat_image_edit import LongCatImageEditPipeline
38+
from .pipeline_output import LongCatImagePipelineOutput
3939

4040
else:
4141
import sys

0 commit comments

Comments
 (0)