Skip to content

Commit d9f80fc

Browse files
committed
update docs
1 parent 4c5e8c5 commit d9f80fc

File tree

6 files changed

+32
-10
lines changed

6 files changed

+32
-10
lines changed

docs/source/en/_toctree.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
title: Text or image-to-video
5252
- local: using-diffusers/depth2img
5353
title: Depth-to-image
54+
- local: using-diffusers/multimodal2img
55+
title: Multimodal-to-image
5456
title: Generative tasks
5557
- sections:
5658
- local: using-diffusers/overview_techniques
@@ -87,6 +89,8 @@
8789
title: Kandinsky
8890
- local: using-diffusers/ip_adapter
8991
title: IP-Adapter
92+
- local: using-diffusers/omnigen
93+
title: OmniGen
9094
- local: using-diffusers/pag
9195
title: PAG
9296
- local: using-diffusers/controlnet
@@ -274,6 +278,8 @@
274278
title: LuminaNextDiT2DModel
275279
- local: api/models/mochi_transformer3d
276280
title: MochiTransformer3DModel
281+
- local: api/models/omnigen_transformer
282+
title: OmniGenTransformer2DModel
277283
- local: api/models/pixart_transformer2d
278284
title: PixArtTransformer2DModel
279285
- local: api/models/prior_transformer
@@ -412,6 +418,8 @@
412418
title: MultiDiffusion
413419
- local: api/pipelines/musicldm
414420
title: MusicLDM
421+
- local: api/pipelines/omnigen
422+
title: OmniGen
415423
- local: api/pipelines/pag
416424
title: PAG
417425
- local: api/pipelines/paint_by_example
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<!--Copyright 2024 The HuggingFace Team. All rights reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
4+
the License. You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
9+
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10+
specific language governing permissions and limitations under the License.
11+
-->
12+
13+
# OmniGenTransformer2DModel
14+
15+
A Transformer model that accepts multimodal instructions to generate images for [OmniGen](https://github.com/VectorSpaceLab/OmniGen/).
16+
17+
## OmniGenTransformer2DModel
18+
19+
[[autodoc]] OmniGenTransformer2DModel

docs/source/en/using-diffusers/omnigen.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ pipe = OmniGenPipeline.from_pretrained(
4646
torch_dtype=torch.bfloat16
4747
)
4848

49-
prompt = "An elderly gentleman, with a serene expression, sits at the water's edge, a steaming cup of tea by his side. He is engrossed in his artwork, brush in hand, as he renders an oil painting on a canvas that's propped up against a small, weathered table. The sea breeze whispers through his silver hair, gently billowing his loose-fitting white shirt, while the salty air adds an intangible element to his masterpiece in progress. The scene is one of tranquility and inspiration, with the artist's canvas capturing the vibrant hues of the setting sun reflecting off the tranquil sea."
49+
prompt = "A young woman sits on a sofa, holding a book and facing the camera. She wears delicate silver hoop earrings adorned with tiny, sparkling diamonds that catch the light, with her long chestnut hair cascading over her shoulders. Her eyes are focused and gentle, framed by long, dark lashes. She is dressed in a cozy cream sweater, which complements her warm, inviting smile. Behind her, there is a table with a cup of water in a sleek, minimalist blue mug. The background is a serene indoor setting with soft natural light filtering through a window, adorned with tasteful art and flowers, creating a cozy and peaceful ambiance. 4K, HD."
5050
pipe.enable_model_cpu_offload()
5151

5252
image = pipe(

src/diffusers/models/transformers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@
1818
from .transformer_cogview3plus import CogView3PlusTransformer2DModel
1919
from .transformer_flux import FluxTransformer2DModel
2020
from .transformer_mochi import MochiTransformer3DModel
21+
from .transformer_omnigen import OmniGenTransformer2DModel
2122
from .transformer_sd3 import SD3Transformer2DModel
2223
from .transformer_temporal import TransformerTemporalModel
23-
from .transformer_omnigen import OmniGenTransformer2DModel

src/diffusers/models/transformers/transformer_omnigen.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -225,15 +225,10 @@ class OmniGenTransformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
225225
Reference: https://arxiv.org/pdf/2409.11340
226226
227227
Parameters:
228+
transformer_config (`dict`): Config for the transformer layers. OmniGen-v1 uses Phi3 as its transformer backbone.
228229
patch_size (`int`, defaults to 2): Patch size to turn the input data into small patches.
229230
in_channels (`int`, *optional*, defaults to 4): The number of channels in the input.
230-
num_layers (`int`, *optional*, defaults to 18): The number of layers of MMDiT blocks to use.
231-
num_single_layers (`int`, *optional*, defaults to 18): The number of layers of single DiT blocks to use.
232-
attention_head_dim (`int`, *optional*, defaults to 64): The number of channels in each head.
233-
num_attention_heads (`int`, *optional*, defaults to 18): The number of heads to use for multi-head attention.
234-
joint_attention_dim (`int`, *optional*): The number of `encoder_hidden_states` dimensions to use.
235-
pooled_projection_dim (`int`): Number of dimensions to use when projecting the `pooled_projections`.
236-
guidance_embeds (`bool`, defaults to False): Whether to use guidance embeddings.
231+
pos_embed_max_size (`int`, *optional*, defaults to 192): The maximum size of the positional embedding.
237232
"""
238233
_supports_gradient_checkpointing = True
239234

src/diffusers/pipelines/lumina/pipeline_lumina.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from ...image_processor import VaeImageProcessor
2626
from ...models import AutoencoderKL
2727
from ...models.embeddings import get_2d_rotary_pos_embed_lumina
28-
from ...models.transformers.lumina_nextdit2d import LuminaextDiT2DModel
28+
from ...models.transformers.lumina_nextdit2d import LuminaNextDiT2DModel
2929
from ...schedulers import FlowMatchEulerDiscreteScheduler
3030
from ...utils import (
3131
BACKENDS_MAPPING,

0 commit comments

Comments
 (0)