Skip to content

Commit 65e76fe

Browse files
committed
feat: add lumina2 accessory pipeline
1 parent efb7a29 commit 65e76fe

File tree

14 files changed

+1881
-2
lines changed

14 files changed

+1881
-2
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
4+
the License. You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
9+
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10+
specific language governing permissions and limitations under the License. -->
11+
12+
# Lumina2AccessoryTransformer2DModel
13+
14+
A Diffusion Transformer model for 2D data from [Lumina-Accessory](https://github.com/Alpha-VLLM/Lumina-Accessory) by Alpha-VLLM.
15+
16+
The model can be loaded with the following code snippet.
17+
18+
```python
19+
from diffusers import Lumina2AccessoryTransformer2DModel
20+
21+
ckpt_path = "https://huggingface.co/Alpha-VLLM/Lumina-Accessory/blob/main/consolidated.00-of-01.pth"
22+
transformer = Lumina2AccessoryTransformer2DModel.from_single_file(ckpt_path, torch_dtype=torch.bfloat16)
23+
```
24+
25+
## Lumina2AccessoryTransformer2DModel
26+
27+
[[autodoc]] Lumina2AccessoryTransformer2DModel
28+
29+
## Transformer2DModelOutput
30+
31+
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

docs/source/en/api/pipelines/lumina2.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,53 @@ image = pipe(
8080
image.save("lumina-gguf.png")
8181
```
8282

83+
## Lumina Accessory
84+
85+
Lumina-Accessory is a multi-task instruction fine-tuning framework designed for the Lumina series. The official repository is [Alpha-VLLM/Lumina-Accessory](https://github.com/Alpha-VLLM/Lumina-Accessory).
86+
87+
```python
88+
import torch
89+
from diffusers import Lumina2AccessoryPipeline, Lumina2AccessoryTransformer2DModel
90+
from diffusers.utils import load_image
91+
92+
ckpt_path = "https://huggingface.co/Alpha-VLLM/Lumina-Accessory/blob/main/consolidated.00-of-01.pth"
93+
transformer = Lumina2AccessoryTransformer2DModel.from_single_file(ckpt_path, torch_dtype=torch.bfloat16)
94+
pipe = Lumina2AccessoryPipeline.from_pretrained(
95+
"Alpha-VLLM/Lumina-Image-2.0", transformer=transformer, torch_dtype=torch.bfloat16
96+
)
97+
98+
# Enable memory optimizations.
99+
pipe.enable_model_cpu_offload()
100+
101+
img = load_image("https://github.com/Alpha-VLLM/Lumina-Accessory/blob/main/examples/case_1_condition.jpg?raw=true")
102+
prompt = "A classical oil painting of a young woman dressed in a modern DARK BLACK leather jacket."
103+
system_prompt = "You are an assistant designed to generate superior images with the highest degree of image-text alignment based on textual prompts and a partially masked image."
104+
image = pipe(
105+
image=img,
106+
prompt=prompt,
107+
system_prompt=system_prompt,
108+
width=img.size[0],
109+
height=img.size[1],
110+
negative_prompt=" ",
111+
num_inference_steps=25,
112+
num_images_per_prompt=1,
113+
guidance_scale=4.0,
114+
cfg_trunc_ratio=1.0,
115+
cfg_normalization=True,
116+
generator=torch.Generator().manual_seed(42),
117+
).images[0]
118+
image.save("lumina2_accessory_image_infilling.png")
119+
```
120+
83121
## Lumina2Pipeline
84122

85123
[[autodoc]] Lumina2Pipeline
86124
- all
87125
- __call__
126+
127+
128+
## Lumina2AccessoryPipeline
129+
130+
[[autodoc]] Lumina2AccessoryPipeline
131+
- all
132+
- __call__

src/diffusers/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@
221221
"Kandinsky3UNet",
222222
"LatteTransformer3DModel",
223223
"LTXVideoTransformer3DModel",
224+
"Lumina2AccessoryTransformer2DModel",
224225
"Lumina2Transformer2DModel",
225226
"LuminaNextDiT2DModel",
226227
"MochiTransformer3DModel",
@@ -496,6 +497,7 @@
496497
"LTXLatentUpsamplePipeline",
497498
"LTXPipeline",
498499
"LucyEditPipeline",
500+
"Lumina2AccessoryPipeline",
499501
"Lumina2Pipeline",
500502
"Lumina2Text2ImgPipeline",
501503
"LuminaPipeline",
@@ -906,6 +908,7 @@
906908
Kandinsky3UNet,
907909
LatteTransformer3DModel,
908910
LTXVideoTransformer3DModel,
911+
Lumina2AccessoryTransformer2DModel,
909912
Lumina2Transformer2DModel,
910913
LuminaNextDiT2DModel,
911914
MochiTransformer3DModel,
@@ -1151,6 +1154,7 @@
11511154
LTXLatentUpsamplePipeline,
11521155
LTXPipeline,
11531156
LucyEditPipeline,
1157+
Lumina2AccessoryPipeline,
11541158
Lumina2Pipeline,
11551159
Lumina2Text2ImgPipeline,
11561160
LuminaPipeline,

src/diffusers/loaders/single_file_model.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,10 @@
134134
"checkpoint_mapping_fn": convert_lumina2_to_diffusers,
135135
"default_subfolder": "transformer",
136136
},
137+
"Lumina2AccessoryTransformer2DModel": {
138+
"checkpoint_mapping_fn": convert_lumina2_to_diffusers,
139+
"default_subfolder": "transformer",
140+
},
137141
"SanaTransformer2DModel": {
138142
"checkpoint_mapping_fn": convert_sana_transformer_to_diffusers,
139143
"default_subfolder": "transformer",

src/diffusers/models/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
_import_structure["transformers.transformer_hunyuan_video_framepack"] = ["HunyuanVideoFramepackTransformer3DModel"]
9393
_import_structure["transformers.transformer_ltx"] = ["LTXVideoTransformer3DModel"]
9494
_import_structure["transformers.transformer_lumina2"] = ["Lumina2Transformer2DModel"]
95+
_import_structure["transformers.transformer_lumina2_accessory"] = ["Lumina2AccessoryTransformer2DModel"]
9596
_import_structure["transformers.transformer_mochi"] = ["MochiTransformer3DModel"]
9697
_import_structure["transformers.transformer_omnigen"] = ["OmniGenTransformer2DModel"]
9798
_import_structure["transformers.transformer_qwenimage"] = ["QwenImageTransformer2DModel"]
@@ -182,6 +183,7 @@
182183
HunyuanVideoTransformer3DModel,
183184
LatteTransformer3DModel,
184185
LTXVideoTransformer3DModel,
186+
Lumina2AccessoryTransformer2DModel,
185187
Lumina2Transformer2DModel,
186188
LuminaNextDiT2DModel,
187189
MochiTransformer3DModel,

src/diffusers/models/transformers/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from .transformer_hunyuan_video_framepack import HunyuanVideoFramepackTransformer3DModel
3030
from .transformer_ltx import LTXVideoTransformer3DModel
3131
from .transformer_lumina2 import Lumina2Transformer2DModel
32+
from .transformer_lumina2_accessory import Lumina2AccessoryTransformer2DModel
3233
from .transformer_mochi import MochiTransformer3DModel
3334
from .transformer_omnigen import OmniGenTransformer2DModel
3435
from .transformer_qwenimage import QwenImageTransformer2DModel

0 commit comments

Comments
 (0)