
Commit 2c91843

Merge branch 'main' into fix-dtype-mismatch
2 parents 2ec9ffa + 124ac3e commit 2c91843

File tree

23 files changed: +174 −67 lines changed

docs/source/en/api/pipelines/lumina.md

Lines changed: 7 additions & 7 deletions
@@ -58,10 +58,10 @@ Use [`torch.compile`](https://huggingface.co/docs/diffusers/main/en/tutorials/fa
 First, load the pipeline:
 
 ```python
-from diffusers import LuminaText2ImgPipeline
+from diffusers import LuminaPipeline
 import torch
 
-pipeline = LuminaText2ImgPipeline.from_pretrained(
+pipeline = LuminaPipeline.from_pretrained(
     "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
 ).to("cuda")
 ```
@@ -86,11 +86,11 @@ image = pipeline(prompt="Upper body of a young woman in a Victorian-era outfit w
 
 Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have a varying impact on image quality depending on the model.
 
-Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`LuminaText2ImgPipeline`] for inference with bitsandbytes.
+Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`LuminaPipeline`] for inference with bitsandbytes.
 
 ```py
 import torch
-from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, Transformer2DModel, LuminaText2ImgPipeline
+from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, Transformer2DModel, LuminaPipeline
 from transformers import BitsAndBytesConfig as BitsAndBytesConfig, T5EncoderModel
 
 quant_config = BitsAndBytesConfig(load_in_8bit=True)
@@ -109,7 +109,7 @@ transformer_8bit = Transformer2DModel.from_pretrained(
     torch_dtype=torch.float16,
 )
 
-pipeline = LuminaText2ImgPipeline.from_pretrained(
+pipeline = LuminaPipeline.from_pretrained(
     "Alpha-VLLM/Lumina-Next-SFT-diffusers",
     text_encoder=text_encoder_8bit,
     transformer=transformer_8bit,
@@ -122,9 +122,9 @@ image = pipeline(prompt).images[0]
 image.save("lumina.png")
 ```
 
-## LuminaText2ImgPipeline
+## LuminaPipeline
 
-[[autodoc]] LuminaText2ImgPipeline
+[[autodoc]] LuminaPipeline
 - all
 - __call__
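
The first hunk above sits under the docs' `torch.compile` section; with the renamed pipeline loaded as in the diff, the usual diffusers pattern is to compile the denoising transformer. A minimal sketch, not the docs' exact text (the prompt string is a placeholder):

```python
import torch
from diffusers import LuminaPipeline

pipeline = LuminaPipeline.from_pretrained(
    "Alpha-VLLM/Lumina-Next-SFT-diffusers", torch_dtype=torch.bfloat16
).to("cuda")

# Compile the transformer; the first call pays the compilation cost,
# later calls reuse the compiled graph.
pipeline.transformer = torch.compile(pipeline.transformer)
image = pipeline(prompt="a photo of a cat").images[0]  # placeholder prompt
```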

docs/source/en/api/pipelines/lumina2.md

Lines changed: 6 additions & 6 deletions
@@ -36,14 +36,14 @@ Single file loading for Lumina Image 2.0 is available for the `Lumina2Transforme
 
 ```python
 import torch
-from diffusers import Lumina2Transformer2DModel, Lumina2Text2ImgPipeline
+from diffusers import Lumina2Transformer2DModel, Lumina2Pipeline
 
 ckpt_path = "https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0/blob/main/consolidated.00-of-01.pth"
 transformer = Lumina2Transformer2DModel.from_single_file(
     ckpt_path, torch_dtype=torch.bfloat16
 )
 
-pipe = Lumina2Text2ImgPipeline.from_pretrained(
+pipe = Lumina2Pipeline.from_pretrained(
     "Alpha-VLLM/Lumina-Image-2.0", transformer=transformer, torch_dtype=torch.bfloat16
 )
 pipe.enable_model_cpu_offload()
@@ -60,7 +60,7 @@ image.save("lumina-single-file.png")
 GGUF Quantized checkpoints for the `Lumina2Transformer2DModel` can be loaded via `from_single_file` with the `GGUFQuantizationConfig`.
 
 ```python
-from diffusers import Lumina2Transformer2DModel, Lumina2Text2ImgPipeline, GGUFQuantizationConfig
+from diffusers import Lumina2Transformer2DModel, Lumina2Pipeline, GGUFQuantizationConfig
 
 ckpt_path = "https://huggingface.co/calcuis/lumina-gguf/blob/main/lumina2-q4_0.gguf"
 transformer = Lumina2Transformer2DModel.from_single_file(
@@ -69,7 +69,7 @@ transformer = Lumina2Transformer2DModel.from_single_file(
     torch_dtype=torch.bfloat16,
 )
 
-pipe = Lumina2Text2ImgPipeline.from_pretrained(
+pipe = Lumina2Pipeline.from_pretrained(
     "Alpha-VLLM/Lumina-Image-2.0", transformer=transformer, torch_dtype=torch.bfloat16
 )
 pipe.enable_model_cpu_offload()
@@ -80,8 +80,8 @@ image = pipe(
 image.save("lumina-gguf.png")
 ```
 
-## Lumina2Text2ImgPipeline
+## Lumina2Pipeline
 
-[[autodoc]] Lumina2Text2ImgPipeline
+[[autodoc]] Lumina2Pipeline
 - all
 - __call__
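
The GGUF hunk elides the lines where the quantization config is built. Assuming the docs follow the standard diffusers GGUF loading pattern, the full call looks roughly like this sketch:

```python
import torch
from diffusers import GGUFQuantizationConfig, Lumina2Transformer2DModel

ckpt_path = "https://huggingface.co/calcuis/lumina-gguf/blob/main/lumina2-q4_0.gguf"
transformer = Lumina2Transformer2DModel.from_single_file(
    ckpt_path,
    # Weights stay GGUF-quantized on disk and in memory;
    # computation is dequantized to bfloat16 on the fly.
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
)
```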

examples/controlnet/train_controlnet.py

Lines changed: 1 addition & 3 deletions
@@ -152,9 +152,7 @@ def log_validation(
             validation_prompt = log["validation_prompt"]
             validation_image = log["validation_image"]
 
-            formatted_images = []
-
-            formatted_images.append(np.asarray(validation_image))
+            formatted_images = [np.asarray(validation_image)]
 
             for image in images:
                 formatted_images.append(np.asarray(image))
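
The same cleanup repeats across the training scripts below: an empty list followed by a single `append` collapses into a list literal. A self-contained sketch of the resulting pattern, with placeholder inputs standing in for the scripts' variables:

```python
import numpy as np
from PIL import Image

# Placeholders for the script's validation_image and generated images.
validation_image = Image.new("RGB", (64, 64))
images = [Image.new("RGB", (64, 64)) for _ in range(2)]

# Seed the list with the conditioning image, then append each sample.
formatted_images = [np.asarray(validation_image)]
for image in images:
    formatted_images.append(np.asarray(image))

# Stack into a (N, H, W, C) array, e.g. for a tensorboard image grid.
formatted_images = np.stack(formatted_images)
```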

examples/controlnet/train_controlnet_flux.py

Lines changed: 1 addition & 3 deletions
@@ -166,9 +166,7 @@ def log_validation(
             validation_prompt = log["validation_prompt"]
             validation_image = log["validation_image"]
 
-            formatted_images = []
-
-            formatted_images.append(np.asarray(validation_image))
+            formatted_images = [np.asarray(validation_image)]
 
             for image in images:
                 formatted_images.append(np.asarray(image))

examples/controlnet/train_controlnet_sdxl.py

Lines changed: 1 addition & 3 deletions
@@ -157,9 +157,7 @@ def log_validation(vae, unet, controlnet, args, accelerator, weight_dtype, step,
             validation_prompt = log["validation_prompt"]
             validation_image = log["validation_image"]
 
-            formatted_images = []
-
-            formatted_images.append(np.asarray(validation_image))
+            formatted_images = [np.asarray(validation_image)]
 
             for image in images:
                 formatted_images.append(np.asarray(image))

examples/research_projects/controlnet/train_controlnet_webdataset.py

Lines changed: 1 addition & 3 deletions
@@ -381,9 +381,7 @@ def log_validation(vae, unet, controlnet, args, accelerator, weight_dtype, step)
             validation_prompt = log["validation_prompt"]
             validation_image = log["validation_image"]
 
-            formatted_images = []
-
-            formatted_images.append(np.asarray(validation_image))
+            formatted_images = [np.asarray(validation_image)]
 
             for image in images:
                 formatted_images.append(np.asarray(image))

examples/research_projects/pixart/train_pixart_controlnet_hf.py

Lines changed: 1 addition & 3 deletions
@@ -164,9 +164,7 @@ def log_validation(
             validation_prompt = log["validation_prompt"]
             validation_image = log["validation_image"]
 
-            formatted_images = []
-
-            formatted_images.append(np.asarray(validation_image))
+            formatted_images = [np.asarray(validation_image)]
 
             for image in images:
                 formatted_images.append(np.asarray(image))

examples/t2i_adapter/train_t2i_adapter_sdxl.py

Lines changed: 1 addition & 3 deletions
@@ -141,9 +141,7 @@ def log_validation(vae, unet, adapter, args, accelerator, weight_dtype, step):
             validation_prompt = log["validation_prompt"]
             validation_image = log["validation_image"]
 
-            formatted_images = []
-
-            formatted_images.append(np.asarray(validation_image))
+            formatted_images = [np.asarray(validation_image)]
 
             for image in images:
                 formatted_images.append(np.asarray(image))

scripts/convert_lumina_to_diffusers.py

Lines changed: 2 additions & 2 deletions
@@ -5,7 +5,7 @@
 from safetensors.torch import load_file
 from transformers import AutoModel, AutoTokenizer
 
-from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, LuminaNextDiT2DModel, LuminaText2ImgPipeline
+from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, LuminaNextDiT2DModel, LuminaPipeline
 
 
 def main(args):
@@ -115,7 +115,7 @@ def main(args):
     tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
     text_encoder = AutoModel.from_pretrained("google/gemma-2b")
 
-    pipeline = LuminaText2ImgPipeline(
+    pipeline = LuminaPipeline(
         tokenizer=tokenizer, text_encoder=text_encoder, transformer=transformer, vae=vae, scheduler=scheduler
     )
     pipeline.save_pretrained(args.dump_path)
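
Since the script ends with `save_pretrained(args.dump_path)`, the converted checkpoint round-trips through the standard loader under the new class name. A minimal sketch; the path is a placeholder for whatever `args.dump_path` was set to:

```python
from diffusers import LuminaPipeline

# "/path/to/dump" is a placeholder for the conversion script's dump path.
pipeline = LuminaPipeline.from_pretrained("/path/to/dump")
```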

src/diffusers/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -403,7 +403,9 @@
     "LEditsPPPipelineStableDiffusionXL",
     "LTXImageToVideoPipeline",
     "LTXPipeline",
+    "Lumina2Pipeline",
     "Lumina2Text2ImgPipeline",
+    "LuminaPipeline",
     "LuminaText2ImgPipeline",
     "MarigoldDepthPipeline",
     "MarigoldIntrinsicsPipeline",
@@ -945,7 +947,9 @@
     LEditsPPPipelineStableDiffusionXL,
     LTXImageToVideoPipeline,
     LTXPipeline,
+    Lumina2Pipeline,
     Lumina2Text2ImgPipeline,
+    LuminaPipeline,
     LuminaText2ImgPipeline,
     MarigoldDepthPipeline,
     MarigoldIntrinsicsPipeline,
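
Both hunks add the new names while keeping `LuminaText2ImgPipeline` and `Lumina2Text2ImgPipeline` exported, which suggests the old classes survive as backward-compatible aliases. A hypothetical sketch of such a deprecation shim; the real diffusers implementation may differ:

```python
import warnings

from diffusers import LuminaPipeline


class LuminaText2ImgPipeline(LuminaPipeline):
    # Hypothetical shim: keep the old name importable while
    # steering callers toward the new one.
    def __init__(self, *args, **kwargs):
        warnings.warn(
            "`LuminaText2ImgPipeline` is deprecated; use `LuminaPipeline` instead.",
            FutureWarning,
        )
        super().__init__(*args, **kwargs)
```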
