Skip to content

Commit 0a633e4

Browse files
authored
Merge branch 'main' into main
2 parents a597713 + 0967593 commit 0a633e4

24 files changed

+2824
-62
lines changed

docs/source/en/api/pipelines/flux.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,35 @@ image = pipe(
143143
image.save("output.png")
144144
```
145145

146+
Canny Control is also possible with a LoRA variant of this condition. The usage is as follows:
147+
148+
```python
149+
# !pip install -U controlnet-aux
150+
import torch
151+
from controlnet_aux import CannyDetector
152+
from diffusers import FluxControlPipeline
153+
from diffusers.utils import load_image
154+
155+
pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")
156+
pipe.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora")
157+
158+
prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
159+
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
160+
161+
processor = CannyDetector()
162+
control_image = processor(control_image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)
163+
164+
image = pipe(
165+
prompt=prompt,
166+
control_image=control_image,
167+
height=1024,
168+
width=1024,
169+
num_inference_steps=50,
170+
guidance_scale=30.0,
171+
).images[0]
172+
image.save("output.png")
173+
```
174+
146175
### Depth Control
147176

148177
**Note:** `black-forest-labs/Flux.1-Depth-dev` is _not_ a ControlNet model. [`ControlNetModel`] models are a separate component from the UNet/Transformer whose residuals are added to the actual underlying model. Depth Control is an alternate architecture that achieves effectively the same results as a ControlNet model would, by using channel-wise concatenation with the input control condition and ensuring the transformer learns structure control by following the condition as closely as possible.
@@ -174,6 +203,36 @@ image = pipe(
174203
image.save("output.png")
175204
```
176205

206+
Depth Control is also possible with a LoRA variant of this condition. The usage is as follows:
207+
208+
```python
209+
# !pip install git+https://github.com/huggingface/image_gen_aux
210+
import torch
211+
from diffusers import FluxControlPipeline, FluxTransformer2DModel
212+
from diffusers.utils import load_image
213+
from image_gen_aux import DepthPreprocessor
214+
215+
pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")
216+
pipe.load_lora_weights("black-forest-labs/FLUX.1-Depth-dev-lora")
217+
218+
prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
219+
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
220+
221+
processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
222+
control_image = processor(control_image)[0].convert("RGB")
223+
224+
image = pipe(
225+
prompt=prompt,
226+
control_image=control_image,
227+
height=1024,
228+
width=1024,
229+
num_inference_steps=30,
230+
guidance_scale=10.0,
231+
generator=torch.Generator().manual_seed(42),
232+
).images[0]
233+
image.save("output.png")
234+
```
235+
177236
### Redux
178237

179238
* The Flux Redux pipeline is an adapter for FLUX.1 base models. It can be used with both flux-dev and flux-schnell for image-to-image generation.

docs/source/en/api/pipelines/pag.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ Since RegEx is supported as a way for matching layer identifiers, it is crucial
4848
- all
4949
- __call__
5050

51+
## StableDiffusionPAGInpaintPipeline
52+
[[autodoc]] StableDiffusionPAGInpaintPipeline
53+
- all
54+
- __call__
55+
5156
## StableDiffusionPAGPipeline
5257
[[autodoc]] StableDiffusionPAGPipeline
5358
- all

examples/cogvideo/train_cogvideox_image_to_video_lora.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -872,10 +872,9 @@ def prepare_rotary_positional_embeddings(
872872
crops_coords=grid_crops_coords,
873873
grid_size=(grid_height, grid_width),
874874
temporal_size=num_frames,
875+
device=device,
875876
)
876877

877-
freqs_cos = freqs_cos.to(device=device)
878-
freqs_sin = freqs_sin.to(device=device)
879878
return freqs_cos, freqs_sin
880879

881880

examples/cogvideo/train_cogvideox_lora.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -894,10 +894,9 @@ def prepare_rotary_positional_embeddings(
894894
crops_coords=grid_crops_coords,
895895
grid_size=(grid_height, grid_width),
896896
temporal_size=num_frames,
897+
device=device,
897898
)
898899

899-
freqs_cos = freqs_cos.to(device=device)
900-
freqs_sin = freqs_sin.to(device=device)
901900
return freqs_cos, freqs_sin
902901

903902

examples/community/pipeline_flux_rf_inversion.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,10 @@
5353
Examples:
5454
```py
5555
>>> import torch
56-
>>> from diffusers import FluxPipeline
56+
>>> import requests
57+
>>> import PIL
58+
>>> from io import BytesIO
59+
>>> from diffusers import DiffusionPipeline
5760
5861
>>> pipe = DiffusionPipeline.from_pretrained(
5962
... "black-forest-labs/FLUX.1-dev",
@@ -77,10 +80,7 @@
7780
... image_latents=image_latents,
7881
... latent_image_ids=latent_image_ids,
7982
... start_timestep=0,
80-
... stop_timestep=.38,
81-
... num_inference_steps=28,
82-
... eta=0.9,
83-
... stop_timestep=.38,
83+
... stop_timestep=.25,
8484
... num_inference_steps=28,
8585
... eta=0.9,
8686
... ).images[0]

src/diffusers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@
365365
"StableDiffusionLDM3DPipeline",
366366
"StableDiffusionModelEditingPipeline",
367367
"StableDiffusionPAGImg2ImgPipeline",
368+
"StableDiffusionPAGInpaintPipeline",
368369
"StableDiffusionPAGPipeline",
369370
"StableDiffusionPanoramaPipeline",
370371
"StableDiffusionParadigmsPipeline",
@@ -838,6 +839,7 @@
838839
StableDiffusionLDM3DPipeline,
839840
StableDiffusionModelEditingPipeline,
840841
StableDiffusionPAGImg2ImgPipeline,
842+
StableDiffusionPAGInpaintPipeline,
841843
StableDiffusionPAGPipeline,
842844
StableDiffusionPanoramaPipeline,
843845
StableDiffusionParadigmsPipeline,

src/diffusers/image_processor.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def denormalize(images: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, to
236236
`np.ndarray` or `torch.Tensor`:
237237
The denormalized image array.
238238
"""
239-
return (images / 2 + 0.5).clamp(0, 1)
239+
return (images * 0.5 + 0.5).clamp(0, 1)
240240

241241
@staticmethod
242242
def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
@@ -537,6 +537,26 @@ def binarize(self, image: PIL.Image.Image) -> PIL.Image.Image:
537537

538538
return image
539539

540+
def _denormalize_conditionally(
541+
self, images: torch.Tensor, do_denormalize: Optional[List[bool]] = None
542+
) -> torch.Tensor:
543+
r"""
544+
Denormalize a batch of images based on a condition list.
545+
546+
Args:
547+
images (`torch.Tensor`):
548+
The input image tensor.
549+
do_denormalize (`Optional[List[bool]]`, *optional*, defaults to `None`):
550+
A list of booleans indicating whether to denormalize each image in the batch. If `None`, will use the
551+
value of `do_normalize` in the `VaeImageProcessor` config.
552+
"""
553+
if do_denormalize is None:
554+
return self.denormalize(images) if self.config.do_normalize else images
555+
556+
return torch.stack(
557+
[self.denormalize(images[i]) if do_denormalize[i] else images[i] for i in range(images.shape[0])]
558+
)
559+
540560
def get_default_height_width(
541561
self,
542562
image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
@@ -752,12 +772,7 @@ def postprocess(
752772
if output_type == "latent":
753773
return image
754774

755-
if do_denormalize is None:
756-
do_denormalize = [self.config.do_normalize] * image.shape[0]
757-
758-
image = torch.stack(
759-
[self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
760-
)
775+
image = self._denormalize_conditionally(image, do_denormalize)
761776

762777
if output_type == "pt":
763778
return image
@@ -966,12 +981,7 @@ def postprocess(
966981
deprecate("Unsupported output_type", "1.0.0", deprecation_message, standard_warn=False)
967982
output_type = "np"
968983

969-
if do_denormalize is None:
970-
do_denormalize = [self.config.do_normalize] * image.shape[0]
971-
972-
image = torch.stack(
973-
[self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
974-
)
984+
image = self._denormalize_conditionally(image, do_denormalize)
975985

976986
image = self.pt_to_numpy(image)
977987

0 commit comments

Comments
 (0)