huggingface
diff --git a/‎docs/source/en/api/pipelines/flux.md‎
Lines changed: 59 additions & 0 deletions b/‎docs/source/en/api/pipelines/flux.md‎
Lines changed: 59 additions & 0 deletions
diff --git a/‎docs/source/en/api/pipelines/pag.md‎
Lines changed: 5 additions & 0 deletions b/‎docs/source/en/api/pipelines/pag.md‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎examples/cogvideo/train_cogvideox_image_to_video_lora.py‎
Lines changed: 1 addition & 2 deletions b/‎examples/cogvideo/train_cogvideox_image_to_video_lora.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎examples/cogvideo/train_cogvideox_lora.py‎
Lines changed: 1 addition & 2 deletions b/‎examples/cogvideo/train_cogvideox_lora.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎examples/community/pipeline_flux_rf_inversion.py‎
Lines changed: 5 additions & 5 deletions b/‎examples/community/pipeline_flux_rf_inversion.py‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎src/diffusers/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎src/diffusers/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/diffusers/image_processor.py‎
Lines changed: 23 additions & 13 deletions b/‎src/diffusers/image_processor.py‎
Lines changed: 23 additions & 13 deletions
@@ -143,6 +143,35 @@ image = pipe(
 image.save("output.png")
 ```
 
+Canny Control is also possible with a LoRA variant of this condition. The usage is as follows:
+
+```python
+# !pip install -U controlnet-aux
+import torch
+from controlnet_aux import CannyDetector
+from diffusers import FluxControlPipeline
+from diffusers.utils import load_image
+
+pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")
+pipe.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora")
+
+prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
+control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
+
+processor = CannyDetector()
+control_image = processor(control_image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)
+
+image = pipe(
+    prompt=prompt,
+    control_image=control_image,
+    height=1024,
+    width=1024,
+    num_inference_steps=50,
+    guidance_scale=30.0,
+).images[0]
+image.save("output.png")
+```
+
 ### Depth Control
 
 **Note:** `black-forest-labs/Flux.1-Depth-dev` is _not_ a ControlNet model. [`ControlNetModel`] models are a separate component from the UNet/Transformer whose residuals are added to the actual underlying model. Depth Control is an alternate architecture that achieves effectively the same results as a ControlNet model would, by using channel-wise concatenation with the input control condition and ensuring the transformer learns structure control by following the condition as closely as possible.
@@ -174,6 +203,36 @@ image = pipe(
 image.save("output.png")
 ```
 
+Depth Control is also possible with a LoRA variant of this condition. The usage is as follows:
+
+```python
+# !pip install git+https://github.com/huggingface/image_gen_aux
+import torch
+from diffusers import FluxControlPipeline, FluxTransformer2DModel
+from diffusers.utils import load_image
+from image_gen_aux import DepthPreprocessor
+
+pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")
+pipe.load_lora_weights("black-forest-labs/FLUX.1-Depth-dev-lora")
+
+prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
+control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
+
+processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
+control_image = processor(control_image)[0].convert("RGB")
+
+image = pipe(
+    prompt=prompt,
+    control_image=control_image,
+    height=1024,
+    width=1024,
+    num_inference_steps=30,
+    guidance_scale=10.0,
+    generator=torch.Generator().manual_seed(42),
+).images[0]
+image.save("output.png")
+```
+
 ### Redux
 
 * Flux Redux pipeline is an adapter for FLUX.1 base models. It can be used with both flux-dev and flux-schnell, for image-to-image generation.
 
@@ -48,6 +48,11 @@ Since RegEx is supported as a way for matching layer identifiers, it is crucial
   - all
   - __call__
 
+## StableDiffusionPAGInpaintPipeline
+[[autodoc]] StableDiffusionPAGInpaintPipeline
+	- all
+	- __call__
+
 ## StableDiffusionPAGPipeline
 [[autodoc]] StableDiffusionPAGPipeline
 	- all
 
@@ -872,10 +872,9 @@ def prepare_rotary_positional_embeddings(
         crops_coords=grid_crops_coords,
         grid_size=(grid_height, grid_width),
         temporal_size=num_frames,
+        device=device,
     )
 
-    freqs_cos = freqs_cos.to(device=device)
-    freqs_sin = freqs_sin.to(device=device)
     return freqs_cos, freqs_sin
 
 
 
@@ -894,10 +894,9 @@ def prepare_rotary_positional_embeddings(
         crops_coords=grid_crops_coords,
         grid_size=(grid_height, grid_width),
         temporal_size=num_frames,
+        device=device,
     )
 
-    freqs_cos = freqs_cos.to(device=device)
-    freqs_sin = freqs_sin.to(device=device)
     return freqs_cos, freqs_sin
 
 
 
@@ -53,7 +53,10 @@
     Examples:
         ```py
         >>> import torch
-        >>> from diffusers import FluxPipeline
+        >>> import requests
+        >>> import PIL
+        >>> from io import BytesIO
+        >>> from diffusers import DiffusionPipeline
 
         >>> pipe = DiffusionPipeline.from_pretrained(
         ...    "black-forest-labs/FLUX.1-dev",
@@ -77,10 +80,7 @@
         ...     image_latents=image_latents,
         ...     latent_image_ids=latent_image_ids,
         ...     start_timestep=0,
-        ...     stop_timestep=.38,
-        ...     num_inference_steps=28,
-        ...     eta=0.9,
-        ...     stop_timestep=.38,
+        ...     stop_timestep=.25,
         ...     num_inference_steps=28,
         ...     eta=0.9,
         ... ).images[0]
 
@@ -365,6 +365,7 @@
             "StableDiffusionLDM3DPipeline",
             "StableDiffusionModelEditingPipeline",
             "StableDiffusionPAGImg2ImgPipeline",
+            "StableDiffusionPAGInpaintPipeline",
             "StableDiffusionPAGPipeline",
             "StableDiffusionPanoramaPipeline",
             "StableDiffusionParadigmsPipeline",
@@ -838,6 +839,7 @@
             StableDiffusionLDM3DPipeline,
             StableDiffusionModelEditingPipeline,
             StableDiffusionPAGImg2ImgPipeline,
+            StableDiffusionPAGInpaintPipeline,
             StableDiffusionPAGPipeline,
             StableDiffusionPanoramaPipeline,
             StableDiffusionParadigmsPipeline,
 
@@ -236,7 +236,7 @@ def denormalize(images: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, to
             `np.ndarray` or `torch.Tensor`:
                 The denormalized image array.
         """
-        return (images / 2 + 0.5).clamp(0, 1)
+        return (images * 0.5 + 0.5).clamp(0, 1)
 
     @staticmethod
     def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
@@ -537,6 +537,26 @@ def binarize(self, image: PIL.Image.Image) -> PIL.Image.Image:
 
         return image
 
+    def _denormalize_conditionally(
+        self, images: torch.Tensor, do_denormalize: Optional[List[bool]] = None
+    ) -> torch.Tensor:
+        r"""
+        Denormalize a batch of images based on a condition list.
+
+        Args:
+            images (`torch.Tensor`):
+                The input image tensor.
+            do_denormalize (`Optional[List[bool]]`, *optional*, defaults to `None`):
+                A list of booleans indicating whether to denormalize each image in the batch. If `None`, will use the
+                value of `do_normalize` in the `VaeImageProcessor` config.
+        """
+        if do_denormalize is None:
+            return self.denormalize(images) if self.config.do_normalize else images
+
+        return torch.stack(
+            [self.denormalize(images[i]) if do_denormalize[i] else images[i] for i in range(images.shape[0])]
+        )
+
     def get_default_height_width(
         self,
         image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
@@ -752,12 +772,7 @@ def postprocess(
         if output_type == "latent":
             return image
 
-        if do_denormalize is None:
-            do_denormalize = [self.config.do_normalize] * image.shape[0]
-
-        image = torch.stack(
-            [self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
-        )
+        image = self._denormalize_conditionally(image, do_denormalize)
 
         if output_type == "pt":
             return image
@@ -966,12 +981,7 @@ def postprocess(
             deprecate("Unsupported output_type", "1.0.0", deprecation_message, standard_warn=False)
             output_type = "np"
 
-        if do_denormalize is None:
-            do_denormalize = [self.config.do_normalize] * image.shape[0]
-
-        image = torch.stack(
-            [self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
-        )
+        image = self._denormalize_conditionally(image, do_denormalize)
 
         image = self.pt_to_numpy(image)
Original file line number	Diff line number	Diff line change
`@@ -872,10 +872,9 @@ def prepare_rotary_positional_embeddings(`
`872`	`872`	`crops_coords=grid_crops_coords,`
`873`	`873`	`grid_size=(grid_height, grid_width),`
`874`	`874`	`temporal_size=num_frames,`
	`875`	`+ device=device,`
`875`	`876`	`)`
`876`	`877`
`877`		`- freqs_cos = freqs_cos.to(device=device)`
`878`		`- freqs_sin = freqs_sin.to(device=device)`
`879`	`878`	`return freqs_cos, freqs_sin`
`880`	`879`
`881`	`880`
Original file line number	Diff line number	Diff line change
`@@ -894,10 +894,9 @@ def prepare_rotary_positional_embeddings(`
`894`	`894`	`crops_coords=grid_crops_coords,`
`895`	`895`	`grid_size=(grid_height, grid_width),`
`896`	`896`	`temporal_size=num_frames,`
	`897`	`+ device=device,`
`897`	`898`	`)`
`898`	`899`
`899`		`- freqs_cos = freqs_cos.to(device=device)`
`900`		`- freqs_sin = freqs_sin.to(device=device)`
`901`	`900`	`return freqs_cos, freqs_sin`
`902`	`901`
`903`	`902`