huggingface
diff --git a/docs/source/en/api/pipelines/flux.md b/docs/source/en/api/pipelines/flux.md
Lines changed: 59 additions & 0 deletions
diff --git a/examples/cogvideo/train_cogvideox_image_to_video_lora.py b/examples/cogvideo/train_cogvideox_image_to_video_lora.py
Lines changed: 1 addition & 2 deletions
diff --git a/examples/cogvideo/train_cogvideox_lora.py b/examples/cogvideo/train_cogvideox_lora.py
Lines changed: 1 addition & 2 deletions
@@ -143,6 +143,35 @@ image = pipe(
 image.save("output.png")
 ```
 
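+Canny Control is also available as a LoRA: load the `black-forest-labs/FLUX.1-Canny-dev-lora` weights on top of the base `black-forest-labs/FLUX.1-dev` checkpoint with [`FluxControlPipeline`]. The usage is as follows: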
+
+```python
+# !pip install -U controlnet-aux
+import torch
+from controlnet_aux import CannyDetector
+from diffusers import FluxControlPipeline
+from diffusers.utils import load_image
+
+pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")
+pipe.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora")
+
+prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
+control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
+
+processor = CannyDetector()
+control_image = processor(control_image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)
+
+image = pipe(
+    prompt=prompt,
+    control_image=control_image,
+    height=1024,
+    width=1024,
+    num_inference_steps=50,
+    guidance_scale=30.0,
+).images[0]
+image.save("output.png")
+```
+
 ### Depth Control
 
 **Note:** `black-forest-labs/Flux.1-Depth-dev` is _not_ a ControlNet model. [`ControlNetModel`] models are a separate component from the UNet/Transformer whose residuals are added to the actual underlying model. Depth Control is an alternate architecture that achieves effectively the same results as a ControlNet model would, by using channel-wise concatenation with input control condition and ensuring the transformer learns structure control by following the condition as closely as possible.
@@ -174,6 +203,36 @@ image = pipe(
 image.save("output.png")
 ```
 
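+Depth Control is also available as a LoRA: load the `black-forest-labs/FLUX.1-Depth-dev-lora` weights on top of the base `black-forest-labs/FLUX.1-dev` checkpoint with [`FluxControlPipeline`]. The usage is as follows: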
+
+```python
+# !pip install git+https://github.com/huggingface/image_gen_aux
+import torch
+from diffusers import FluxControlPipeline
+from diffusers.utils import load_image
+from image_gen_aux import DepthPreprocessor
+
+pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")
+pipe.load_lora_weights("black-forest-labs/FLUX.1-Depth-dev-lora")
+
+prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
+control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
+
+processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
+control_image = processor(control_image)[0].convert("RGB")
+
+image = pipe(
+    prompt=prompt,
+    control_image=control_image,
+    height=1024,
+    width=1024,
+    num_inference_steps=30,
+    guidance_scale=10.0,
+    generator=torch.Generator().manual_seed(42),
+).images[0]
+image.save("output.png")
+```
+
 ### Redux
 
 * Flux Redux pipeline is an adapter for FLUX.1 base models. It can be used with both flux-dev and flux-schnell, for image-to-image generation.
 
@@ -872,10 +872,9 @@ def prepare_rotary_positional_embeddings(
         crops_coords=grid_crops_coords,
         grid_size=(grid_height, grid_width),
         temporal_size=num_frames,
+        device=device,
     )
 
-    freqs_cos = freqs_cos.to(device=device)
-    freqs_sin = freqs_sin.to(device=device)
     return freqs_cos, freqs_sin
 
 
 
@@ -894,10 +894,9 @@ def prepare_rotary_positional_embeddings(
         crops_coords=grid_crops_coords,
         grid_size=(grid_height, grid_width),
         temporal_size=num_frames,
+        device=device,
     )
 
-    freqs_cos = freqs_cos.to(device=device)
-    freqs_sin = freqs_sin.to(device=device)
     return freqs_cos, freqs_sin
Original file line number	Diff line number	Diff line change
`@@ -872,10 +872,9 @@ def prepare_rotary_positional_embeddings(`
`872`	`872`	`crops_coords=grid_crops_coords,`
`873`	`873`	`grid_size=(grid_height, grid_width),`
`874`	`874`	`temporal_size=num_frames,`
	`875`	`+ device=device,`
`875`	`876`	`)`
`876`	`877`
`877`		`- freqs_cos = freqs_cos.to(device=device)`
`878`		`- freqs_sin = freqs_sin.to(device=device)`
`879`	`878`	`return freqs_cos, freqs_sin`
`880`	`879`
`881`	`880`
Original file line number	Diff line number	Diff line change
`@@ -894,10 +894,9 @@ def prepare_rotary_positional_embeddings(`
`894`	`894`	`crops_coords=grid_crops_coords,`
`895`	`895`	`grid_size=(grid_height, grid_width),`
`896`	`896`	`temporal_size=num_frames,`
	`897`	`+ device=device,`
`897`	`898`	`)`
`898`	`899`
`899`		`- freqs_cos = freqs_cos.to(device=device)`
`900`		`- freqs_sin = freqs_sin.to(device=device)`
`901`	`900`	`return freqs_cos, freqs_sin`
`902`	`901`
`903`	`902`