Skip to content

Commit 9b0eab4

Browse files
authored
Merge branch 'main' into cogvideox-ddim-inversion
2 parents 94eb110 + 24c062a commit 9b0eab4

File tree

99 files changed

+3331
-617
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

99 files changed

+3331
-617
lines changed

docs/source/en/conceptual/evaluation.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ specific language governing permissions and limitations under the License.
1616
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
1717
</a>
1818

19+
> [!TIP]
20+
> This document has become outdated given the emergence of dedicated evaluation frameworks for diffusion models for image generation. Please check
21+
> out works like [HEIM](https://crfm.stanford.edu/helm/heim/latest/), [T2I-Compbench](https://arxiv.org/abs/2307.06350),
22+
> [GenEval](https://arxiv.org/abs/2310.11513).
23+
1924
Evaluation of generative models like [Stable Diffusion](https://huggingface.co/docs/diffusers/stable_diffusion) is subjective in nature. But as practitioners and researchers, we often have to make careful choices amongst many different possibilities. So, when working with different generative models (like GANs, Diffusion, etc.), how do we choose one over the other?
2025

2126
Qualitative evaluation of such models can be error-prone and might incorrectly influence a decision.

examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ def log_validation(
227227
pipeline.set_progress_bar_config(disable=True)
228228

229229
# run inference
230-
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed else None
230+
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed is not None else None
231231
autocast_ctx = nullcontext()
232232

233233
with autocast_ctx:

examples/advanced_diffusion_training/train_dreambooth_lora_sd15_advanced.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,7 +1883,11 @@ def compute_text_embeddings(prompt, text_encoders, tokenizers):
18831883
pipeline.set_progress_bar_config(disable=True)
18841884

18851885
# run inference
1886-
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed else None
1886+
generator = (
1887+
torch.Generator(device=accelerator.device).manual_seed(args.seed)
1888+
if args.seed is not None
1889+
else None
1890+
)
18871891
pipeline_args = {"prompt": args.validation_prompt}
18881892

18891893
if torch.backends.mps.is_available():
@@ -1987,7 +1991,9 @@ def compute_text_embeddings(prompt, text_encoders, tokenizers):
19871991
)
19881992
# run inference
19891993
pipeline = pipeline.to(accelerator.device)
1990-
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed else None
1994+
generator = (
1995+
torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed is not None else None
1996+
)
19911997
images = [
19921998
pipeline(args.validation_prompt, num_inference_steps=25, generator=generator).images[0]
19931999
for _ in range(args.num_validation_images)

examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def log_validation(
269269
pipeline.set_progress_bar_config(disable=True)
270270

271271
# run inference
272-
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed else None
272+
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed is not None else None
273273
# Currently the context determination is a bit hand-wavy. We can improve it in the future if there's a better
274274
# way to condition it. Reference: https://github.com/huggingface/diffusers/pull/7126#issuecomment-1968523051
275275
if torch.backends.mps.is_available() or "playground" in args.pretrained_model_name_or_path:

examples/cogvideo/train_cogvideox_image_to_video_lora.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,7 @@ def log_validation(
722722
# pipe.set_progress_bar_config(disable=True)
723723

724724
# run inference
725-
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed else None
725+
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed is not None else None
726726

727727
videos = []
728728
for _ in range(args.num_validation_videos):

examples/cogvideo/train_cogvideox_lora.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -739,7 +739,7 @@ def log_validation(
739739
# pipe.set_progress_bar_config(disable=True)
740740

741741
# run inference
742-
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed else None
742+
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) if args.seed is not None else None
743743

744744
videos = []
745745
for _ in range(args.num_validation_videos):

examples/community/README.md

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Please also check out our [Community Scripts](https://github.com/huggingface/dif
5353
| Stable Diffusion Mixture Tiling Pipeline SD 1.5 | A pipeline generates cohesive images by integrating multiple diffusion processes, each focused on a specific image region and considering boundary effects for smooth blending | [Stable Diffusion Mixture Tiling Pipeline SD 1.5](#stable-diffusion-mixture-tiling-pipeline-sd-15) | [![Hugging Face Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-yellow)](https://huggingface.co/spaces/albarji/mixture-of-diffusers) | [Álvaro B Jiménez](https://github.com/albarji/) |
5454
| Stable Diffusion Mixture Canvas Pipeline SD 1.5 | A pipeline generates cohesive images by integrating multiple diffusion processes, each focused on a specific image region and considering boundary effects for smooth blending. Works by defining a list of Text2Image region objects that detail the region of influence of each diffuser. | [Stable Diffusion Mixture Canvas Pipeline SD 1.5](#stable-diffusion-mixture-canvas-pipeline-sd-15) | [![Hugging Face Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-yellow)](https://huggingface.co/spaces/albarji/mixture-of-diffusers) | [Álvaro B Jiménez](https://github.com/albarji/) |
5555
| Stable Diffusion Mixture Tiling Pipeline SDXL | A pipeline generates cohesive images by integrating multiple diffusion processes, each focused on a specific image region and considering boundary effects for smooth blending | [Stable Diffusion Mixture Tiling Pipeline SDXL](#stable-diffusion-mixture-tiling-pipeline-sdxl) | [![Hugging Face Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-yellow)](https://huggingface.co/spaces/elismasilva/mixture-of-diffusers-sdxl-tiling) | [Eliseu Silva](https://github.com/DEVAIEXP/) |
56+
| Stable Diffusion MoD ControlNet Tile SR Pipeline SDXL | This is an advanced pipeline that leverages ControlNet Tile and Mixture-of-Diffusers techniques, integrating tile diffusion directly into the latent space denoising process. Designed to overcome the limitations of conventional pixel-space tile processing, this pipeline delivers Super Resolution (SR) upscaling for higher-quality images, reduced processing time, and greater adaptability. | [Stable Diffusion MoD ControlNet Tile SR Pipeline SDXL](#stable-diffusion-mod-controlnet-tile-sr-pipeline-sdxl) | [![Hugging Face Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-yellow)](https://huggingface.co/spaces/elismasilva/mod-control-tile-upscaler-sdxl) | [Eliseu Silva](https://github.com/DEVAIEXP/) |
5657
| FABRIC - Stable Diffusion with feedback Pipeline | pipeline supports feedback from liked and disliked images | [Stable Diffusion Fabric Pipeline](#stable-diffusion-fabric-pipeline) | [Notebook](https://github.com/huggingface/notebooks/blob/main/diffusers/stable_diffusion_fabric.ipynb)| [Shauray Singh](https://shauray8.github.io/about_shauray/) |
5758
| sketch inpaint - Inpainting with non-inpaint Stable Diffusion | sketch inpaint much like in automatic1111 | [Masked Im2Im Stable Diffusion Pipeline](#stable-diffusion-masked-im2im) | - | [Anatoly Belikov](https://github.com/noskill) |
5859
| sketch inpaint xl - Inpainting with non-inpaint Stable Diffusion | sketch inpaint much like in automatic1111 | [Masked Im2Im Stable Diffusion XL Pipeline](#stable-diffusion-xl-masked-im2im) | - | [Anatoly Belikov](https://github.com/noskill) |
@@ -2631,6 +2632,103 @@ image = pipe(
26312632

26322633
![mixture_tiling_results](https://huggingface.co/datasets/elismasilva/results/resolve/main/mixture_of_diffusers_sdxl_1.png)
26332634

2635+
### Stable Diffusion MoD ControlNet Tile SR Pipeline SDXL
2636+
2637+
This pipeline implements the [MoD (Mixture-of-Diffusers)](https://arxiv.org/pdf/2408.06072) tiled diffusion technique and combines it with SDXL's ControlNet Tile process to generate SR images.
2638+
2639+
This works better with 4x scales, but you can try adjusting the parameters for higher scales.
2640+
2641+
````python
2642+
import torch
2643+
from diffusers import DiffusionPipeline, ControlNetUnionModel, AutoencoderKL, UniPCMultistepScheduler, UNet2DConditionModel
2644+
from diffusers.utils import load_image
2645+
from PIL import Image
2646+
2647+
device = "cuda"
2648+
2649+
# Initialize the models and pipeline
2650+
controlnet = ControlNetUnionModel.from_pretrained(
2651+
"brad-twinkl/controlnet-union-sdxl-1.0-promax", torch_dtype=torch.float16
2652+
).to(device=device)
2653+
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to(device=device)
2654+
2655+
model_id = "SG161222/RealVisXL_V5.0"
2656+
pipe = DiffusionPipeline.from_pretrained(
2657+
model_id,
2658+
torch_dtype=torch.float16,
2659+
vae=vae,
2660+
controlnet=controlnet,
2661+
custom_pipeline="mod_controlnet_tile_sr_sdxl",
2662+
use_safetensors=True,
2663+
variant="fp16",
2664+
).to(device)
2665+
2666+
unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="unet", variant="fp16", use_safetensors=True)
2667+
2668+
#pipe.enable_model_cpu_offload() # << Enable this if you have limited VRAM
2669+
pipe.enable_vae_tiling() # << Enable this if you have limited VRAM
2670+
pipe.enable_vae_slicing() # << Enable this if you have limited VRAM
2671+
2672+
# Set selected scheduler
2673+
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
2674+
2675+
# Load image
2676+
control_image = load_image("https://huggingface.co/datasets/DEVAIEXP/assets/resolve/main/1.jpg")
2677+
original_height = control_image.height
2678+
original_width = control_image.width
2679+
print(f"Current resolution: H:{original_height} x W:{original_width}")
2680+
2681+
# Pre-upscale image for tiling
2682+
resolution = 4096
2683+
tile_gaussian_sigma = 0.3
2684+
max_tile_size = 1024 # or 1280
2685+
2686+
current_size = max(control_image.size)
2687+
scale_factor = max(2, resolution / current_size)
2688+
new_size = (int(control_image.width * scale_factor), int(control_image.height * scale_factor))
2689+
image = control_image.resize(new_size, Image.LANCZOS)
2690+
2691+
# Update target height and width
2692+
target_height = image.height
2693+
target_width = image.width
2694+
print(f"Target resolution: H:{target_height} x W:{target_width}")
2695+
2696+
# Calculate overlap size
2697+
normal_tile_overlap, border_tile_overlap = pipe.calculate_overlap(target_width, target_height)
2698+
2699+
# Set other params
2700+
tile_weighting_method = pipe.TileWeightingMethod.COSINE.value
2701+
guidance_scale = 4
2702+
num_inference_steps = 35
2703+
denoising_strenght = 0.65
2704+
controlnet_strength = 1.0
2705+
prompt = "high-quality, noise-free edges, high quality, 4k, hd, 8k"
2706+
negative_prompt = "blurry, pixelated, noisy, low resolution, artifacts, poor details"
2707+
2708+
# Image generation
2709+
generated_image = pipe(
2710+
image=image,
2711+
control_image=control_image,
2712+
control_mode=[6],
2713+
controlnet_conditioning_scale=float(controlnet_strength),
2714+
prompt=prompt,
2715+
negative_prompt=negative_prompt,
2716+
normal_tile_overlap=normal_tile_overlap,
2717+
border_tile_overlap=border_tile_overlap,
2718+
height=target_height,
2719+
width=target_width,
2720+
original_size=(original_width, original_height),
2721+
target_size=(target_width, target_height),
2722+
guidance_scale=guidance_scale,
2723+
strength=float(denoising_strenght),
2724+
tile_weighting_method=tile_weighting_method,
2725+
max_tile_size=max_tile_size,
2726+
tile_gaussian_sigma=float(tile_gaussian_sigma),
2727+
num_inference_steps=num_inference_steps,
2728+
)["images"][0]
2729+
````
2730+
![Upscaled](https://huggingface.co/datasets/DEVAIEXP/assets/resolve/main/1_input_4x.png)
2731+
26342732
### TensorRT Inpainting Stable Diffusion Pipeline
26352733

26362734
The TensorRT Pipeline can be used to accelerate the Inpainting Stable Diffusion Inference run.

0 commit comments

Comments
 (0)