diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 283efeef72c1..770093438ed5 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -64,6 +64,8 @@ title: Overview - local: using-diffusers/create_a_server title: Create a server + - local: using-diffusers/batched_inference + title: Batch inference - local: training/distributed_inference title: Distributed inference - local: using-diffusers/scheduler_features diff --git a/docs/source/en/using-diffusers/batched_inference.md b/docs/source/en/using-diffusers/batched_inference.md new file mode 100644 index 000000000000..b5e55c27ca41 --- /dev/null +++ b/docs/source/en/using-diffusers/batched_inference.md @@ -0,0 +1,264 @@ + + +# Batch inference + +Batch inference processes multiple prompts at a time to increase throughput. It is more efficient because processing multiple prompts at once maximizes GPU usage versus processing a single prompt and underutilizing the GPU. + +The downside is increased latency because you must wait for the entire batch to complete, and more GPU memory is required for large batches. + + + + +For text-to-image, pass a list of prompts to the pipeline. 
+ +```py +import torch +from diffusers import DiffusionPipeline + +pipeline = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + torch_dtype=torch.float16 +).to("cuda") + +prompts = [ + "cinematic photo of A beautiful sunset over mountains, 35mm photograph, film, professional, 4k, highly detailed", + "cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain", + "pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics" +] + +images = pipeline( + prompt=prompts, +).images + +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) +axes = axes.flatten() + +for i, image in enumerate(images): + axes[i].imshow(image) + axes[i].set_title(f"Image {i+1}") + axes[i].axis('off') + +plt.tight_layout() +plt.show() +``` + +To generate multiple variations of one prompt, use the `num_images_per_prompt` argument. + +```py +import torch +import matplotlib.pyplot as plt +from diffusers import DiffusionPipeline + +pipeline = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + torch_dtype=torch.float16 +).to("cuda") + +images = pipeline( + prompt="pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics", + num_images_per_prompt=4 +).images + +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) +axes = axes.flatten() + +for i, image in enumerate(images): + axes[i].imshow(image) + axes[i].set_title(f"Image {i+1}") + axes[i].axis('off') + +plt.tight_layout() +plt.show() +``` + +Combine both approaches to generate different variations of different prompts. 
+ +```py +images = pipeline( + prompt=prompts, + num_images_per_prompt=2, +).images + +fig, axes = plt.subplots(2, 3, figsize=(18, 12)) +axes = axes.flatten() + +for i, image in enumerate(images): + axes[i].imshow(image) + axes[i].set_title(f"Image {i+1}") + axes[i].axis('off') + +plt.tight_layout() +plt.show() +``` + + + + +For image-to-image, pass a list of input images and prompts to the pipeline. + +```py +import torch +from diffusers.utils import load_image +from diffusers import AutoPipelineForImage2Image + +pipeline = AutoPipelineForImage2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + torch_dtype=torch.float16 +).to("cuda") + +input_images = [ + load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png"), + load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"), + load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/detail-prompt.png") +] + +prompts = [ + "cinematic photo of a beautiful sunset over mountains, 35mm photograph, film, professional, 4k, highly detailed", + "cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain", + "pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics" +] + +images = pipeline( + prompt=prompts, + image=input_images, + guidance_scale=8.0, + strength=0.5 +).images + +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) +axes = axes.flatten() + +for i, image in enumerate(images): + axes[i].imshow(image) + axes[i].set_title(f"Image {i+1}") + axes[i].axis('off') + +plt.tight_layout() +plt.show() +``` + +To generate multiple variations of one prompt, use the `num_images_per_prompt` argument. 
+ +```py +import torch +import matplotlib.pyplot as plt +from diffusers.utils import load_image +from diffusers import AutoPipelineForImage2Image + +pipeline = AutoPipelineForImage2Image.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + torch_dtype=torch.float16 +).to("cuda") + +input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/detail-prompt.png") + +images = pipeline( + prompt="pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics", + image=input_image, + num_images_per_prompt=4 +).images + +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) +axes = axes.flatten() + +for i, image in enumerate(images): + axes[i].imshow(image) + axes[i].set_title(f"Image {i+1}") + axes[i].axis('off') + +plt.tight_layout() +plt.show() +``` + +Combine both approaches to generate different variations of different prompts. + +```py +input_images = [ + load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png"), + load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/detail-prompt.png") +] + +prompts = [ + "cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain", + "pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics" +] + +images = pipeline( + prompt=prompts, + image=input_images, + num_images_per_prompt=2, +).images + +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) +axes = axes.flatten() + +for i, image in enumerate(images): + axes[i].imshow(image) + axes[i].set_title(f"Image {i+1}") + axes[i].axis('off') + +plt.tight_layout() +plt.show() +``` + + + + +## Deterministic generation + +Enable reproducible batch generation by passing a list of [`Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) objects to the pipeline and tying each `Generator` 
to a seed to reuse it. + +Use a list comprehension to iterate over the batch size specified in `range()` to create a unique `Generator` object for each image in the batch. + +Don't multiply a single-`Generator` list by the batch size, as in the example below, because that only creates one `Generator` object that is used sequentially for each image in the batch. + +```py +generator = [torch.Generator(device="cuda").manual_seed(0)] * 3 +``` + +Instead, create the list of `Generator` objects with a list comprehension and pass it to the pipeline's `generator` argument. + +```py +import torch +from diffusers import DiffusionPipeline + +pipeline = DiffusionPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + torch_dtype=torch.float16 +).to("cuda") + +generator = [torch.Generator(device="cuda").manual_seed(i) for i in range(3)] +prompts = [ + "cinematic photo of A beautiful sunset over mountains, 35mm photograph, film, professional, 4k, highly detailed", + "cinematic film still of a cat basking in the sun on a roof in Turkey, highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain", + "pixel-art a cozy coffee shop interior, low-res, blocky, pixel art style, 8-bit graphics" +] + +images = pipeline( + prompt=prompts, + generator=generator +).images + +fig, axes = plt.subplots(2, 2, figsize=(12, 12)) +axes = axes.flatten() + +for i, image in enumerate(images): + axes[i].imshow(image) + axes[i].set_title(f"Image {i+1}") + axes[i].axis('off') + +plt.tight_layout() +plt.show() +``` + +You can use this to iteratively select an image associated with a seed and then improve on it by crafting a more detailed prompt. 
\ No newline at end of file diff --git a/docs/source/en/using-diffusers/reusing_seeds.md b/docs/source/en/using-diffusers/reusing_seeds.md index 60b8fee754f5..ac9350f24caa 100644 --- a/docs/source/en/using-diffusers/reusing_seeds.md +++ b/docs/source/en/using-diffusers/reusing_seeds.md @@ -136,53 +136,3 @@ result2 = pipe(prompt=prompt, num_inference_steps=50, generator=g, output_type=" print("L_inf dist =", abs(result1 - result2).max()) "L_inf dist = tensor(0., device='cuda:0')" ``` - -## Deterministic batch generation - -A practical application of creating reproducible pipelines is *deterministic batch generation*. You generate a batch of images and select one image to improve with a more detailed prompt. The main idea is to pass a list of [Generator's](https://pytorch.org/docs/stable/generated/torch.Generator.html) to the pipeline and tie each `Generator` to a seed so you can reuse it. - -Let's use the [stable-diffusion-v1-5/stable-diffusion-v1-5](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) checkpoint and generate a batch of images. - -```py -import torch -from diffusers import DiffusionPipeline -from diffusers.utils import make_image_grid - -pipeline = DiffusionPipeline.from_pretrained( - "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True -) -pipeline = pipeline.to("cuda") -``` - -Define four different `Generator`s and assign each `Generator` a seed (`0` to `3`). Then generate a batch of images and pick one to iterate on. - -> [!WARNING] -> Use a list comprehension that iterates over the batch size specified in `range()` to create a unique `Generator` object for each image in the batch. If you multiply the `Generator` by the batch size integer, it only creates *one* `Generator` object that is used sequentially for each image in the batch. 
-> -> ```py -> [torch.Generator().manual_seed(seed)] * 4 -> ``` - -```python -generator = [torch.Generator(device="cuda").manual_seed(i) for i in range(4)] -prompt = "Labrador in the style of Vermeer" -images = pipeline(prompt, generator=generator, num_images_per_prompt=4).images[0] -make_image_grid(images, rows=2, cols=2) -``` - -
- -
- -Let's improve the first image (you can choose any image you want) which corresponds to the `Generator` with seed `0`. Add some additional text to your prompt and then make sure you reuse the same `Generator` with seed `0`. All the generated images should resemble the first image. - -```python -prompt = [prompt + t for t in [", highly realistic", ", artsy", ", trending", ", colorful"]] -generator = [torch.Generator(device="cuda").manual_seed(0) for i in range(4)] -images = pipeline(prompt, generator=generator).images -make_image_grid(images, rows=2, cols=2) -``` - -
- -