
Commit 1a36051

committed: refresh
1 parent 76c28bf commit 1a36051

File tree: 1 file changed (+93, -21 lines)


docs/source/en/using-diffusers/loading.md

Lines changed: 93 additions & 21 deletions
@@ -29,9 +29,8 @@ import torch
 from diffusers import DiffusionPipeline

 pipeline = DiffusionPipeline.from_pretrained(
-    "Qwen/Qwen-Image",
-    torch_dtype=torch.bfloat16
-).to("cuda")
+    "Qwen/Qwen-Image", torch_dtype=torch.bfloat16, device_map="cuda"
+)
 ```

 Every model has a specific pipeline subclass that inherits from [`DiffusionPipeline`]. A subclass usually has a narrow focus and is task-specific. See the table below for an example.
@@ -49,9 +48,8 @@ import torch
 from diffusers import QwenImagePipeline

 pipeline = QwenImagePipeline.from_pretrained(
-    "Qwen/Qwen-Image",
-    torch_dtype=torch.bfloat16
-).to("cuda")
+    "Qwen/Qwen-Image", torch_dtype=torch.bfloat16, device_map="cuda"
+)
 ```

 ### Local pipelines
@@ -70,9 +68,8 @@ import torch
 from diffusers import QwenImagePipeline

 pipeline = QwenImagePipeline.from_pretrained(
-    "path/to/local/Qwen/Qwen-Image",
-    torch_dtype=torch.bfloat16
-).to("cuda")
+    "path/to/local/Qwen/Qwen-Image", torch_dtype=torch.bfloat16, device_map="cuda"
+)
 ```

 The [`~QwenImagePipeline.from_pretrained`] method won't download files from the Hub when it detects a local path. But this also means it won't download and cache any updates that have been made to the model.
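One way to produce such a local path ahead of time is a one-time `snapshot_download` from the `huggingface_hub` library; a minimal sketch:

```py
from huggingface_hub import snapshot_download

# Download the full pipeline repository once; the returned directory
# can then be passed to from_pretrained as a local path.
local_path = snapshot_download("Qwen/Qwen-Image")
print(local_path)
```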
@@ -88,8 +85,8 @@ import torch
 from diffusers import HunyuanVideoPipeline

 pipeline = HunyuanVideoPipeline.from_pretrained(
-    "hunyuanvideo-community/HunyuanVideo",
-    torch_dtype={"transformer": torch.bfloat16, "default": torch.float16},
+    "hunyuanvideo-community/HunyuanVideo",
+    torch_dtype={"transformer": torch.bfloat16, "default": torch.float16},
 )
 print(pipeline.transformer.dtype, pipeline.vae.dtype)
 ```
@@ -101,12 +98,72 @@ import torch
 from diffusers import HunyuanVideoPipeline

 pipeline = HunyuanVideoPipeline.from_pretrained(
-    "hunyuanvideo-community/HunyuanVideo",
-    torch_dtype=torch.bfloat16
+    "hunyuanvideo-community/HunyuanVideo", torch_dtype=torch.bfloat16
 )
 print(pipeline.transformer.dtype, pipeline.vae.dtype)
 ```

+## Device placement
+
+The `device_map` argument determines individual model or pipeline placement on an accelerator like a GPU. It is especially helpful when there are multiple GPUs.
+
+Diffusers currently provides three options for `device_map`: `"cuda"`, `"balanced"`, and `"auto"`. Refer to the table below to compare the three placement strategies.
+
+| parameter | description |
+|---|---|
+| `"cuda"` | places the model or pipeline on the CUDA device |
+| `"balanced"` | evenly distributes the model or pipeline across all GPUs |
+| `"auto"` | distributes the model or pipeline from the fastest device first to the slowest |
+
+Use the `max_memory` argument in [`~DiffusionPipeline.from_pretrained`] to allocate a maximum amount of memory to use on each device. By default, Diffusers uses the maximum amount available.
+
+<hfoptions id="device_map">
+<hfoption id="pipeline">
+
+```py
+import torch
+from diffusers import DiffusionPipeline
+
+pipeline = DiffusionPipeline.from_pretrained(
+    "black-forest-labs/FLUX.1-dev",
+    torch_dtype=torch.bfloat16,
+    device_map="cuda",
+)
+```
+
+</hfoption>
+<hfoption id="individual model">
+
+```py
+import torch
+from diffusers import DiffusionPipeline, AutoModel
+
+max_memory = {0: "16GB", 1: "16GB"}
+transformer = AutoModel.from_pretrained(
+    "black-forest-labs/FLUX.1-dev",
+    subfolder="transformer",
+    torch_dtype=torch.bfloat16,
+    device_map="cuda",
+    max_memory=max_memory
+)
+```
+
+</hfoption>
+</hfoptions>
+
+The `hf_device_map` attribute allows you to access and view the `device_map`.
+
+```py
+print(pipeline.hf_device_map)
+# {'unet': 1, 'vae': 1, 'safety_checker': 0, 'text_encoder': 0}
+```
+
+Reset a pipeline's `device_map` with the [`~DiffusionPipeline.reset_device_map`] method. This is necessary if you want to use methods such as `.to()`, [`~DiffusionPipeline.enable_sequential_cpu_offload`], and [`~DiffusionPipeline.enable_model_cpu_offload`].
+
+```py
+pipeline.reset_device_map()
+```
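As a usage sketch, assuming the `pipeline` object from the examples above: once the device map is reset, placement methods work again.

```py
pipeline.reset_device_map()

# With the device map cleared, offloading can be applied as usual.
pipeline.enable_model_cpu_offload()
```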
+
 ## Parallel loading

 Large models are often [sharded](../training/distributed_inference#model-sharding) into smaller files so that they are easier to load. Diffusers supports loading shards in parallel to speed up the loading process.
@@ -124,12 +181,9 @@ import torch
 from diffusers import DiffusionPipeline

 os.environ["HF_ENABLE_PARALLEL_LOADING"] = "YES"
-os.environ["HF_PARALLEL_LOADING_WORKERS"] = "12"

 pipeline = DiffusionPipeline.from_pretrained(
-    "Wan-AI/Wan2.2-I2V-A14B-Diffusers",
-    torch_dtype=torch.bfloat16,
-    device_map="cuda"
+    "Wan-AI/Wan2.2-I2V-A14B-Diffusers", torch_dtype=torch.bfloat16, device_map="cuda"
 )
 ```

@@ -155,7 +209,8 @@ pipeline = DiffusionPipeline.from_pretrained(
     scheduler=scheduler,
     vae=vae,
     torch_dtype=torch.float16,
-).to("cuda")
+    device_map="cuda"
+)
 ```

 ## Reusing models in multiple pipelines
@@ -174,8 +229,8 @@ import torch
 from diffusers import AutoPipelineForText2Image

 pipeline_sdxl = AutoPipelineForText2Image.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
-).to("cuda")
+    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, device_map="cuda"
+)
 prompt = """
 cinematic film still of a cat sipping a margarita in a pool in Palm Springs, California
 highly detailed, high budget hollywood movie, cinemascope, moody, epic, gorgeous, film grain
@@ -203,4 +258,21 @@ print(f"Max memory reserved: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")
 > [!WARNING]
 > Pipelines created by [`~DiffusionPipeline.from_pipe`] share the same models and *state*. Modifying the state of a model in one pipeline affects all the other pipelines that share the same model.

-Some methods may not work correctly on pipelines created with [`~DiffusionPipeline.from_pipe`]. For example, [`~DiffusionPipeline.enable_model_cpu_offload`] relies on a unique model execution order, which may differ in the new pipeline. To ensure proper functionality, reapply these methods on the new pipeline.
+Some methods may not work correctly on pipelines created with [`~DiffusionPipeline.from_pipe`]. For example, [`~DiffusionPipeline.enable_model_cpu_offload`] relies on a unique model execution order, which may differ in the new pipeline. To ensure proper functionality, reapply these methods on the new pipeline.
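A minimal sketch of that pattern, assuming the SDXL checkpoint used earlier: create a second pipeline with [`~DiffusionPipeline.from_pipe`], then reapply offloading on it.

```py
import torch
from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image

pipeline_text2img = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
pipeline_text2img.enable_model_cpu_offload()

# The new pipeline shares the same models, but its execution order may
# differ, so offloading must be reapplied on it.
pipeline_img2img = AutoPipelineForImage2Image.from_pipe(pipeline_text2img)
pipeline_img2img.enable_model_cpu_offload()
```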
+
+## Safety checker
+
+Diffusers provides a [safety checker](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/safety_checker.py) for older Stable Diffusion models to prevent generating harmful content. It screens the generated output against a set of hardcoded harmful concepts.
+
+If you want to disable the safety checker, pass `safety_checker=None` to [`~DiffusionPipeline.from_pretrained`] as shown below.
+
+```py
+from diffusers import DiffusionPipeline
+
+pipeline = DiffusionPipeline.from_pretrained(
+    "stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None
+)
+"""
+You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide by the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend keeping the safety filter enabled in all public-facing circumstances, disabling it only for use cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
+"""
+```
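With the safety checker left enabled (the default), the pipeline output reports which images were flagged; a minimal sketch:

```py
import torch
from diffusers import DiffusionPipeline

pipeline = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
    device_map="cuda",
)

result = pipeline("a photo of an astronaut riding a horse on mars")
# nsfw_content_detected is a list of booleans, one per generated image;
# flagged images are returned blacked out.
print(result.nsfw_content_detected)
```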
