Change the default model to the BF16 one for more stable training and output

lawrence-cj · lawrence-cj · commit 6bf67e7effa9 · 2024-12-21T23:31:41.000-08:00
diff --git a/docs/source/en/api/models/sana_transformer2d.md b/docs/source/en/api/models/sana_transformer2d.md
@@ -22,7 +22,7 @@ The model can be loaded with the following code snippet.
 ```python
 from diffusers import SanaTransformer2DModel
 
-transformer = SanaTransformer2DModel.from_pretrained("Efficient-Large-Model/Sana_1600M_1024px_diffusers", subfolder="transformer", torch_dtype=torch.float16)
+transformer = SanaTransformer2DModel.from_pretrained("Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers", subfolder="transformer", torch_dtype=torch.bfloat16)
 ```
 
 ## SanaTransformer2DModel
diff --git a/docs/source/en/api/pipelines/sana.md b/docs/source/en/api/pipelines/sana.md
@@ -32,9 +32,9 @@ Available models:
 
 | Model | Recommended dtype |
 |:-----:|:-----------------:|
+| [`Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers) | `torch.bfloat16` |
 | [`Efficient-Large-Model/Sana_1600M_1024px_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px_diffusers) | `torch.float16` |
 | [`Efficient-Large-Model/Sana_1600M_1024px_MultiLing_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px_MultiLing_diffusers) | `torch.float16` |
-| [`Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers) | `torch.bfloat16` |
 | [`Efficient-Large-Model/Sana_1600M_512px_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_1600M_512px_diffusers) | `torch.float16` |
 | [`Efficient-Large-Model/Sana_1600M_512px_MultiLing_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_1600M_512px_MultiLing_diffusers) | `torch.float16` |
 | [`Efficient-Large-Model/Sana_600M_1024px_diffusers`](https://huggingface.co/Efficient-Large-Model/Sana_600M_1024px_diffusers) | `torch.float16` |
diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sana.py b/src/diffusers/pipelines/pag/pipeline_pag_sana.py
@@ -59,13 +59,13 @@
         >>> from diffusers import SanaPAGPipeline
 
         >>> pipe = SanaPAGPipeline.from_pretrained(
-        ...     "Efficient-Large-Model/Sana_1600M_1024px_diffusers",
+        ...     "Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers",
         ...     pag_applied_layers=["transformer_blocks.8"],
         ...     torch_dtype=torch.float32,
         ... )
         >>> pipe.to("cuda")
         >>> pipe.text_encoder.to(torch.bfloat16)
-        >>> pipe.transformer = pipe.transformer.to(torch.float16)
+        >>> pipe.transformer = pipe.transformer.to(torch.bfloat16)
 
         >>> image = pipe(prompt='a cyberpunk cat with a neon sign that says "Sana"')[0]
         >>> image[0].save("output.png")
diff --git a/src/diffusers/pipelines/sana/pipeline_sana.py b/src/diffusers/pipelines/sana/pipeline_sana.py
@@ -62,11 +62,11 @@
         >>> from diffusers import SanaPipeline
 
         >>> pipe = SanaPipeline.from_pretrained(
-        ...     "Efficient-Large-Model/Sana_1600M_1024px_diffusers", torch_dtype=torch.float32
+        ...     "Efficient-Large-Model/Sana_1600M_1024px_BF16_diffusers", torch_dtype=torch.float32
         ... )
         >>> pipe.to("cuda")
         >>> pipe.text_encoder.to(torch.bfloat16)
-        >>> pipe.transformer = pipe.transformer.to(torch.float16)
+        >>> pipe.transformer = pipe.transformer.to(torch.bfloat16)
 
         >>> image = pipe(prompt='a cyberpunk cat with a neon sign that says "Sana"')[0]
         >>> image[0].save("output.png")