Merged
Changes from 11 of 31 commits

Commits
626e339  NPU Adaption for Sanna (Dec 30, 2024)
1a72a00  NPU Adaption for Sanna (leisuzz, Dec 30, 2024)
4d67c54  Merge branch 'main' into main (sayakpaul, Jan 3, 2025)
a1965dd  NPU Adaption for Sanna (Jan 7, 2025)
2c3b117  Merge https://github.com/leisuzz/diffusers (Jan 7, 2025)
326b98d  NPU Adaption for Sanna (Jan 7, 2025)
715822f  Merge branch 'main' into main (leisuzz, Jan 7, 2025)
963e290  NPU Adaption for Sanna (Jan 7, 2025)
510e1d6  Merge https://github.com/leisuzz/diffusers (Jan 7, 2025)
3d3aae3  NPU Adaption for Sanna (Jan 8, 2025)
4cea819  NPU Adaption for Sanna (Jan 8, 2025)
0d9e1b3  NPU Adaption for Sanna (Jan 8, 2025)
2052049  NPU Adaption for Sanna (Jan 8, 2025)
487dd1a  Merge branch 'main' into main (leisuzz, Jan 13, 2025)
cfbbb8f  NPU Adaption for Sanna (Jan 14, 2025)
7b8ad74  Merge branch 'main' of https://github.com/leisuzz/diffusers (Jan 14, 2025)
d7d54d8  Merge branch 'main' into main (leisuzz, Jan 16, 2025)
ad4beaa  Merge branch 'main' into main (leisuzz, Jan 17, 2025)
52d8c71  Merge branch 'main' into main (leisuzz, Jan 22, 2025)
4c1d56d  NPU Adaption for Sanna (Jan 23, 2025)
63e3459  Merge https://github.com/leisuzz/diffusers (Jan 23, 2025)
d61d570  NPU Adaption for Sanna (Jan 23, 2025)
ab2d71b  NPU Adaption for Sanna (Jan 23, 2025)
a323229  Merge branch 'main' into main (leisuzz, Jan 23, 2025)
a456fb1  NPU Adaption for Sanna (Jan 23, 2025)
fedfdd4  NPU Adaption for Sanna (Jan 24, 2025)
7364276  Merge branch 'main' of https://github.com/leisuzz/diffusers (Jan 24, 2025)
3add6de  NPU Adaption for Sanna (Jan 24, 2025)
70cf529  NPU Adaption for Sanna (Jan 24, 2025)
8f18aae  NPU Adaption for Sanna (Jan 24, 2025)
feb8064  Merge branch 'main' into main (leisuzz, Jan 24, 2025)
examples/dreambooth/train_dreambooth_lora_sana.py (7 changed lines: 5 additions, 2 deletions)
@@ -63,6 +63,7 @@
     is_wandb_available,
 )
 from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
+from diffusers.utils.import_utils import is_torch_npu_available
 from diffusers.utils.torch_utils import is_compiled_module


@@ -74,6 +75,9 @@

 logger = get_logger(__name__)
 
+if is_torch_npu_available():
+    torch.npu.config.allow_internal_format = False
+
 
 def save_model_card(
     repo_id: str,
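
For context, the block added above gates an Ascend-specific setting: `torch.npu.config.allow_internal_format = False` keeps tensors in the standard ND layout rather than device-private internal formats such as NZ. A minimal sketch of the same guard pattern in a standalone script, assuming the optional `torch_npu` package is installed, could look like this (not part of the PR):

# Minimal sketch: guarding Ascend NPU setup in a user script.
import torch

from diffusers.utils.import_utils import is_torch_npu_available

if is_torch_npu_available():
    import torch_npu  # noqa: F401  (registers the "npu" device type with PyTorch)

    # Keep tensors in the standard ND layout instead of private internal formats,
    # mirroring the setting added in this training script.
    torch.npu.config.allow_internal_format = False
    device = torch.device("npu")
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
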
@@ -920,8 +924,7 @@ def main(args):
                 image.save(image_filename)
 
         del pipeline
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
+        free_memory()
 
     # Handle the repository creation
     if accelerator.is_main_process:
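The script now calls the `free_memory()` helper from `diffusers.training_utils` instead of the CUDA-only `torch.cuda.empty_cache()`, so cache clearing also covers non-CUDA backends such as NPU. A rough sketch of what such a device-agnostic helper does (not the library's exact implementation) is:

# Rough sketch of a device-agnostic cache-release helper, in the spirit of
# diffusers.training_utils.free_memory; the function name here is illustrative.
import gc

import torch


def free_memory_sketch() -> None:
    gc.collect()  # drop Python-level references first
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    elif hasattr(torch, "npu") and torch.npu.is_available():  # Ascend NPU via torch_npu
        torch.npu.empty_cache()
    elif torch.backends.mps.is_available():
        torch.mps.empty_cache()
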
src/diffusers/models/attention_processor.py (15 changed lines: 14 additions, 1 deletion)
@@ -294,6 +294,10 @@ def __init__(
             processor = (
                 AttnProcessor2_0() if hasattr(F, "scaled_dot_product_attention") and self.scale_qk else AttnProcessor()
             )
+
Review comment (Collaborator, on the added lines below): "umm I don't think we should change the default attention processor here, let's keep this logic in SANA :)" (A sketch of that alternative appears after this hunk.)

+            if is_torch_npu_available():
+                if isinstance(processor, AttnProcessor2_0):
+                    processor = AttnProcessorNPU()
         self.set_processor(processor)
 
     def set_use_xla_flash_attention(
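
Following the review comment above, here is a hedged sketch of the suggested alternative: select the NPU processor where SANA builds its attention layers rather than changing the default inside `Attention.__init__`. The helper name and argument list are illustrative, not the actual SANA constructor:

# Sketch only: choose the processor at the SANA call site instead of changing
# the Attention default. The surrounding helper is hypothetical.
from diffusers.models.attention_processor import Attention, AttnProcessor2_0, AttnProcessorNPU
from diffusers.utils.import_utils import is_torch_npu_available


def build_sana_cross_attention(query_dim: int, cross_attention_dim: int, heads: int, dim_head: int) -> Attention:
    # Pick the NPU fused-attention processor only when running on Ascend hardware.
    processor = AttnProcessorNPU() if is_torch_npu_available() else AttnProcessor2_0()
    return Attention(
        query_dim=query_dim,
        cross_attention_dim=cross_attention_dim,
        heads=heads,
        dim_head=dim_head,
        processor=processor,
    )
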
@@ -3147,7 +3151,16 @@ def __call__(
             attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
             # scaled_dot_product_attention expects attention_mask shape to be
             # (batch, heads, source_length, target_length)
-            attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])
+            attn_mask = attention_mask[0]
+            seq_len = hidden_states.shape[1]
+            attention_mask = attn_mask.repeat_interleave(seq_len * batch_size, dim=0)
+            attention_mask = attention_mask.view(batch_size, 1, -1, attention_mask.shape[-1])
+
+            if attention_mask.dtype != torch.uint8:
+                if attention_mask.dtype == torch.bool:
+                    attention_mask = torch.logical_not(attention_mask.bool())
+                else:
+                    attention_mask = attention_mask.to(torch.uint8)
 
         if attn.group_norm is not None:
             hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
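
To make the added mask handling concrete, here is a small toy example (shapes and the starting mask layout are assumptions, not taken from the processor) showing the reshape to `(batch, 1, query_len, key_len)` and the dtype handling; the boolean inversion in the diff suggests the NPU fused-attention kernel treats nonzero entries as positions to mask out:

# Toy illustration of the mask transform above; all shapes here are made up.
import torch

batch_size, heads, seq_len, kv_len = 2, 8, 16, 77

# Assume a boolean mask broadcast over heads, one row per (batch * heads) entry,
# with True meaning "attend to this key position".
attention_mask = torch.ones(batch_size * heads, 1, kv_len, dtype=torch.bool)

attn_mask = attention_mask[0]                                              # (1, kv_len)
attention_mask = attn_mask.repeat_interleave(seq_len * batch_size, dim=0)  # (batch * seq_len, kv_len)
attention_mask = attention_mask.view(batch_size, 1, -1, attention_mask.shape[-1])  # (batch, 1, seq_len, kv_len)

# Boolean masks are inverted, i.e. nonzero = masked out (assumption based on the diff);
# other dtypes are cast to uint8.
if attention_mask.dtype != torch.uint8:
    if attention_mask.dtype == torch.bool:
        attention_mask = torch.logical_not(attention_mask.bool())
    else:
        attention_mask = attention_mask.to(torch.uint8)

print(attention_mask.shape)  # torch.Size([2, 1, 16, 77])
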
src/diffusers/models/transformers/sana_transformer.py (3 changed lines: 2 additions, 1 deletion)
@@ -19,11 +19,12 @@

 from ...configuration_utils import ConfigMixin, register_to_config
 from ...loaders import PeftAdapterMixin
-from ...utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers, is_torch_npu_available
 from ..attention_processor import (
     Attention,
     AttentionProcessor,
     AttnProcessor2_0,
+    AttnProcessorNPU,
     SanaLinearAttnProcessor2_0,
 )
 from ..embeddings import PatchEmbed, PixArtAlphaTextProjection