Skip to content

Commit 771344d

Browse files
charliewwdev and claude committed
fix MPS GPU support and defensive API checks
- _apply_offloading: skip cpu offload on MPS, move directly to GPU - _apply_vae_opts: add hasattr checks (WanPipeline lacks vae_slicing) - All backends pass device to _apply_offloading for proper routing - E2e test: MPS uses offload=none to run on GPU directly Tested: Wan 1.3B on MPS generates 17 frames in 294s (vs 414s on CPU) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a7e99ae commit 771344d

File tree

6 files changed

+28
-13
lines changed

6 files changed

+28
-13
lines changed

animatediff/backends/cogvideo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def load(
6767
instance = cls(pipe, model_variant=model_variant)
6868

6969
if offload_strategy != "none":
70-
instance._apply_offloading(pipe, offload_strategy)
70+
instance._apply_offloading(pipe, offload_strategy, device=device)
7171
else:
7272
pipe.to(device)
7373

animatediff/backends/hunyuan.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def load(
6464
instance = cls(pipe, model_variant=model_variant)
6565

6666
if offload_strategy != "none":
67-
instance._apply_offloading(pipe, offload_strategy)
67+
instance._apply_offloading(pipe, offload_strategy, device=device)
6868
else:
6969
pipe.to(device)
7070

animatediff/backends/ltx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def load(
6262
instance = cls(pipe, model_variant=model_variant)
6363

6464
if offload_strategy != "none":
65-
instance._apply_offloading(pipe, offload_strategy)
65+
instance._apply_offloading(pipe, offload_strategy, device=device)
6666
else:
6767
pipe.to(device)
6868

animatediff/backends/wan.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def load(
7777

7878
# Apply offloading (must be before .to(device) for cpu offload)
7979
if offload_strategy != "none":
80-
instance._apply_offloading(pipe, offload_strategy)
80+
instance._apply_offloading(pipe, offload_strategy, device=device)
8181
else:
8282
pipe.to(device)
8383

animatediff/core/base_pipeline.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,16 +82,29 @@ def _make_generator(self, seed: int, device: str) -> Optional[torch.Generator]:
8282
return torch.Generator(device=device).manual_seed(seed)
8383
return None
8484

85-
def _apply_offloading(self, pipe, strategy: str):
86-
"""Apply memory offloading strategy to a diffusers pipeline."""
87-
if strategy == "model_cpu":
85+
def _apply_offloading(self, pipe, strategy: str, device: str = "cuda"):
86+
"""Apply memory offloading strategy to a diffusers pipeline.
87+
88+
Note: CPU offloading only works with CUDA. For MPS, we skip offloading
89+
and move the full pipeline to the device instead.
90+
"""
91+
# CPU offloading requires CUDA — skip for MPS/CPU and just move to device
92+
if device != "cuda" and device != "cpu":
93+
logger.info(f"Offloading not supported on {device}, moving pipeline to {device}")
94+
pipe.to(device)
95+
return
96+
97+
if strategy == "model_cpu" and hasattr(pipe, "enable_model_cpu_offload"):
8898
pipe.enable_model_cpu_offload()
89-
elif strategy == "sequential_cpu":
99+
elif strategy == "sequential_cpu" and hasattr(pipe, "enable_sequential_cpu_offload"):
90100
pipe.enable_sequential_cpu_offload()
101+
else:
102+
logger.warning(f"Offload strategy '{strategy}' not available, moving to {device}")
103+
pipe.to(device)
91104

92105
def _apply_vae_opts(self, pipe, slicing: bool = True, tiling: bool = False):
93106
"""Apply VAE memory optimizations."""
94-
if slicing:
107+
if slicing and hasattr(pipe, "enable_vae_slicing"):
95108
pipe.enable_vae_slicing()
96109
if tiling and hasattr(pipe, "enable_vae_tiling"):
97110
pipe.enable_vae_tiling()

tests/test_e2e_generate.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,26 +27,28 @@ def test_wan_1_3b_generate():
2727
if torch.cuda.is_available():
2828
device = "cuda"
2929
dtype = torch.float16
30+
offload = "model_cpu" # CUDA supports CPU offload
3031
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
3132
device = "mps"
32-
dtype = torch.float32 # MPS works best with float32 for this model
33+
dtype = torch.float32 # MPS requires float32 for Wan
34+
offload = "none" # MPS: load directly to GPU (no cpu offload support)
3335
else:
3436
print("SKIP: No GPU available (need CUDA or MPS)")
3537
return
3638

3739
print(f"\n{'='*60}")
38-
print(f"Device: {device} | dtype: {dtype}")
40+
print(f"Device: {device} | dtype: {dtype} | offload: {offload}")
3941
print(f"{'='*60}")
4042

4143
# Load model (will download ~5GB on first run)
4244
print("\n[1/3] Loading Wan 2.1 1.3B...")
4345
t0 = time.time()
4446
backend = WanBackend.load(
45-
model_path=None, # auto: Wan-AI/Wan2.1-T2V-1.3B
47+
model_path=None, # auto: Wan-AI/Wan2.1-T2V-1.3B-Diffusers
4648
torch_dtype=dtype,
4749
device=device,
4850
quantization="none",
49-
offload_strategy="model_cpu", # save memory
51+
offload_strategy=offload,
5052
enable_vae_slicing=True,
5153
model_variant="1.3B",
5254
)

0 commit comments

Comments (0)