Merge pull request #22 from audiohacking/copilot/fix-decoding-audio-failure-another-one

lmangani · web-flow · commit c2ae85ba9854 · 2026-01-30T14:43:41.000+01:00
Fix MPS audio decoding failure with explicit dtype preservation
diff --git a/backend/app/services/music_service.py b/backend/app/services/music_service.py
@@ -804,7 +804,9 @@ def _pad_audio_token(token):
                 progress = int((i + 1) / max_audio_frames * 100)
                 callback(progress, f"Generating audio... {i + 1}/{max_audio_frames} frames")
 
-        frames = torch.stack(frames).permute(1, 2, 0).squeeze(0).cpu()  # Move to CPU immediately
+        # Stack the generated frames and cast to torch.long before moving the result to CPU.
+        # The explicit cast is a defensive guard for MPS, where the integer dtype may not be preserved.
+        frames = torch.stack(frames).permute(1, 2, 0).squeeze(0).to(dtype=torch.long).cpu()
 
         # Sequential offload: Move HeartMuLa to CPU before loading HeartCodec
         # This allows fitting on smaller GPUs (12GB) by never having both models in VRAM