1717from tokenizers import Tokenizer
1818
1919# Configure MPS (Apple Metal) for optimal performance
20- # These settings must be set before any PyTorch operations
20+ # Note: PYTORCH_ENABLE_MPS_FALLBACK must be set before the MPS backend first runs an unsupported op; setting it here, ahead of any MPS work, enables CPU fallback
2121if hasattr (torch .backends , 'mps' ) and torch .backends .mps .is_available ():
2222 # Enable MPS fallback to CPU for unsupported operations (better than crashing)
23+ # This must be in the environment before any MPS operations execute for the fallback to take effect
2324 os .environ .setdefault ("PYTORCH_ENABLE_MPS_FALLBACK" , "1" )
2425 # Note: MPS is already using Metal under the hood, no additional config needed
2526 logger_temp = logging .getLogger (__name__ )
@@ -111,7 +112,7 @@ def get_autocast_context(device_type: str, dtype: torch.dtype):
111112
112113 PyTorch's autocast only supports 'cuda', 'cpu', and 'xpu' device types.
113114 For MPS (Apple Metal), autocast is not supported, so we use a nullcontext (no-op).
114- Since MPS pipelines already use float32, no autocast is needed .
115+ MPS pipelines use float16 precision, which is well supported by the hardware.
115116
116117 Args:
117118 device_type: Device type string ('cuda', 'cpu', 'mps', 'xpu')
@@ -121,7 +122,7 @@ def get_autocast_context(device_type: str, dtype: torch.dtype):
121122 Context manager for autocast or nullcontext for unsupported devices
122123 """
123124 # torch.autocast doesn't support MPS device type
124- # MPS pipelines already use float32, so autocast is not needed
125+ # MPS pipelines use float16 directly, which is optimal for the hardware
125126 if device_type == 'mps' :
126127 return nullcontext ()
127128
@@ -1537,7 +1538,6 @@ def patched_warmup(model, device_map, hf_quantizer):
15371538 os .environ ["HF_HUB_DISABLE_CACHING_ALLOCATOR_WARMUP" ] = "1"
15381539
15391540 try :
1540- # MPS doesn't support bfloat16, use float32 instead
15411541 # IMPORTANT: For MPS, we need to use float16 (not float32) for optimal performance
15421542 # MPS has native support for float16 operations which are much faster than float32
15431543 print ("[Apple Metal] Loading models with float16 precision for optimal MPS performance" , flush = True )
@@ -1563,23 +1563,44 @@ def patched_warmup(model, device_map, hf_quantizer):
15631563 pipeline .mula_dtype = torch .float16
15641564 pipeline .codec_dtype = torch .float16
15651565
1566+ # Verify and correct model device placement
1567+ # Note: Accessing _mula and _codec (private attributes) is necessary here
1568+ # because the pipeline library doesn't provide public methods for device verification
15661569 if hasattr (pipeline , '_mula' ) and pipeline ._mula is not None :
1567- mula_device = next (pipeline ._mula .parameters ()).device
1568- print (f"[Apple Metal] HeartMuLa model device: { mula_device } " , flush = True )
1569- if mula_device .type != 'mps' :
1570- logger .warning (f"[MPS] HeartMuLa model is on { mula_device } , not MPS! This will be slow." )
1571- print (f"[Apple Metal] WARNING: HeartMuLa is on { mula_device } , moving to MPS..." , flush = True )
1572- pipeline ._mula = pipeline ._mula .to (mps_device )
1573- print (f"[Apple Metal] HeartMuLa moved to MPS" , flush = True )
1570+ try :
1571+ # Get first parameter's device, or handle case where model has no parameters
1572+ mula_params = list (pipeline ._mula .parameters ())
1573+ if mula_params :
1574+ mula_device = mula_params [0 ].device
1575+ print (f"[Apple Metal] HeartMuLa model device: { mula_device } " , flush = True )
1576+ if mula_device .type != 'mps' :
1577+ logger .warning (f"[MPS] HeartMuLa model is on { mula_device } , not MPS! This will be slow." )
1578+ print (f"[Apple Metal] WARNING: HeartMuLa is on { mula_device } , moving to MPS..." , flush = True )
1579+ # Explicitly set both device and dtype for consistency
1580+ pipeline ._mula = pipeline ._mula .to (device = mps_device , dtype = torch .float16 )
1581+ print (f"[Apple Metal] HeartMuLa moved to MPS with float16 precision" , flush = True )
1582+ else :
1583+ logger .warning ("[MPS] HeartMuLa model has no parameters - cannot verify device" )
1584+ except Exception as e :
1585+ logger .warning (f"[MPS] Failed to verify HeartMuLa device: { e } " )
15741586
15751587 if hasattr (pipeline , '_codec' ) and pipeline ._codec is not None :
1576- codec_device = next (pipeline ._codec .parameters ()).device
1577- print (f"[Apple Metal] HeartCodec model device: { codec_device } " , flush = True )
1578- if codec_device .type != 'mps' :
1579- logger .warning (f"[MPS] HeartCodec model is on { codec_device } , not MPS! This will be slow." )
1580- print (f"[Apple Metal] WARNING: HeartCodec is on { codec_device } , moving to MPS..." , flush = True )
1581- pipeline ._codec = pipeline ._codec .to (mps_device )
1582- print (f"[Apple Metal] HeartCodec moved to MPS" , flush = True )
1588+ try :
1589+ # Get first parameter's device, or handle case where model has no parameters
1590+ codec_params = list (pipeline ._codec .parameters ())
1591+ if codec_params :
1592+ codec_device = codec_params [0 ].device
1593+ print (f"[Apple Metal] HeartCodec model device: { codec_device } " , flush = True )
1594+ if codec_device .type != 'mps' :
1595+ logger .warning (f"[MPS] HeartCodec model is on { codec_device } , not MPS! This will be slow." )
1596+ print (f"[Apple Metal] WARNING: HeartCodec is on { codec_device } , moving to MPS..." , flush = True )
1597+ # Explicitly set both device and dtype for consistency
1598+ pipeline ._codec = pipeline ._codec .to (device = mps_device , dtype = torch .float16 )
1599+ print (f"[Apple Metal] HeartCodec moved to MPS with float16 precision" , flush = True )
1600+ else :
1601+ logger .warning ("[MPS] HeartCodec model has no parameters - cannot verify device" )
1602+ except Exception as e :
1603+ logger .warning (f"[MPS] Failed to verify HeartCodec device: { e } " )
15831604
15841605 print ("[Apple Metal] MPS pipeline loaded successfully with float16 precision" , flush = True )
15851606 print ("[Apple Metal] All models are on MPS device for hardware acceleration" , flush = True )
0 commit comments