Skip to content

Commit 945abfe

Browse files
committed
fix: When alpha is a sequence, each alpha[c] should be interpreted as the weight for positive samples of class c. Negative samples should have a default weight of 1.0
Signed-off-by: ytl0623 <[email protected]>
1 parent dc19ec0 commit 945abfe

File tree

2 files changed

+9
-6
lines changed

2 files changed

+9
-6
lines changed

monai/losses/focal_loss.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def __init__(
8282
gamma: value of the exponent gamma in the definition of the Focal loss. Defaults to 2.
8383
alpha: value of the alpha in the definition of the alpha-balanced Focal loss.
8484
The value should be in [0, 1].
85-
If a sequence is provided, it must match the number of classes (after excluding background if set).
85+
If a sequence is provided, its length must match the number of classes (excluding the background class if `include_background=False`).
8686
Defaults to None.
8787
weight: weights to apply to the voxels of each class. If None no weights are applied.
8888
The input can be a single value (same weight for all classes), a sequence of values (the length
@@ -289,8 +289,10 @@ def sigmoid_focal_loss(
289289
# Reshape alpha for broadcasting: (1, C, 1, 1...)
290290
broadcast_dims = [-1] + [1] * len(target.shape[2:])
291291
alpha_t = alpha_t.view(broadcast_dims)
292-
# Apply alpha_c if t==1, (1-alpha_c) if t==0 for channel c
293-
alpha_factor = target * alpha_t + (1 - target) * (1 - alpha_t)
292+
# Apply per-class weight only to positive samples
293+
# For positive samples (target==1): multiply by alpha[c]
294+
# For negative samples (target==0): keep weight as 1.0
295+
alpha_factor = torch.where(target == 1, alpha_t, torch.ones_like(alpha_t))
294296

295297
loss = alpha_factor * loss
296298

tests/losses/test_focal_loss.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
from tests.test_utils import TEST_DEVICES, test_script_save
2525

2626
TEST_CASES = []
27-
for device in ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]:
27+
for case in TEST_DEVICES:
28+
device = case[0]
2829
input_data = {
2930
"input": torch.tensor(
3031
[[[[1.0, 1.0], [0.5, 0.0]], [[1.0, 1.0], [0.5, 0.0]], [[1.0, 1.0], [0.5, 0.0]]]], device=device
@@ -79,10 +80,10 @@
7980

8081
TEST_ALPHA_BROADCASTING = []
8182
for case in TEST_DEVICES:
82-
dev = case[0]
83+
device = case[0]
8384
for include_background in [True, False]:
8485
for use_softmax in [True, False]:
85-
TEST_ALPHA_BROADCASTING.append([dev, include_background, use_softmax])
86+
TEST_ALPHA_BROADCASTING.append([device, include_background, use_softmax])
8687

8788

8889
class TestFocalLoss(unittest.TestCase):

0 commit comments

Comments (0)