
Commit b1b7e01

xinyuanzzz authored and facebook-github-bot committed
Fix GAUC not calculated with weights (#2895)
Summary:
Pull Request resolved: #2895

The gAUC score is lower than expected (e.g. https://fburl.com/mlhub/vljz497c). In IG, if label presence is false, the corresponding weight is set to 0; such samples should not be considered when calculating gAUC.

Reviewed By: yunjiangster

Differential Revision: D73231152

fbshipit-source-id: 3a83269948db27341cd8b6ad5d5f7b553195aa75
1 parent 6dc7c16 commit b1b7e01
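
For context on the fix: a sample whose weight is 0 (in IG, one whose label presence is false) should contribute to neither the numerator nor the denominator of its group's AUC. Below is a minimal pairwise sketch of a weighted per-group AUC, separate from the vectorized torchrec code in the diff that follows; the helper name and the O(n²) loop are illustrative only, and tied predictions are ignored here because the metric masks identical-prediction sessions.

import torch

def weighted_group_auc(predictions, labels, weights):
    """Pairwise weighted AUC for one group: each (positive, negative) pair
    counts with weight w_pos * w_neg, so a zero-weight sample drops out of
    both the numerator and the denominator."""
    num = den = 0.0
    for i in range(len(predictions)):
        for j in range(len(predictions)):
            if labels[i] == 1 and labels[j] == 0:
                pair_w = float(weights[i] * weights[j])
                den += pair_w
                if predictions[i] > predictions[j]:
                    num += pair_w
    return num / den if den > 0 else float("nan")

# A mis-ranked negative with weight 0 no longer drags the group's AUC down:
preds = torch.tensor([0.7, 0.9, 0.2])
labels = torch.tensor([1, 0, 0])
print(weighted_group_auc(preds, labels, torch.tensor([1.0, 1.0, 1.0])))  # 0.5
print(weighted_group_auc(preds, labels, torch.tensor([1.0, 0.0, 1.0])))  # 1.0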

File tree: 2 files changed (+70, -35 lines)

torchrec/metrics/gauc.py

Lines changed: 26 additions & 26 deletions
@@ -24,7 +24,7 @@
 def compute_gauc_3d(
     predictions: torch.Tensor,
     labels: torch.Tensor,
-    num_candidates: torch.Tensor,
+    weights: torch.Tensor,
 ) -> Dict[str, torch.Tensor]:
     """Both predictions and labels are 3-d tensors in shape [n_task, n_group, n_sample]."""

@@ -34,7 +34,7 @@ def compute_gauc_3d(
     pre_arange = torch.arange(max_len, device=predictions.device)

     with record_function("## gauc_argsort ##"):
-        sorted_indices = torch.argsort(predictions, descending=True, dim=-1)
+        sorted_indices = torch.argsort(predictions, dim=-1)
         task_indices = (
             pre_arange[:n_task][:, None, None]
             .expand(n_task, n_group, n_sample)
@@ -51,28 +51,26 @@ def compute_gauc_3d(
         sorted_labels = labels[task_indices, group_indices, sample_indices].view(
             n_task, n_group, n_sample
         )
+        sorted_weights = weights[task_indices, group_indices, sample_indices].view(
+            n_task, n_group, n_sample
+        )

     with record_function("## gauc_calculation ##"):
-        num_sample = num_candidates[None, :].expand(n_task, n_group)
-        # Count number of padding zeros.
-        num_zeros = (n_sample - num_candidates)[None, :, None].expand(
-            n_task, n_group, n_sample
-        )  # [n_task, n_group, n_sample]
-        # This assumes the labels are binary.
-        num_zeros = (sorted_labels != 0) * num_zeros
-        rank = torch.flip(pre_arange[:n_sample] + 1, [0])[None, None, :].expand(
-            n_task, n_group, n_sample
-        )
-        positive_rank = sorted_labels * rank - num_zeros  # [n_task, n_group, n_sample]
-        num_positive = sorted_labels.sum(-1)  # [n_task, n_group]
+        pos_mask = sorted_labels
+        neg_mask = 1 - sorted_labels

-        # AUC is calcuated as (sum{positive_ranks} - num{positive_pairs}) /
-        # (num_positive * num_negative).
-        numerator = torch.sum(positive_rank, -1) - (
-            num_positive * (num_positive + 1) / 2
-        )
-        denominator = num_positive * (num_sample - num_positive)
-        auc = numerator / (denominator + 1e-10)  # [n_task, n_group]
+        # cumulative negative *weight* that appear **before** each position
+        cum_neg_weight = torch.cumsum(sorted_weights * neg_mask, dim=-1)
+
+        # contribution of every positive example: w_pos * (sum w_neg ranked lower)
+        contrib = pos_mask * sorted_weights * cum_neg_weight
+        numerator = contrib.sum(-1)  # [n_task, n_group]
+
+        w_pos = (pos_mask * sorted_weights).sum(-1)  # [n_task, n_group]
+        w_neg = (neg_mask * sorted_weights).sum(-1)  # [n_task, n_group]
+        denominator = w_pos * w_neg
+
+        auc = numerator / (denominator + 1e-10)

         # Skip identical prediction sessions.
         identical_prediction_mask = ~(
@@ -85,7 +83,7 @@ def compute_gauc_3d(
             )
         )
         # Skip identical label(all 0s/1s) sessions.
-        identical_label_mask = (num_positive >= 1) * (num_positive < num_sample)
+        identical_label_mask = (w_pos > 0) & (w_neg > 0)
         auc_mask = identical_label_mask * identical_prediction_mask
         auc *= auc_mask
         num_effective_samples = auc_mask.sum(-1)  # [n_task]
@@ -104,23 +102,25 @@ def to_3d(
 def get_auc_states(
     labels: torch.Tensor,
     predictions: torch.Tensor,
-    weights: Optional[torch.Tensor],
+    weights: torch.Tensor,
     num_candidates: torch.Tensor,
 ) -> Dict[str, torch.Tensor]:

     # predictions, labels: [n_task, n_sample]
     max_length = int(num_candidates.max().item())
     predictions_perm = predictions.permute(1, 0)
     labels_perm = labels.permute(1, 0)
+    weights_perm = weights.permute(1, 0)
     predictions_3d = to_3d(predictions_perm, num_candidates, max_length).permute(
         2, 0, 1
     )
     labels_3d = to_3d(labels_perm, num_candidates, max_length).permute(2, 0, 1)
+    weights_3d = to_3d(weights_perm, num_candidates, max_length).permute(2, 0, 1)

     return compute_gauc_3d(
         predictions_3d,
         labels_3d,
-        num_candidates,
+        weights_3d,
     )


@@ -175,9 +175,9 @@ def update(
         num_candidates: torch.Tensor,
         **kwargs: Dict[str, Any],
     ) -> None:
-        if predictions is None or labels is None:
+        if predictions is None or weights is None:
             raise RecMetricException(
-                "Inputs 'predictions' and 'labels' should not be None for GAUCMetricComputation update"
+                "Inputs 'predictions' and 'weights' should not be None for GAUCMetricComputation update"
            )

         states = get_auc_states(labels, predictions, weights, num_candidates)
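
One way to read the new gauc_calculation block above: predictions are argsorted in ascending order, so for each positive the running sum of negative weights seen so far is exactly the weight of the negatives it outranks, and numerator / (w_pos * w_neg) is the weighted pairwise AUC. This is also why num_candidates is no longer needed inside compute_gauc_3d: assuming to_3d zero-pads (as the removed padding-correction code suggests), padded slots carry weight 0 and drop out on their own. A standalone sanity check on one group, mirroring (not importing) the code above:

import torch

# One group from test_calc_gauc_simple, padded to the batch max length of 3.
preds = torch.tensor([0.6, 0.5, 0.0])      # third slot is padding
labels = torch.tensor([1.0, 0.0, 0.0])
weights = torch.tensor([1.0, 1.0, 0.0])    # padding carries weight 0

order = torch.argsort(preds)               # ascending, as in the new code
s_labels, s_weights = labels[order], weights[order]
pos_mask, neg_mask = s_labels, 1 - s_labels

cum_neg_weight = torch.cumsum(s_weights * neg_mask, dim=-1)
numerator = (pos_mask * s_weights * cum_neg_weight).sum()
w_pos = (pos_mask * s_weights).sum()
w_neg = (neg_mask * s_weights).sum()
print(float(numerator / (w_pos * w_neg + 1e-10)))  # ~1.0: the positive outranks the only weighted negative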

torchrec/metrics/tests/test_gauc.py

Lines changed: 44 additions & 9 deletions
@@ -24,9 +24,8 @@ def _get_states(
     labels: torch.Tensor,
     predictions: torch.Tensor,
     weights: torch.Tensor,
-    num_candidates: torch.Tensor,
 ) -> Dict[str, torch.Tensor]:
-    gauc_res = compute_gauc_3d(predictions, labels, num_candidates)
+    gauc_res = compute_gauc_3d(predictions, labels, weights)
     return {
         "auc_sum": gauc_res["auc_sum"],
         "num_samples": gauc_res["num_samples"],
@@ -44,8 +43,8 @@ class GAUCMetricValueTest(unittest.TestCase):
     def setUp(self) -> None:
         self.predictions = {"DefaultTask": None}
         self.labels = {"DefaultTask": None}
+        self.weights = {"DefaultTask": None}
         self.num_candidates = None
-        self.weights = None
         self.batches = {
             "predictions": self.predictions,
             "labels": self.labels,
@@ -62,13 +61,13 @@ def setUp(self) -> None:
     def test_calc_gauc_simple(self) -> None:
         self.predictions["DefaultTask"] = torch.tensor([[0.9, 0.8, 0.7, 0.6, 0.5]])
         self.labels["DefaultTask"] = torch.tensor([[1, 0, 1, 1, 0]])
+        self.weights["DefaultTask"] = torch.tensor([[1, 1, 1, 1, 1]])
         self.num_candidates = torch.tensor([3, 2])
-        self.weights = None
         self.batches = {
             "predictions": self.predictions,
             "labels": self.labels,
             "num_candidates": self.num_candidates,
-            "weights": None,
+            "weights": self.weights,
         }

         expected_gauc = torch.tensor([0.75], dtype=torch.double)
@@ -97,13 +96,13 @@ def test_calc_gauc_hard(self) -> None:
             [[0.3, 0.9, 0.1, 0.8, 0.2, 0.8, 0.7, 0.6, 0.5, 0.5]]
         )
         self.labels["DefaultTask"] = torch.tensor([[1, 1, 1, 0, 0, 1, 0, 1, 1, 0]])
+        self.weights["DefaultTask"] = torch.tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
         self.num_candidates = torch.tensor([2, 3, 3, 2])
-        self.weights = None
         self.batches = {
             "predictions": self.predictions,
             "labels": self.labels,
             "num_candidates": self.num_candidates,
-            "weights": None,
+            "weights": self.weights,
         }

         expected_gauc = torch.tensor([0.25], dtype=torch.double)
@@ -130,8 +129,8 @@ def test_calc_gauc_hard(self) -> None:
     def test_calc_gauc_all_0_labels(self) -> None:
         self.predictions["DefaultTask"] = torch.tensor([[0.9, 0.8, 0.7, 0.6, 0.5]])
         self.labels["DefaultTask"] = torch.tensor([[0, 0, 0, 0, 0]])
+        self.weights["DefaultTask"] = torch.tensor([[1, 1, 1, 1, 1]])
         self.num_candidates = torch.tensor([3, 2])
-        self.weights = None
         self.batches = {
             "predictions": self.predictions,
             "labels": self.labels,
@@ -163,8 +162,8 @@ def test_calc_gauc_all_0_labels(self) -> None:
     def test_calc_gauc_all_1_labels(self) -> None:
         self.predictions["DefaultTask"] = torch.tensor([[0.9, 0.8, 0.7, 0.6, 0.5]])
         self.labels["DefaultTask"] = torch.tensor([[1, 1, 1, 1, 1]])
+        self.weights["DefaultTask"] = torch.tensor([[1, 1, 1, 1, 1]])
         self.num_candidates = torch.tensor([3, 2])
-        self.weights = None
         self.batches = {
             "predictions": self.predictions,
             "labels": self.labels,
@@ -196,6 +195,7 @@ def test_calc_gauc_all_1_labels(self) -> None:
     def test_calc_gauc_identical_predictions(self) -> None:
         self.predictions["DefaultTask"] = torch.tensor([[0.8, 0.8, 0.8, 0.8, 0.8]])
         self.labels["DefaultTask"] = torch.tensor([[1, 1, 0, 1, 0]])
+        self.weights["DefaultTask"] = torch.tensor([[1, 1, 1, 1, 1]])
         self.num_candidates = torch.tensor([3, 2])
         self.weights = None
         self.batches = {
@@ -225,3 +225,38 @@ def test_calc_gauc_identical_predictions(self) -> None:
                     actual_gauc, expected_gauc
                 )
             )
+
+    def test_calc_gauc_weighted(self) -> None:
+        self.predictions["DefaultTask"] = torch.tensor(
+            [[0.3, 0.9, 0.1, 0.8, 0.2, 0.8, 0.7, 0.6, 0.5, 0.5]]
+        )
+        self.labels["DefaultTask"] = torch.tensor([[1, 1, 1, 0, 0, 1, 0, 1, 1, 0]])
+        self.weights["DefaultTask"] = torch.tensor([[1, 1, 1, 0, 1, 1, 1, 0, 1, 1]])
+        self.num_candidates = torch.tensor([2, 3, 3, 2])
+        self.batches = {
+            "predictions": self.predictions,
+            "labels": self.labels,
+            "num_candidates": self.num_candidates,
+            "weights": self.weights,
+        }
+
+        expected_gauc = torch.tensor([0.5], dtype=torch.double)
+        expected_num_samples = torch.tensor([2], dtype=torch.double)
+        self.gauc.update(**self.batches)
+        gauc_res = self.gauc.compute()
+        actual_gauc, num_effective_samples = (
+            gauc_res["gauc-DefaultTask|window_gauc"],
+            gauc_res["gauc-DefaultTask|window_gauc_num_samples"],
+        )
+        if not torch.allclose(expected_num_samples, num_effective_samples):
+            raise ValueError(
+                "actual num sample {} is not equal to expected num sample {}".format(
+                    num_effective_samples, expected_num_samples
+                )
+            )
+        if not torch.allclose(expected_gauc, actual_gauc):
+            raise ValueError(
+                "actual auc {} is not equal to expected auc {}".format(
+                    actual_gauc, expected_gauc
+                )
+            )
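
The expected values in test_calc_gauc_weighted can be confirmed by hand: group 1 ([0.3, 0.9]) is all-positive and group 4 ([0.5, 0.5]) has identical predictions, so only groups 2 and 3 count, with weighted AUCs of 0 and 1, giving 2 effective samples and a gAUC of 0.5. A quick brute-force check (plain Python, not the metric code; the identical-prediction skip is simplified here):

# Brute-force recomputation of the weighted test's expected values.
preds = [0.3, 0.9, 0.1, 0.8, 0.2, 0.8, 0.7, 0.6, 0.5, 0.5]
labels = [1, 1, 1, 0, 0, 1, 0, 1, 1, 0]
weights = [1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
num_candidates = [2, 3, 3, 2]

aucs, start = [], 0
for n in num_candidates:
    p, y, w = (x[start : start + n] for x in (preds, labels, weights))
    start += n
    num = sum(
        wi * wj
        for pi, yi, wi in zip(p, y, w) if yi == 1
        for pj, yj, wj in zip(p, y, w) if yj == 0 and pi > pj
    )
    den = sum(wi for yi, wi in zip(y, w) if yi == 1) * sum(
        wj for yj, wj in zip(y, w) if yj == 0
    )
    if den > 0 and len(set(p)) > 1:  # skip all-0/all-1 and identical-prediction groups
        aucs.append(num / den)

print(len(aucs), sum(aucs) / len(aucs))  # 2 effective groups, gAUC = 0.5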
