Commit 3ca5707

Merge pull request #27 from ChEB-AI/feature/testing_framework
Metric Testing + Implementation
2 parents: 648c675 + 3ce690c

Some content is hidden: large commits have some content hidden by default.
46 files changed: +400 −2 lines changed

chebai/callbacks/epoch_metrics.py

Lines changed: 74 additions & 0 deletions
@@ -47,3 +47,77 @@ def compute(self):
         # if (precision and recall are 0) or (precision is nan), set f1 to 0
         classwise_f1 = classwise_f1.nan_to_num()
         return torch.mean(classwise_f1)
+
+
+class BalancedAccuracy(torchmetrics.Metric):
+    """Balanced Accuracy = (TPR + TNR) / 2 = (TP / (TP + FN) + TN / (TN + FP)) / 2
+
+    This metric computes the balanced accuracy, which is the average of the true positive rate (TPR)
+    and the true negative rate (TNR). It is useful for imbalanced datasets where the classes are not
+    represented equally.
+    """
+
+    def __init__(self, num_labels, dist_sync_on_step=False, threshold=0.5):
+        super().__init__(dist_sync_on_step=dist_sync_on_step)
+
+        self.add_state(
+            "true_positives",
+            default=torch.zeros(num_labels, dtype=torch.int),
+            dist_reduce_fx="sum",
+        )
+
+        self.add_state(
+            "false_positives",
+            default=torch.zeros(num_labels, dtype=torch.int),
+            dist_reduce_fx="sum",
+        )
+
+        self.add_state(
+            "true_negatives",
+            default=torch.zeros(num_labels, dtype=torch.int),
+            dist_reduce_fx="sum",
+        )
+
+        self.add_state(
+            "false_negatives",
+            default=torch.zeros(num_labels, dtype=torch.int),
+            dist_reduce_fx="sum",
+        )
+
+        self.threshold = threshold
+
+    def update(self, preds: torch.Tensor, labels: torch.Tensor):
+        """Update the TPs, TNs, FPs and FNs."""
+
+        # preds and labels have size batch_size x num_of_classes;
+        # summing over the batch dimension (dim=0) gives the per-class counts
+        tps = torch.sum(
+            torch.logical_and(preds > self.threshold, labels.to(torch.bool)), dim=0
+        )
+        fps = torch.sum(
+            torch.logical_and(preds > self.threshold, ~labels.to(torch.bool)), dim=0
+        )
+        tns = torch.sum(
+            torch.logical_and(preds <= self.threshold, ~labels.to(torch.bool)), dim=0
+        )
+        fns = torch.sum(
+            torch.logical_and(preds <= self.threshold, labels.to(torch.bool)), dim=0
+        )
+
+        # size: num_of_classes
+        self.true_positives += tps
+        self.false_positives += fps
+        self.true_negatives += tns
+        self.false_negatives += fns
+
+    def compute(self):
+        """Compute the balanced accuracy from the accumulated per-class counts."""
+
+        tpr = self.true_positives / (self.true_positives + self.false_negatives)
+        tnr = self.true_negatives / (self.true_negatives + self.false_positives)
+        # convert NaN values (from classes with a zero denominator) to 0
+        tpr = tpr.nan_to_num()
+        tnr = tnr.nan_to_num()
+
+        balanced_acc = (tpr + tnr) / 2
+        return torch.mean(balanced_acc)
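
As a quick illustration of how the new metric behaves, here is a minimal usage sketch; the toy tensors and the expected value are illustrative only, not taken from the repository:

    import torch
    from chebai.callbacks.epoch_metrics import BalancedAccuracy

    # hypothetical toy batch: 2 samples x 3 classes, probabilities and binary labels
    preds = torch.tensor([[0.9, 0.2, 0.7], [0.4, 0.8, 0.6]])
    labels = torch.tensor([[1, 0, 1], [0, 1, 0]])

    metric = BalancedAccuracy(num_labels=3)  # default threshold=0.5
    metric.update(preds, labels)
    # classes 0 and 1 are classified perfectly (balanced accuracy 1.0 each);
    # class 2 has tpr=1 and tnr=0, i.e. 0.5; mean = (1 + 1 + 0.5) / 3
    print(metric.compute())  # expected: tensor(0.8333)

Because the class subclasses torchmetrics.Metric, calling metric(preds, labels) directly (the single-call form exercised in the tests below) updates the state and returns the value for that batch in one step.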

chebai/cli.py

Lines changed: 2 additions & 2 deletions
@@ -11,10 +11,10 @@ def __init__(self, *args, **kwargs):
 
     def add_arguments_to_parser(self, parser: LightningArgumentParser):
         for kind in ("train", "val", "test"):
-            for average in ("micro", "macro"):
+            for average in ("micro-f1", "macro-f1", "balanced-accuracy"):
                 parser.link_arguments(
                     "model.init_args.out_dim",
-                    f"model.init_args.{kind}_metrics.init_args.metrics.{average}-f1.init_args.num_labels",
+                    f"model.init_args.{kind}_metrics.init_args.metrics.{average}.init_args.num_labels",
                 )
         parser.link_arguments(
             "model.init_args.out_dim", "trainer.callbacks.init_args.num_labels"
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+class_path: torchmetrics.MetricCollection
+init_args:
+  metrics:
+    balanced-accuracy:
+      class_path: chebai.callbacks.epoch_metrics.BalancedAccuracy
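
This config registers the metric in a torchmetrics.MetricCollection under the key balanced-accuracy, which is exactly the key the CLI linking above targets. Programmatically it corresponds roughly to the following sketch (num_labels is normally injected via the link from model.init_args.out_dim; the value below is a placeholder):

    from torchmetrics import MetricCollection
    from chebai.callbacks.epoch_metrics import BalancedAccuracy

    # num_labels would be filled in from model.init_args.out_dim by the CLI;
    # 1000 is a placeholder
    metrics = MetricCollection({"balanced-accuracy": BalancedAccuracy(num_labels=1000)})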

setup.py

Lines changed: 1 addition & 0 deletions
@@ -49,6 +49,7 @@
         "wandb",
         "chardet",
         "yaml",
+        "torchmetrics",
     ],
     extras_require={"dev": ["black", "isort", "pre-commit"]},
 )
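
Declaring torchmetrics explicitly in install_requires matches its direct use in chebai/callbacks/epoch_metrics.py (BalancedAccuracy subclasses torchmetrics.Metric), rather than relying on the package being present transitively.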
Lines changed: 132 additions & 0 deletions
@@ -0,0 +1,132 @@
+import unittest
+import torch
+import os
+from chebai.callbacks.epoch_metrics import BalancedAccuracy
+import random
+
+
+class TestCustomBalancedAccuracyMetric(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    def test_iterative_vs_single_call_approach(self):
+        """Test the custom metric implementation in the iterative, update-based approach
+        against the single-call approach."""
+
+        preds = torch.tensor([[1, 1, 0, 1], [1, 0, 1, 1], [0, 1, 0, 1]])
+        label = torch.tensor([[0, 0, 0, 0], [0, 0, 1, 1], [0, 1, 0, 1]])
+
+        num_labels = label.shape[1]
+        iterative_custom_metric = BalancedAccuracy(num_labels=num_labels)
+        for i in range(label.shape[0]):
+            iterative_custom_metric.update(preds[i].unsqueeze(0), label[i].unsqueeze(0))
+        iterative_custom_metric_score = iterative_custom_metric.compute().item()
+
+        single_call_custom_metric = BalancedAccuracy(num_labels=num_labels)
+        single_call_custom_metric_score = single_call_custom_metric(preds, label).item()
+
+        self.assertEqual(iterative_custom_metric_score, single_call_custom_metric_score)
+
+    def test_metric_against_realistic_data(self):
+        """Test the custom metric on realistic data."""
+        directory_path = os.path.join("tests", "test_data", "CheBIOver100_test")
+        abs_path = os.path.join(os.getcwd(), directory_path)
+        print(f"Checking data from - {abs_path}")
+        num_of_files = len(os.listdir(abs_path)) // 2
+
+        # load a single file to get the number of labels for metric class instantiation
+        labels = torch.load(
+            f"{directory_path}/labels{0:03d}.pt", map_location=torch.device(self.device)
+        )
+        num_labels = labels.shape[1]
+        balanced_acc_custom = BalancedAccuracy(num_labels=num_labels)
+
+        for i in range(num_of_files):
+            labels = torch.load(
+                f"{directory_path}/labels{i:03d}.pt",
+                map_location=torch.device(self.device),
+            )
+            preds = torch.load(
+                f"{directory_path}/preds{i:03d}.pt",
+                map_location=torch.device(self.device),
+            )
+            balanced_acc_custom.update(preds, labels)
+
+        balanced_acc_custom_score = balanced_acc_custom.compute().item()
+        print(f"Balanced Accuracy for realistic data: {balanced_acc_custom_score}")
+
+    def test_case_when_few_class_has_no_labels(self):
+        """Test the custom metric for the scenario where a class has no positive labels."""
+        preds = torch.tensor([[1, 1, 0, 1], [1, 0, 1, 1], [0, 1, 0, 1]])
+        label = torch.tensor([[0, 0, 0, 0], [0, 0, 1, 1], [0, 1, 0, 1]])  # class 0 has no positive labels
+
+        # tp = [0, 1, 1, 2], fp = [2, 1, 0, 1], tn = [1, 1, 2, 0], fn = [0, 0, 0, 0]
+        # tpr = [0, 1, 1, 2] / ([0, 1, 1, 2] + [0, 0, 0, 0]) = [0, 1, 1, 1]  (0/0 -> 0)
+        # tnr = [1, 1, 2, 0] / ([1, 1, 2, 0] + [2, 1, 0, 1]) = [0.33333, 0.5, 1, 0]
+        # balanced_accuracy = ([0, 1, 1, 1] + [0.33333, 0.5, 1, 0]) / 2 = [0.16666667, 0.75, 1, 0.5]
+        # mean balanced accuracy = 0.6041666666666666
+
+        balanced_acc_score = self.__get_custom_metric_score(
+            preds, label, label.shape[1]
+        )
+
+        self.assertAlmostEqual(balanced_acc_score, 0.6041666666, places=4)
+
+    def test_all_predictions_are_1_half_labels_are_1(self):
+        """Test the custom metric for the scenario where all predictions are 1 but only half of
+        the labels are 1."""
+        preds = torch.ones((1, 900), dtype=torch.int)
+        label = torch.ones((1, 900), dtype=torch.int)
+
+        mask = [
+            [True] * (label.size(1) // 2)
+            + [False] * (label.size(1) - (label.size(1) // 2))
+        ]
+        random.shuffle(mask[0])
+        label[torch.tensor(mask)] = 0
+
+        # e.g. preds = [1, 1, 1, 1], label = [0, 1, 0, 1]
+        # tp = [0, 1, 0, 1], fp = [1, 0, 1, 0], tn = [0, 0, 0, 0], fn = [0, 0, 0, 0]
+        # tpr = tp / (tp + fn) = [0, 1, 0, 1] / [0, 1, 0, 1] = [0, 1, 0, 1]  (0/0 -> 0)
+        # tnr = tn / (tn + fp) = [0, 0, 0, 0]
+        # balanced accuracy = ([0, 1, 0, 1] + [0, 0, 0, 0]) / 2 = [0, 0.5, 0, 0.5]; mean = 0.25
+
+        balanced_acc_custom_score = self.__get_custom_metric_score(
+            preds, label, label.shape[1]
+        )
+        self.assertAlmostEqual(balanced_acc_custom_score, 0.25, places=4)
+
+    def test_all_labels_are_1_half_predictions_are_1(self):
+        """Test the custom metric for the scenario where all labels are 1 but only half of
+        the predictions are 1."""
+        preds = torch.ones((1, 900), dtype=torch.int)
+        label = torch.ones((1, 900), dtype=torch.int)
+
+        mask = [
+            [True] * (label.size(1) // 2)
+            + [False] * (label.size(1) - (label.size(1) // 2))
+        ]
+        random.shuffle(mask[0])
+        preds[torch.tensor(mask)] = 0
+
+        # e.g. label = [1, 1, 1, 1], preds = [0, 1, 0, 1]
+        # tp = [0, 1, 0, 1], fp = [0, 0, 0, 0], tn = [0, 0, 0, 0], fn = [1, 0, 1, 0]
+        # tpr = tp / (tp + fn) = [0, 1, 0, 1] / [1, 1, 1, 1] = [0, 1, 0, 1]
+        # tnr = tn / (tn + fp) = [0, 0, 0, 0]  (0/0 -> 0)
+        # balanced accuracy = ([0, 1, 0, 1] + [0, 0, 0, 0]) / 2 = [0, 0.5, 0, 0.5]; mean = 0.25
+
+        balanced_acc_custom_score = self.__get_custom_metric_score(
+            preds, label, label.shape[1]
+        )
+        self.assertAlmostEqual(balanced_acc_custom_score, 0.25, places=4)
+
+    @staticmethod
+    def __get_custom_metric_score(preds, labels, num_labels):
+        balanced_acc_custom = BalancedAccuracy(num_labels=num_labels)
+        return balanced_acc_custom(preds, labels).item()
+
+
+if __name__ == "__main__":
+    unittest.main()
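
As a sanity check on the hand-computed arithmetic in test_case_when_few_class_has_no_labels, the same tensors can be fed through the metric standalone (a sketch; the expected value is the one asserted in the test):

    import torch
    from chebai.callbacks.epoch_metrics import BalancedAccuracy

    preds = torch.tensor([[1, 1, 0, 1], [1, 0, 1, 1], [0, 1, 0, 1]])
    labels = torch.tensor([[0, 0, 0, 0], [0, 0, 1, 1], [0, 1, 0, 1]])

    metric = BalancedAccuracy(num_labels=4)
    print(metric(preds, labels).item())  # ~0.6041666, as asserted in the test

Assuming the new test module lives under tests/ with a standard test_*.py filename, the suite should be runnable from the repository root with python -m unittest discover; note that test_metric_against_realistic_data additionally expects prediction/label tensor files under tests/test_data/CheBIOver100_test.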
