ChEB-AI
diff --git a/chebai/cli.py b/chebai/cli.py
Lines changed: 3 additions & 0 deletions
diff --git a/chebai/loss/bce_weighted.py b/chebai/loss/bce_weighted.py
Lines changed: 12 additions & 4 deletions
diff --git a/chebai/loss/semantic.py b/chebai/loss/semantic.py
Lines changed: 20 additions & 6 deletions
diff --git a/chebai/models/base.py b/chebai/models/base.py
Lines changed: 0 additions & 2 deletions
diff --git a/chebai/preprocessing/datasets/base.py b/chebai/preprocessing/datasets/base.py
Lines changed: 4 additions & 0 deletions
@@ -19,6 +19,9 @@ def add_arguments_to_parser(self, parser: LightningArgumentParser):
         parser.link_arguments(
             "model.init_args.out_dim", "trainer.callbacks.init_args.num_labels"
         )
+        parser.link_arguments(
+            "data", "model.init_args.criterion.init_args.data_extractor"
+        )
 
     @staticmethod
     def subcommands() -> Dict[str, Set[str]]:
 
@@ -1,5 +1,6 @@
 import torch
-from chebai.preprocessing.datasets.chebi import _ChEBIDataExtractor
+from chebai.preprocessing.datasets.base import XYBaseDataModule
+from chebai.preprocessing.datasets.pubchem import LabeledUnlabeledMixed
 import pandas as pd
 import os
 import pickle
@@ -10,9 +11,16 @@ class BCEWeighted(torch.nn.BCEWithLogitsLoss):
     https://openaccess.thecvf.com/content_CVPR_2019/papers/Cui_Class-Balanced_Loss_Based_on_Effective_Number_of_Samples_CVPR_2019_paper.pdf)
     """
 
-    def __init__(self, beta: float = None, data_extractor: _ChEBIDataExtractor = None):
+    def __init__(
+        self,
+        beta: float = None,
+        data_extractor: XYBaseDataModule = None,
+    ):
         self.beta = beta
+        if isinstance(data_extractor, LabeledUnlabeledMixed):
+            data_extractor = data_extractor.labeled
         self.data_extractor = data_extractor
+
         super().__init__()
 
     def set_pos_weight(self, input):
@@ -31,12 +39,12 @@ def set_pos_weight(self, input):
                         open(
                             os.path.join(
                                 self.data_extractor.raw_dir,
-                                self.data_extractor.raw_file_names_dict[set],
+                                raw_file_name,
                             ),
                             "rb",
                         )
                     )
-                    for set in ["train", "validation", "test"]
+                    for raw_file_name in self.data_extractor.raw_file_names
                 ]
             )
             value_counts = []
 
@@ -7,20 +7,29 @@
 from typing import Literal
 
 from chebai.preprocessing.datasets.chebi import _ChEBIDataExtractor, ChEBIOver100
+from chebai.preprocessing.datasets.pubchem import LabeledUnlabeledMixed
+from chebai.loss.bce_weighted import BCEWeighted
 
 
 class ImplicationLoss(torch.nn.Module):
     def __init__(
         self,
-        data_extractor: _ChEBIDataExtractor,
+        data_extractor: _ChEBIDataExtractor | LabeledUnlabeledMixed,
         base_loss: torch.nn.Module = None,
         tnorm: Literal["product", "lukasiewicz", "xu19"] = "product",
         impl_loss_weight=0.1,  # weight of implication loss in relation to base_loss
         pos_scalar=1,
         pos_epsilon=0.01,
+        multiply_by_softmax=False,
     ):
         super().__init__()
+        # automatically choose labeled subset for implication filter in case of mixed dataset
+        if isinstance(data_extractor, LabeledUnlabeledMixed):
+            data_extractor = data_extractor.labeled
         self.data_extractor = data_extractor
+        # propagate data_extractor to base loss
+        if isinstance(base_loss, BCEWeighted):
+            base_loss.data_extractor = self.data_extractor
         self.base_loss = base_loss
         self.implication_cache_file = f"implications_{self.data_extractor.name}.cache"
         self.label_names = _load_label_names(
@@ -36,6 +45,7 @@ def __init__(
         self.impl_weight = impl_loss_weight
         self.pos_scalar = pos_scalar
         self.eps = pos_epsilon
+        self.multiply_by_softmax = multiply_by_softmax
 
     def forward(self, input, target, **kwargs):
         nnl = kwargs.pop("non_null_labels", None)
@@ -70,16 +80,20 @@ def _calculate_implication_loss(self, l, r):
                 math.pow(1 + self.eps, 1 / self.pos_scalar)
                 - math.pow(self.eps, 1 / self.pos_scalar)
             )
-            r = torch.pow(r, self.pos_scalar)
+            one_min_r = torch.pow(1 - r, self.pos_scalar)
+        else:
+            one_min_r = 1 - r
         if self.tnorm == "product":
-            individual_loss = l * (1 - r)
+            individual_loss = l * one_min_r
         elif self.tnorm == "xu19":
-            individual_loss = -torch.log(1 - l * (1 - r))
+            individual_loss = -torch.log(1 - l * one_min_r)
         elif self.tnorm == "lukasiewicz":
-            individual_loss = torch.relu(l - r)
+            individual_loss = torch.relu(l + one_min_r - 1)
         else:
             raise NotImplementedError(f"Unknown tnorm {self.tnorm}")
 
+        if self.multiply_by_softmax:
+            individual_loss = individual_loss * individual_loss.softmax(dim=-1)
         return torch.mean(
             torch.sum(individual_loss, dim=-1),
             dim=0,
@@ -100,7 +114,7 @@ class DisjointLoss(ImplicationLoss):
     def __init__(
         self,
         path_to_disjointness,
-        data_extractor: _ChEBIDataExtractor,
+        data_extractor: _ChEBIDataExtractor | LabeledUnlabeledMixed,
         base_loss: torch.nn.Module = None,
         disjoint_loss_weight=100,
         **kwargs,
 
@@ -28,11 +28,9 @@ class ChebaiBaseNet(LightningModule):
 
     Attributes:
         NAME (str): The name of the model.
-        LOSS (torch.nn.Module): The loss function used by the model.
     """
 
     NAME = None
-    LOSS = torch.nn.BCEWithLogitsLoss
 
     def __init__(
         self,
 
@@ -312,6 +312,10 @@ def setup_processed(self):
     def processed_file_names(self):
         raise NotImplementedError
 
+    @property
+    def raw_file_names(self):
+        raise NotImplementedError
+
     @property
     def processed_file_names_dict(self) -> dict:
         raise NotImplementedError
Original file line number	Diff line number	Diff line change
`@@ -19,6 +19,9 @@ def add_arguments_to_parser(self, parser: LightningArgumentParser):`
`19`	`19`	`parser.link_arguments(`
`20`	`20`	`"model.init_args.out_dim", "trainer.callbacks.init_args.num_labels"`
`21`	`21`	`)`
	`22`	`+ parser.link_arguments(`
	`23`	`+ "data", "model.init_args.criterion.init_args.data_extractor"`
	`24`	`+ )`
`22`	`25`
`23`	`26`	`@staticmethod`
`24`	`27`	`def subcommands() -> Dict[str, Set[str]]:`