ChEB-AI
diff --git a/chebifier/ensemble/base_ensemble.py b/chebifier/ensemble/base_ensemble.py
Lines changed: 44 additions & 3 deletions
diff --git a/chebifier/ensemble/weighted_majority_ensemble.py b/chebifier/ensemble/weighted_majority_ensemble.py
Lines changed: 2 additions & 2 deletions
diff --git a/chebifier/prediction_models/base_predictor.py b/chebifier/prediction_models/base_predictor.py
Lines changed: 11 additions & 0 deletions
@@ -16,7 +16,7 @@ def __init__(self, model_configs: dict):
         self.positive_prediction_threshold = 0.5
         for model_name, model_config in model_configs.items():
             model_cls = MODEL_TYPES[model_config["type"]]
-            model_instance = model_cls(**model_config)
+            model_instance = model_cls(model_name, **model_config)
             assert isinstance(model_instance, BasePredictor)
             self.models.append(model_instance)
 
@@ -73,8 +73,12 @@ def consolidate_predictions(
         has_valid_predictions = valid_counts > 0
 
         # Calculate positive and negative predictions for all classes at once
-        positive_mask = (predictions > 0.5) & valid_predictions
-        negative_mask = (predictions < 0.5) & valid_predictions
+        positive_mask = (
+            predictions > self.positive_prediction_threshold
+        ) & valid_predictions
+        negative_mask = (
+            predictions < self.positive_prediction_threshold
+        ) & valid_predictions
 
         confidence = 2 * torch.abs(
             predictions.nan_to_num() - self.positive_prediction_threshold
@@ -134,6 +138,7 @@ def predict_smiles_list(self, smiles_list, load_preds_if_possible=True) -> list:
             with open(predicted_classes_file, "w") as f:
                 for cls in predicted_classes:
                     f.write(f"{cls}\n")
+            predicted_classes = {cls: i for i, cls in enumerate(predicted_classes)}
         else:
             print(
                 f"Loading predictions from {preds_file} and label indexes from {predicted_classes_file}"
@@ -149,3 +154,39 @@ def predict_smiles_list(self, smiles_list, load_preds_if_possible=True) -> list:
             ordered_predictions, predicted_classes, classwise_weights
         )
         return aggregated_predictions
+
+
+# Manual smoke test: builds a two-model ensemble (one "resgated" entry and one
+# "electra" entry) and prints the aggregated prediction for a single SMILES.
+# NOTE(review): every path below is hard-coded relative to a sibling
+# ../python-chebai checkout, so this only runs in that local directory layout.
+if __name__ == "__main__":
+    ensemble = BaseEnsemble(
+        {
+            "resgated_0ps1g189": {
+                "type": "resgated",
+                "ckpt_path": "../python-chebai/logs/downloaded_ckpts/electra_resgated_comp/resgated_80-10-10_0ps1g189_epoch=122.ckpt",
+                "target_labels_path": "../python-chebai/data/chebi_v241/ChEBI50/processed/classes.txt",
+                "molecular_properties": [
+                    "chebai_graph.preprocessing.properties.AtomType",
+                    "chebai_graph.preprocessing.properties.NumAtomBonds",
+                    "chebai_graph.preprocessing.properties.AtomCharge",
+                    "chebai_graph.preprocessing.properties.AtomAromaticity",
+                    "chebai_graph.preprocessing.properties.AtomHybridization",
+                    "chebai_graph.preprocessing.properties.AtomNumHs",
+                    "chebai_graph.preprocessing.properties.BondType",
+                    "chebai_graph.preprocessing.properties.BondInRing",
+                    "chebai_graph.preprocessing.properties.BondAromaticity",
+                    "chebai_graph.preprocessing.properties.RDKit2DNormalized",
+                ],
+                "classwise_weights_path": "../python-chebai/metrics_0ps1g189_80-10-10.json",
+            },
+            "electra_14ko0zcf": {
+                "type": "electra",
+                "ckpt_path": "../python-chebai/logs/downloaded_ckpts/electra_resgated_comp/electra_80-10-10_14ko0zcf_epoch=193.ckpt",
+                "target_labels_path": "../python-chebai/data/chebi_v241/ChEBI50/processed/classes.txt",
+                "classwise_weights_path": "../python-chebai/metrics_electra_14ko0zcf_80-10-10.json",
+            },
+        }
+    )
+    # presumably load_preds_if_possible=False forces a fresh prediction run
+    # instead of loading previously saved predictions - confirm against
+    # predict_smiles_list.
+    r = ensemble.predict_smiles_list(
+        ["[NH3+]CCCC[C@H](NC(=O)[C@@H]([NH3+])CC([O-])=O)C([O-])=O"],
+        load_preds_if_possible=False,
+    )
+    # One input SMILES was passed, so only the first result is shown.
+    print(len(r), r[0])
@@ -37,7 +37,7 @@ def calculate_classwise_weights(self, predicted_classes):
         """
         Given the positions of predicted classes in the predictions tensor, assign weights to each class. The
         result is two tensors of shape (num_predicted_classes, num_models). The weight for each class is the model_weight
-        (default: 1) multiplied by the class-specific validation-f1 (default 1).
+        (default: 1) multiplied by (1 + the class-specific validation-f1 (default 1)).
         """
         weights_by_cls = torch.ones(len(predicted_classes), len(self.models))
         for j, model in enumerate(self.models):
@@ -51,7 +51,7 @@ def calculate_classwise_weights(self, predicted_classes):
                         * weights["TP"]
                         / (2 * weights["TP"] + weights["FP"] + weights["FN"])
                     )
-                    weights_by_cls[predicted_classes[cls], j] *= f1
+                    weights_by_cls[predicted_classes[cls], j] *= 1 + f1
 
         print("Calculated model weightings. The average weights are:")
         for i, model in enumerate(self.models):
 
@@ -19,5 +19,16 @@ def __init__(
         else:
             self.classwise_weights = None
 
+        self._description = kwargs.get("description", None)
+
     def predict_smiles_list(self, smiles_list: list[str]) -> dict:
+        """Unimplemented in this base class: always raises NotImplementedError."""
         raise NotImplementedError
+
+    @property
+    def info_text(self):
+        """Return the stored description, or a fixed fallback sentence if it is None."""
+        if self._description is None:
+            return "No description is available for this model."
+        return self._description
+
+    def explain_smiles(self, smiles):
+        """Base implementation: ignores `smiles` and always returns None."""
+        return None