ChEB-AI
diff --git a/chebifier/cli.py b/chebifier/cli.py
Lines changed: 16 additions & 5 deletions
diff --git a/chebifier/ensemble/base_ensemble.py b/chebifier/ensemble/base_ensemble.py
Lines changed: 49 additions & 15 deletions
diff --git a/chebifier/model_registry.py b/chebifier/model_registry.py
Lines changed: 6 additions & 3 deletions
diff --git a/chebifier/prediction_models/__init__.py b/chebifier/prediction_models/__init__.py
Lines changed: 9 additions & 2 deletions
diff --git a/chebifier/prediction_models/c3p_predictor.py b/chebifier/prediction_models/c3p_predictor.py
Lines changed: 32 additions & 9 deletions
@@ -72,19 +72,26 @@ def predict(
     use_confidence,
     resolve_inconsistencies=True,
 ):
-    """Predict ChEBI classes for SMILES strings using an ensemble model.
-        """
+    """Predict ChEBI classes for SMILES strings using an ensemble model."""
     # Load configuration from YAML file
     if not ensemble_config:
         print(f"Using default ensemble configuration")
-        with importlib.resources.files("chebifier").joinpath("ensemble.yml").open("r") as f:
+        with (
+            importlib.resources.files("chebifier")
+            .joinpath("ensemble.yml")
+            .open("r") as f
+        ):
             config = yaml.safe_load(f)
     else:
         print(f"Loading ensemble configuration from {ensemble_config}")
         with open(ensemble_config, "r") as f:
             config = yaml.safe_load(f)
 
-    with importlib.resources.files("chebifier").joinpath("model_registry.yml").open("r") as f:
+    with (
+        importlib.resources.files("chebifier")
+        .joinpath("model_registry.yml")
+        .open("r") as f
+    ):
         model_registry = yaml.safe_load(f)
 
     new_config = {}
@@ -101,7 +108,11 @@ def predict(
     config = new_config
 
     # Instantiate ensemble model
-    ensemble = ENSEMBLES[ensemble_type](config, chebi_version=chebi_version, resolve_inconsistencies=resolve_inconsistencies)
+    ensemble = ENSEMBLES[ensemble_type](
+        config,
+        chebi_version=chebi_version,
+        resolve_inconsistencies=resolve_inconsistencies,
+    )
 
     # Collect SMILES strings from arguments and/or file
     smiles_list = list(smiles)
 
@@ -12,7 +12,12 @@
 
 class BaseEnsemble:
 
-    def __init__(self, model_configs: dict, chebi_version: int = 241, resolve_inconsistencies: bool = True):
+    def __init__(
+        self,
+        model_configs: dict,
+        chebi_version: int = 241,
+        resolve_inconsistencies: bool = True,
+    ):
         # Deferred Import: To avoid circular import error
         from chebifier.model_registry import MODEL_TYPES
 
@@ -28,33 +33,43 @@ def __init__(self, model_configs: dict, chebi_version: int = 241, resolve_incons
             if os.path.isfile(file):
                 self.disjoint_files.append(file)
             else:
-                print(f"Disjoint axiom file {file} not found. Loading from huggingface instead...")
+                print(
+                    f"Disjoint axiom file {file} not found. Loading from huggingface instead..."
+                )
                 from chebifier.hugging_face import download_model_files
-                self.disjoint_files.append(download_model_files({
-                        "repo_id": "chebai/chebifier",
-                        "repo_type": "dataset",
-                        "files": {"disjoint_file": os.path.basename(file)},
-                })["disjoint_file"])
+
+                self.disjoint_files.append(
+                    download_model_files(
+                        {
+                            "repo_id": "chebai/chebifier",
+                            "repo_type": "dataset",
+                            "files": {"disjoint_file": os.path.basename(file)},
+                        }
+                    )["disjoint_file"]
+                )
 
         self.models = []
         self.positive_prediction_threshold = 0.5
         for model_name, model_config in model_configs.items():
             model_cls = MODEL_TYPES[model_config["type"]]
             if "hugging_face" in model_config:
                 from chebifier.hugging_face import download_model_files
+
                 hugging_face_kwargs = download_model_files(model_config["hugging_face"])
             else:
                 hugging_face_kwargs = {}
             if "package_name" in model_config:
                 check_package_installed(model_config["package_name"])
 
             model_instance = model_cls(
-                model_name, **model_config, **hugging_face_kwargs, chebi_graph=self.chebi_graph
+                model_name,
+                **model_config,
+                **hugging_face_kwargs,
+                chebi_graph=self.chebi_graph,
             )
             assert isinstance(model_instance, BasePredictor)
             self.models.append(model_instance)
 
-
         if resolve_inconsistencies:
             self.smoother = PredictionSmoother(
                 self.chebi_dataset,
@@ -96,7 +111,9 @@ def gather_predictions(self, smiles_list):
 
         return ordered_logits, predicted_classes
 
-    def consolidate_predictions(self, predictions, classwise_weights, predicted_classes, **kwargs):
+    def consolidate_predictions(
+        self, predictions, classwise_weights, predicted_classes, **kwargs
+    ):
         """
         Aggregates predictions from multiple models using weighted majority voting.
         Optimized version using tensor operations instead of for loops.
@@ -152,9 +169,13 @@ def consolidate_predictions(self, predictions, classwise_weights, predicted_clas
         if self.smoother is not None:
             self.smoother.set_label_names(class_names)
             smooth_net_score = self.smoother(net_score)
-            class_decisions = (smooth_net_score > 0.5) & has_valid_predictions  # Shape: (num_smiles, num_classes)
+            class_decisions = (
+                smooth_net_score > 0.5
+            ) & has_valid_predictions  # Shape: (num_smiles, num_classes)
         else:
-            class_decisions = (net_score > 0) & has_valid_predictions  # Shape: (num_smiles, num_classes)
+            class_decisions = (
+                net_score > 0
+            ) & has_valid_predictions  # Shape: (num_smiles, num_classes)
         end_time = time.perf_counter()
         print(f"Prediction smoothing took {end_time - start_time:.2f} seconds")
 
@@ -178,7 +199,9 @@ def predict_smiles_list(
                 smiles_list
             )
             if len(predicted_classes) == 0:
-                print(f"Warning: No classes have been predicted for the given SMILES list.")
+                print(
+                    f"Warning: No classes have been predicted for the given SMILES list."
+                )
             # save predictions
             torch.save(ordered_predictions, preds_file)
             with open(predicted_classes_file, "w") as f:
@@ -203,7 +226,14 @@ def predict_smiles_list(
         class_names = list(predicted_classes.keys())
         class_indices = {predicted_classes[cls]: cls for cls in class_names}
         result = [
-            [class_indices[idx.item()] for idx in torch.nonzero(i, as_tuple=True)[0]] if not failure else None
+            (
+                [
+                    class_indices[idx.item()]
+                    for idx in torch.nonzero(i, as_tuple=True)[0]
+                ]
+                if not failure
+                else None
+            )
             for i, failure in zip(class_decisions, is_failure)
         ]
 
@@ -240,7 +270,11 @@ def predict_smiles_list(
         }
     )
     r = ensemble.predict_smiles_list(
-        ["[NH3+]CCCC[C@H](NC(=O)[C@@H]([NH3+])CC([O-])=O)C([O-])=O", "C[C@H](N)C(=O)NCC(O)=O#", ""],
+        [
+            "[NH3+]CCCC[C@H](NC(=O)[C@@H]([NH3+])CC([O-])=O)C([O-])=O",
+            "C[C@H](N)C(=O)NCC(O)=O#",
+            "",
+        ],
         load_preds_if_possible=False,
     )
     print(len(r), r[0])
@@ -7,10 +7,13 @@
     ChemlogPeptidesPredictor,
     ElectraPredictor,
     ResGatedPredictor,
-    ChEBILookupPredictor
+    ChEBILookupPredictor,
 )
 from chebifier.prediction_models.c3p_predictor import C3PPredictor
-from chebifier.prediction_models.chemlog_predictor import ChemlogXMolecularEntityPredictor, ChemlogOrganoXCompoundPredictor
+from chebifier.prediction_models.chemlog_predictor import (
+    ChemlogXMolecularEntityPredictor,
+    ChemlogOrganoXCompoundPredictor,
+)
 
 ENSEMBLES = {
     "mv": BaseEnsemble,
@@ -26,7 +29,7 @@
     "chebi_lookup": ChEBILookupPredictor,
     "chemlog_element": ChemlogXMolecularEntityPredictor,
     "chemlog_organox": ChemlogOrganoXCompoundPredictor,
-    "c3p": C3PPredictor
+    "c3p": C3PPredictor,
 }
 
 
 
@@ -3,5 +3,12 @@
 from .electra_predictor import ElectraPredictor
 from .gnn_predictor import ResGatedPredictor
 from .chebi_lookup import ChEBILookupPredictor
-__all__ = ["BasePredictor", "ChemlogPeptidesPredictor", "ElectraPredictor", "ResGatedPredictor", "ChEBILookupPredictor",
-           "ChemlogExtraPredictor"]
+
+__all__ = [
+    "BasePredictor",
+    "ChemlogPeptidesPredictor",
+    "ElectraPredictor",
+    "ResGatedPredictor",
+    "ChEBILookupPredictor",
+    "ChemlogExtraPredictor",
+]
@@ -12,22 +12,37 @@ class C3PPredictor(BasePredictor):
     Wrapper for C3P (url).
     """
 
-    def __init__(self, model_name: str, program_directory: Optional[Path]=None, chemical_classes: Optional[List[str]]=None, **kwargs):
+    def __init__(
+        self,
+        model_name: str,
+        program_directory: Optional[Path] = None,
+        chemical_classes: Optional[List[str]] = None,
+        **kwargs,
+    ):
         super().__init__(model_name, **kwargs)
         self.program_directory = program_directory
         self.chemical_classes = chemical_classes
         self.chebi_graph = kwargs.get("chebi_graph", None)
 
     @lru_cache(maxsize=100)
     def predict_smiles_tuple(self, smiles_list: tuple[str]) -> list:
-        result_list = c3p_classifier.classify(list(smiles_list), self.program_directory, self.chemical_classes, strict=False)
+        result_list = c3p_classifier.classify(
+            list(smiles_list),
+            self.program_directory,
+            self.chemical_classes,
+            strict=False,
+        )
         result_reformatted = [dict() for _ in range(len(smiles_list))]
         for result in result_list:
             chebi_id = result.class_id.split(":")[1]
-            result_reformatted[smiles_list.index(result.input_smiles)][chebi_id] = result.is_match
+            result_reformatted[smiles_list.index(result.input_smiles)][
+                chebi_id
+            ] = result.is_match
             if result.is_match and self.chebi_graph is not None:
                 for parent in list(self.chebi_graph.predecessors(int(chebi_id))):
-                    result_reformatted[smiles_list.index(result.input_smiles)][str(parent)] = 1
+                    result_reformatted[smiles_list.index(result.input_smiles)][
+                        str(parent)
+                    ] = 1
         return result_reformatted
 
     def explain_smiles(self, smiles):
@@ -36,14 +51,22 @@ def explain_smiles(self, smiles):
         than 300 classes, only take the positive ones.
         """
         highlights = []
-        result_list = c3p_classifier.classify([smiles], self.program_directory, self.chemical_classes, strict=False)
+        result_list = c3p_classifier.classify(
+            [smiles], self.program_directory, self.chemical_classes, strict=False
+        )
         for result in result_list:
             if result.is_match:
                 highlights.append(
-                    ("text", f"For class {result.class_name} ({result.class_id}), C3P gave the following explanation: {result.reason}")
+                    (
+                        "text",
+                        f"For class {result.class_name} ({result.class_id}), C3P gave the following explanation: {result.reason}",
+                    )
                 )
         highlights = [
-                         ("text", f"C3P made positive predictions for {len(highlights)} classes. The explanations are as follows:")
-                     ] + highlights
+            (
+                "text",
+                f"C3P made positive predictions for {len(highlights)} classes. The explanations are as follows:",
+            )
+        ] + highlights
 
-        return {"highlights": highlights}
+        return {"highlights": highlights}