Commit 3afdf81

Merge pull request #72 from daisybio/mypy_fix
Mypy fix
2 parents: baa6639 + 9fe6f88

36 files changed: 958 additions, 561 deletions

create_report.py

Lines changed: 2 additions & 2 deletions

@@ -341,8 +341,8 @@ def draw_per_grouping_algorithm_plots(
         custom_id=run_id,
     )
     # get all html files from results/{run_id}
-    all_files = []
-    for _, _, files in os.walk(f"results/{run_id}"):
+    all_files: list[str] = []
+    for _, _, files in os.walk(f"results/{run_id}"):  # type: ignore[assignment]
         for file in files:
             if file.endswith(".html") and file not in ["index.html", "LPO.html", "LCO.html", "LDO.html"]:
                 all_files.append(file)
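
The annotation on the empty list is the usual way to give mypy the element type up front; otherwise the type of all_files has to be inferred from later appends. A minimal standalone sketch of the pattern (the results path below is made up, not the repository's):

    import os

    all_files: list[str] = []  # annotation tells mypy this collects strings
    for _, _, files in os.walk("results/example_run"):
        for file in files:
            if file.endswith(".html"):
                all_files.append(file)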

drevalpy/datasets/dataset.py

Lines changed: 3 additions & 4 deletions

@@ -83,7 +83,7 @@ def __init__(
         if len(self.response) != len(self.drug_ids):
             raise AssertionError("response and drug_ids/cell_line_ids have different lengths")
         # Used in the pipeline!
-        self.dataset_name = dataset_name
+        self.dataset_name = dataset_name if dataset_name is not None else ""

         self.predictions: Optional[np.ndarray] = None
         if predictions is not None:

@@ -785,14 +785,13 @@ def get_view_names(self) -> list[str]:
         """
         return list(self.features[list(self.features.keys())[0]].keys())

-    def get_feature_matrix(self, view: str, identifiers: np.ndarray, stack: bool = True) -> np.ndarray:
+    def get_feature_matrix(self, view: str, identifiers: np.ndarray) -> np.ndarray:
         """
         Returns the feature matrix for the given view.

         The feature view must be a vector or matrix.
         :param view: view name
         :param identifiers: list of identifiers (cell lines oder drugs)
-        :param stack: if True, the feature vectors are stacked to a matrix
         :returns: feature matrix
         :raises AssertionError: if no identifiers are given
         :raises AssertionError: if view is not in the FeatureDataset

@@ -818,7 +817,7 @@ def get_feature_matrix(self, view: str, identifiers: np.ndarray, stack: bool = T
         if not all(isinstance(self.features[id_][view], np.ndarray) for id_ in identifiers):
             raise AssertionError(f"get_feature_matrix only works for vectors or matrices. {view} is not a numpy array.")
         out = np.array([self.features[id_][view] for id_ in identifiers])
-        return np.stack(out, axis=0)
+        return out

     def copy(self):
         """Returns a copy of the feature dataset.

drevalpy/datasets/loader.py

Lines changed: 7 additions & 1 deletion

@@ -1,6 +1,7 @@
 """Contains functions to load the GDSC1, GDSC2, CCLE, and Toy datasets."""

 import os
+from typing import Callable

 import pandas as pd

@@ -91,7 +92,12 @@ def load_toy(path_data: str = "data") -> DrugResponseDataset:
     )


-AVAILABLE_DATASETS = {"GDSC1": load_gdsc1, "GDSC2": load_gdsc2, "CCLE": load_ccle, "Toy_Data": load_toy}
+AVAILABLE_DATASETS: dict[str, Callable] = {
+    "GDSC1": load_gdsc1,
+    "GDSC2": load_gdsc2,
+    "CCLE": load_ccle,
+    "Toy_Data": load_toy,
+}


 @pipeline_function
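
Typing the registry as dict[str, Callable] gives mypy something concrete to check at lookup sites; a tighter annotation such as Callable[..., DrugResponseDataset] would narrow it further. A hedged sketch of how a registry like this is typically consumed (the loader body below is a placeholder, not the real function):

    from typing import Callable

    def load_toy(path_data: str = "data") -> dict:  # placeholder return type for illustration
        return {"dataset": "Toy_Data", "path": path_data}

    AVAILABLE_DATASETS: dict[str, Callable] = {"Toy_Data": load_toy}
    loader = AVAILABLE_DATASETS["Toy_Data"]  # mypy sees a Callable
    print(loader())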

drevalpy/evaluation.py

Lines changed: 2 additions & 0 deletions

@@ -234,6 +234,8 @@ def evaluate(dataset: DrugResponseDataset, metric: list[str] | str):
     if isinstance(metric, str):
         metric = [metric]
     predictions = dataset.predictions
+    if predictions is None:
+        raise AssertionError("No predictions found in the dataset")
     response = dataset.response

     results = {}
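
The added check doubles as a type guard: after the raise, mypy narrows Optional[np.ndarray] down to np.ndarray, so later arithmetic on predictions type-checks. A minimal illustration of the narrowing pattern (names are illustrative, not the evaluate implementation):

    from typing import Optional
    import numpy as np

    def mean_prediction(predictions: Optional[np.ndarray]) -> float:
        if predictions is None:
            raise AssertionError("No predictions found in the dataset")
        # from here on, mypy treats predictions as np.ndarray
        return float(predictions.mean())

    print(mean_prediction(np.array([0.1, 0.5, 0.9])))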

drevalpy/experiment.py

Lines changed: 32 additions & 24 deletions

@@ -4,7 +4,7 @@
 import os
 import shutil
 import warnings
-from typing import Optional
+from typing import Any, Optional

 import numpy as np
 import pandas as pd

@@ -16,7 +16,7 @@
 from .datasets.dataset import DrugResponseDataset, FeatureDataset
 from .evaluation import evaluate, get_mode
 from .models import MODEL_FACTORY, MULTI_DRUG_MODEL_FACTORY, SINGLE_DRUG_MODEL_FACTORY
-from .models.drp_model import DRPModel, SingleDrugModel
+from .models.drp_model import DRPModel
 from .pipeline_function import pipeline_function


@@ -82,14 +82,14 @@ def drug_response_experiment(
     :param test_mode: test mode one of "LPO", "LCO", "LDO" (leave-pair-out, leave-cell-line-out, leave-drug-out)
     :param overwrite: whether to overwrite existing results
     :param path_data: path to the data directory, usually data/
+    :raises ValueError: if no cv splits are found
     """
     if baselines is None:
         baselines = []
     cross_study_datasets = cross_study_datasets or []
     result_path = os.path.join(path_out, run_id, test_mode)
     split_path = os.path.join(result_path, "splits")
     result_folder_exists = os.path.exists(result_path)
-    randomization_test_views = []
     if result_folder_exists and overwrite:
         # if results exists, delete them if overwrite is True
         print(f"Overwriting existing results at {result_path}")

@@ -146,6 +146,9 @@ def drug_response_experiment(

         model_hpam_set = model_class.get_hyperparameter_set()

+        if response_data.cv_splits is None:
+            raise ValueError("No cv splits found.")
+
         for split_index, split in enumerate(response_data.cv_splits):
             print(f"################# FOLD {split_index+1}/{len(response_data.cv_splits)} " f"#################")

@@ -233,7 +236,7 @@ def drug_response_experiment(
                 best_hpams = json.load(f)
             if not is_baseline:
                 if randomization_mode is not None:
-                    print(f"Randomization tests for {model_class.model_name}")
+                    print(f"Randomization tests for {model_class.get_model_name()}")
                     # if this line changes, it also needs to be changed in pipeline:
                     # randomization_split.py
                     randomization_test_views = get_randomization_test_views(

@@ -253,7 +256,7 @@ def drug_response_experiment(
                         response_transformation=response_transformation,
                     )
                 if n_trials_robustness > 0:
-                    print(f"Robustness test for {model_class.model_name}")
+                    print(f"Robustness test for {model_class.get_model_name()}")
                     robustness_test(
                         n_trials=n_trials_robustness,
                         model=model,

@@ -289,7 +292,7 @@ def consolidate_single_drug_model_predictions(
     out_path: str = "",
 ) -> None:
     """
-    Consolidate SingleDrugModel predictions into a single file.
+    Consolidate single drug model predictions into a single file.

     :param models: list of model classes to compare, e.g., [SimpleNeuralNetwork, RandomForest]
     :param n_cv_splits: number of cross-validation splits, e.g., 5

@@ -301,10 +304,11 @@ def consolidate_single_drug_model_predictions(
         will be stored in the work directory.
     """
     for model in models:
-        if model.model_name in SINGLE_DRUG_MODEL_FACTORY:
-            model_instance = MODEL_FACTORY[model.model_name]()
-            model_path = os.path.join(results_path, str(model.model_name))
-            out_path = os.path.join(out_path, str(model.model_name))
+        if model.get_model_name() in SINGLE_DRUG_MODEL_FACTORY:
+
+            model_instance = MODEL_FACTORY[model.get_model_name()]()
+            model_path = os.path.join(results_path, model.get_model_name())
+            out_path = os.path.join(out_path, model.get_model_name())
             os.makedirs(os.path.join(out_path, "predictions"), exist_ok=True)
             if cross_study_datasets:
                 os.makedirs(os.path.join(out_path, "cross_study"), exist_ok=True)

@@ -316,7 +320,7 @@ def consolidate_single_drug_model_predictions(
             for split in range(n_cv_splits):

                 # Collect predictions for drugs across all scenarios (main, cross_study, robustness, randomization)
-                predictions = {
+                predictions: Any = {
                     "main": [],
                     "cross_study": {},
                     "robustness": {},

@@ -423,14 +427,14 @@ def consolidate_single_drug_model_predictions(

 def load_features(
     model: DRPModel, path_data: str, dataset: DrugResponseDataset
-) -> tuple[Optional[FeatureDataset], Optional[FeatureDataset]]:
+) -> tuple[FeatureDataset, Optional[FeatureDataset]]:
     """
     Load and reduce cell line and drug features for a given dataset.

     :param model: model to use, e.g., SimpleNeuralNetwork
     :param path_data: path to the data directory, e.g., data/
     :param dataset: dataset to load features for, e.g., GDSC2
-    :returns: tuple of cell line and drug features
+    :returns: tuple of cell line and, potentially, drug features
     """
     cl_features = model.load_cell_line_features(data_path=path_data, dataset_name=dataset.dataset_name)
     drug_features = model.load_drug_features(data_path=path_data, dataset_name=dataset.dataset_name)

@@ -480,10 +484,11 @@ def cross_study_prediction(

     cell_lines_to_keep = cl_features.identifiers if cl_features is not None else None

+    drugs_to_keep: Optional[np.ndarray] = None
     if single_drug_id is not None:
         drugs_to_keep = np.array([single_drug_id])
-    else:
-        drugs_to_keep = drug_features.identifiers if drug_features is not None else None
+    elif drug_features is not None:
+        drugs_to_keep = drug_features.identifiers

     print(
         f"Reducing cross study dataset ... feature data available for "

@@ -778,12 +783,15 @@ def randomize_train_predict(
         )
         return

-    cl_features_rand = cl_features.copy() if cl_features is not None else None
-    drug_features_rand = drug_features.copy() if drug_features is not None else None
-    if cl_features_rand is not None and view in cl_features.get_view_names():
-        cl_features_rand.randomize_features(view, randomization_type=randomization_type)
-    elif drug_features_rand is not None and view in drug_features.get_view_names():
-        drug_features_rand.randomize_features(view, randomization_type=randomization_type)
+    cl_features_rand: Optional[FeatureDataset] = None
+    if cl_features is not None:
+        cl_features_rand = cl_features.copy()
+        cl_features_rand.randomize_features(view, randomization_type=randomization_type)  # type: ignore[union-attr]
+
+    drug_features_rand: Optional[FeatureDataset] = None
+    if drug_features is not None:
+        drug_features_rand = drug_features.copy()
+        drug_features_rand.randomize_features(view, randomization_type=randomization_type)  # type: ignore[union-attr]

     test_dataset_rand = train_and_predict(
         model=model,

@@ -1069,11 +1077,11 @@ def make_model_list(models: list[type[DRPModel]], response_data: DrugResponseDat
     model_list = {}
     unique_drugs = np.unique(response_data.drug_ids)
     for model in models:
-        if issubclass(model, SingleDrugModel):
+        if model.is_single_drug_model:
             for drug in unique_drugs:
-                model_list[f"{model.model_name}.{drug}"] = str(model.model_name)
+                model_list[f"{model.get_model_name()}.{drug}"] = model.get_model_name()
         else:
-            model_list[str(model.model_name)] = str(model.model_name)
+            model_list[model.get_model_name()] = model.get_model_name()
     return model_list
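
Two recurring moves in this file are replacing issubclass(model, SingleDrugModel) with an is_single_drug_model class attribute and replacing model.model_name with a get_model_name() classmethod, so make_model_list keeps operating on model classes rather than instances. A reduced sketch of that interplay (the base class here is a simplified stand-in for the real DRPModel hierarchy):

    class DRPModel:
        is_single_drug_model: bool = False

        @classmethod
        def get_model_name(cls) -> str:
            raise NotImplementedError

    class MOLIR(DRPModel):
        is_single_drug_model = True

        @classmethod
        def get_model_name(cls) -> str:
            return "MOLIR"

    def make_model_list(models: list[type[DRPModel]], drugs: list[str]) -> dict[str, str]:
        model_list: dict[str, str] = {}
        for model in models:
            if model.is_single_drug_model:
                for drug in drugs:
                    model_list[f"{model.get_model_name()}.{drug}"] = model.get_model_name()
            else:
                model_list[model.get_model_name()] = model.get_model_name()
        return model_list

    print(make_model_list([MOLIR], ["Afatinib"]))  # {'MOLIR.Afatinib': 'MOLIR'}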

drevalpy/models/MOLIR/molir.py

Lines changed: 37 additions & 14 deletions

@@ -13,12 +13,12 @@
 from sklearn.preprocessing import StandardScaler

 from ...datasets.dataset import DrugResponseDataset, FeatureDataset
-from ..drp_model import SingleDrugModel
+from ..drp_model import DRPModel
 from ..utils import get_multiomics_feature_dataset
 from .utils import MOLIModel, get_dimensions_of_omics_data


-class MOLIR(SingleDrugModel):
+class MOLIR(DRPModel):
     """
     Regression extension of MOLI: multi-omics late integration deep neural network.


@@ -28,10 +28,10 @@ class MOLIR(SingleDrugModel):
     We use a regression adaption with MSE loss and a mechanism to find positive and negative samples.
     """

+    is_single_drug_model = True
     cell_line_views = ["gene_expression", "mutations", "copy_number_variation_gistic"]
     drug_views = []
     early_stopping = True
-    model_name = "MOLIR"

     def __init__(self) -> None:
         """

@@ -41,8 +41,17 @@ def __init__(self) -> None:
         gene expression, mutation and copy number variation data.
         """
         super().__init__()
-        self.model = None
-        self.hyperparameters = None
+        self.model: MOLIModel | None = None
+        self.hyperparameters: dict[str, Any] = dict()
+
+    @classmethod
+    def get_model_name(cls) -> str:
+        """
+        Returns the model name.
+
+        :returns: MOLIR
+        """
+        return "MOLIR"

     def build_model(self, hyperparameters: dict[str, Any]) -> None:
         """

@@ -68,6 +77,7 @@ def train(
         copy number variation data. If there is no training data, the model is set to None (and predictions will be
         skipped as well). If there is not enough training data, the predictions will be made on the randomly
         initialized model.
+
         :param output: drug response data
         :param cell_line_input: cell line omics features, i.e., gene expression, mutations and copy number variation
         :param drug_input: drug features, not needed

@@ -86,7 +96,7 @@ def train(
             transformer=scaler_gex,
             view="gene_expression",
         )
-        if self.early_stopping and len(output_earlystopping) < 2:
+        if output_earlystopping is not None and self.early_stopping and len(output_earlystopping) < 2:
             output_earlystopping = None
         dim_gex, dim_mut, dim_cnv = get_dimensions_of_omics_data(cell_line_input)
         self.model = MOLIModel(

@@ -109,19 +119,20 @@ def train(

     def predict(
         self,
-        drug_ids: str | np.ndarray,
-        cell_line_ids: str | np.ndarray,
+        cell_line_ids: np.ndarray,
+        drug_ids: np.ndarray,
+        cell_line_input: FeatureDataset,
         drug_input: FeatureDataset | None = None,
-        cell_line_input: FeatureDataset = None,
     ) -> np.ndarray:
         """
         Predicts the drug response.

         If there was no training data, only nans will be returned.
-        :param drug_ids: Drugs to predict
+
         :param cell_line_ids: Cell lines to predict
-        :param drug_input: drug features, not needed
+        :param drug_ids: Drugs to predict
         :param cell_line_input: cell line omics features
+        :param drug_input: drug features, not needed
         :returns: Predicted drug response
         """
         input_data = self.get_feature_matrices(

@@ -130,9 +141,11 @@ def predict(
             cell_line_input=cell_line_input,
             drug_input=drug_input,
         )
-        gene_expression = input_data["gene_expression"]
-        mutations = input_data["mutations"]
-        cnvs = input_data["copy_number_variation_gistic"]
+        (gene_expression, mutations, cnvs) = (
+            input_data["gene_expression"],
+            input_data["mutations"],
+            input_data["copy_number_variation_gistic"],
+        )
         if self.model is None:
             print("No model trained, will predict NA.")
             return np.array([np.nan] * len(cell_line_ids))

@@ -155,3 +168,13 @@ def load_cell_line_features(self, data_path: str, dataset_name: str) -> FeatureD
         # log transformation
         feature_dataset.apply(function=np.log, view="gene_expression")
         return feature_dataset
+
+    def load_drug_features(self, data_path: str, dataset_name: str) -> FeatureDataset | None:
+        """
+        Returns None, as drug features are not needed for MOLIR.
+
+        :param data_path: path to the data
+        :param dataset_name: name of the dataset
+        :returns: None
+        """
+        return None
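
Annotating self.model as MOLIModel | None makes the untrained path explicit, which is why predict starts with a None check and returns NaNs. A stripped-down sketch of that guard (TinyModel stands in for MOLIModel; the real predict also assembles the omics feature matrices first):

    import numpy as np

    class TinyModel:
        """Stand-in for MOLIModel; always predicts zeros."""

        def predict(self, cell_line_ids: np.ndarray) -> np.ndarray:
            return np.zeros(len(cell_line_ids))

    class MOLIRSketch:
        def __init__(self) -> None:
            self.model: TinyModel | None = None  # mirrors self.model: MOLIModel | None

        def predict(self, cell_line_ids: np.ndarray) -> np.ndarray:
            if self.model is None:
                print("No model trained, will predict NA.")
                return np.array([np.nan] * len(cell_line_ids))
            return self.model.predict(cell_line_ids)

    print(MOLIRSketch().predict(np.array(["CL1", "CL2"])))  # [nan nan]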
