Commit 62f9ab1

Merge pull request #111 from Genentech/transform-ensembles
Transform ensembles
2 parents: f23dd76 + 39368ff

File tree: 3 files changed (+58, -13 lines)


src/grelu/lightning/__init__.py

Lines changed: 45 additions & 6 deletions
```diff
@@ -1146,6 +1146,12 @@ def __init__(self, models: list, model_names: Optional[List[str]] = None) -> None:
             "n_tasks": sum([model.model_params["n_tasks"] for model in self.models])
         }
         self.data_params = {"tasks": defaultdict(list)}
+
+        # Set models to eval mode (since this class is used for prediction and design)
+        for model in self.models:
+            model.eval()
+
+        self.reset_transform()
         self._combine_tasks()
 
     def _combine_tasks(self) -> None:
@@ -1171,18 +1177,51 @@ def forward(self, x: Tensor) -> Tensor:
         """
         Forward Pass.
         """
-        return torch.cat([model(x) for model in self.models], axis=1)  # B, T, L
+        x = torch.cat([model(x) for model in self.models], axis=1)  # B, T, L
+
+        # apply transform to ensemble output
+        x = self.transform(x)
+        return x
 
-    def predict_on_dataset(self, dataset: Callable, **kwargs) -> np.ndarray:
+    def add_transform(self, prediction_transform: Callable) -> None:
         """
-        This will return the concatenated predictions from all the
-        constituent models, in the order in which they were supplied.
-        Predictions will be concatenated along the task axis.
+        Add a prediction transform
+        """
+        if prediction_transform is not None:
+            self.transform = prediction_transform
+
+    def reset_transform(self) -> None:
+        """
+        Remove a prediction transform
         """
-        return np.concatenate(
+        self.transform = nn.Identity()
+
+    def predict_on_dataset(
+        self,
+        dataset: Callable,
+        **kwargs,
+    ):
+        """
+        Predict for a dataset of sequences or variants. This will return
+        the concatenated predictions from all the constituent models, in the
+        order in which they were supplied to __.init__. Predictions will be
+        concatenated along the task axis.
+
+        Args:
+            dataset: Dataset object that yields one-hot encoded sequences
+            **kwargs: Additional arguments to pass to the `predict_on_dataset`
+                functions of the constituent models.
+
+        Returns:
+            Model predictions as a numpy array
+        """
+        preds = np.concatenate(
             [model.predict_on_dataset(dataset, **kwargs) for model in self.models],
             axis=-2,
         )
+        if not isinstance(self.transform, nn.Identity):
+            preds = self.transform(torch.tensor(preds)).numpy()
+        return preds
 
     def get_task_idxs(
         self, tasks: Union[str, int, List[str], List[int]], key: str = "name"
```
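The changes above add an optional prediction transform to the ensemble wrapper: it defaults to `nn.Identity()`, is attached with `add_transform()` and cleared with `reset_transform()`, and is applied both in `forward()` and to the output of `predict_on_dataset()`. Below is a minimal, self-contained sketch of that pattern; `ToyEnsemble`, `MeanOverTasks`, and the toy models are illustrative stand-ins, not the gReLU classes themselves.

```python
import torch
from torch import nn


class ToyEnsemble(nn.Module):
    """Illustrative stand-in for the ensemble wrapper changed above."""

    def __init__(self, models):
        super().__init__()
        self.models = nn.ModuleList(models)
        # Set models to eval mode (the wrapper is used for prediction and design)
        for model in self.models:
            model.eval()
        # Start with an identity transform
        self.reset_transform()

    def add_transform(self, prediction_transform):
        if prediction_transform is not None:
            self.transform = prediction_transform

    def reset_transform(self):
        self.transform = nn.Identity()

    def forward(self, x):
        # Concatenate constituent model outputs along the task axis (B, T, L),
        # then apply the transform to the ensemble output
        x = torch.cat([model(x) for model in self.models], dim=1)
        return self.transform(x)


class MeanOverTasks(nn.Module):
    """Toy transform: average over the task axis, like Aggregate(task_aggfunc="mean")."""

    def forward(self, x):
        return x.mean(dim=-2, keepdim=True)


# Two toy "models", each mapping (B, 8) inputs to (B, 2, 1) outputs
models = [nn.Sequential(nn.Linear(8, 2), nn.Unflatten(1, (2, 1))) for _ in range(2)]
ens = ToyEnsemble(models)
x = torch.randn(2, 8)
print(ens(x).shape)                 # torch.Size([2, 4, 1]): tasks concatenated
ens.add_transform(MeanOverTasks())
print(ens(x).shape)                 # torch.Size([2, 1, 1]): transform applied
ens.reset_transform()
print(ens(x).shape)                 # torch.Size([2, 4, 1]) again
```

Defaulting to `nn.Identity()` lets `forward()` call `self.transform(x)` unconditionally, while `predict_on_dataset()` skips the transform step entirely when nothing has been added.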

src/grelu/transforms/prediction_transforms.py

Lines changed: 7 additions & 7 deletions
```diff
@@ -97,14 +97,14 @@ def filter(self, x: Union[Tensor, np.ndarray]) -> Union[Tensor, np.ndarray]:
         """
         # Select positions
         if self.positions is not None:
-            x = x[:, :, self.positions]
+            x = x[..., self.positions]
 
         # Select tasks
         if self.tasks is not None:
-            x = x[:, self.tasks, :]
+            x = x[..., self.tasks, :]
         elif self.except_tasks is not None:
             keep = [i for i in range(x.shape[1]) if i not in self.except_tasks]
-            x = x[:, keep, :]
+            x = x[..., keep, :]
         return x
 
     def torch_aggregate(self, x: Tensor) -> Tensor:
@@ -113,11 +113,11 @@ def torch_aggregate(self, x: Tensor) -> Tensor:
         """
         # Aggregate positions
         if self.length_aggfunc is not None:
-            x = self.length_aggfunc(x, axis=2, keepdims=True)
+            x = self.length_aggfunc(x, axis=-1, keepdims=True)
 
         # Aggregate tasks
         if self.task_aggfunc is not None:
-            x = self.task_aggfunc(x, axis=1, keepdims=True)
+            x = self.task_aggfunc(x, axis=-2, keepdims=True)
         return x
 
     def numpy_aggregate(self, x: np.ndarray) -> np.ndarray:
@@ -126,11 +126,11 @@ def numpy_aggregate(self, x: np.ndarray) -> np.ndarray:
         """
         # Aggregate positions
         if self.length_aggfunc is not None:
-            x = self.length_aggfunc_numpy(x, axis=2, keepdims=True)
+            x = self.length_aggfunc_numpy(x, axis=-1, keepdims=True)
 
         # Aggregate tasks
         if self.task_aggfunc is not None:
-            x = self.task_aggfunc_numpy(x, axis=1, keepdims=True)
+            x = self.task_aggfunc_numpy(x, axis=-2, keepdims=True)
 
         return x
```
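Replacing explicit leading indices (`x[:, :, self.positions]`, `axis=2`) with `Ellipsis` and negative axes makes the filtering and aggregation act on the last two dimensions (tasks, length), so the same transform works whether or not a leading batch dimension is present. A small numpy sketch of the idea, illustrative only and not the `Aggregate` class itself:

```python
import numpy as np


def aggregate(x: np.ndarray) -> np.ndarray:
    # Aggregate positions along the last axis (length), then tasks along the
    # second-to-last axis, keeping both dimensions
    x = np.mean(x, axis=-1, keepdims=True)
    x = np.mean(x, axis=-2, keepdims=True)
    return x


batched = np.random.rand(2, 4, 10)   # (batch, tasks, length)
unbatched = np.random.rand(4, 10)    # (tasks, length)
print(aggregate(batched).shape)      # (2, 1, 1)
print(aggregate(unbatched).shape)    # (1, 1)
```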

tests/test_lightning.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -371,6 +371,12 @@ def test_lightning_model_ensemble():
     preds = model.predict_on_dataset(dataset=udataset, devices="cpu")
     assert preds.shape == (2, 4, 1)
 
+    # Test transform
+    t = Aggregate(task_aggfunc="mean")
+    model.add_transform(t)
+    preds = model.predict_on_dataset(dataset=udataset, devices="cpu")
+    assert preds.shape == (2, 1, 1)
+
 
     bin_model = generate_model(task="binary", loss="bce", n_tasks=2)
     bin_model.model_params["crop_len"] = 0
```
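The new test covers the full path: adding `Aggregate(task_aggfunc="mean")` to the ensemble collapses the task axis of the concatenated predictions from 4 to 1. Inside `predict_on_dataset`, a non-identity transform is applied to the numpy predictions via a round trip through a tensor; the sketch below mirrors that guard, with `MeanOverTasks` standing in for a real prediction transform.

```python
import numpy as np
import torch
from torch import nn


class MeanOverTasks(nn.Module):
    # Stand-in for a prediction transform such as Aggregate(task_aggfunc="mean")
    def forward(self, x):
        return x.mean(dim=-2, keepdim=True)


def apply_transform(preds: np.ndarray, transform: nn.Module) -> np.ndarray:
    # Mirror of the check in predict_on_dataset: only transform when something
    # other than nn.Identity has been added, hopping through a tensor because
    # transforms operate on tensors
    if not isinstance(transform, nn.Identity):
        preds = transform(torch.tensor(preds)).numpy()
    return preds


preds = np.random.rand(2, 4, 1)                        # concatenated ensemble predictions
print(apply_transform(preds, nn.Identity()).shape)     # (2, 4, 1): unchanged
print(apply_transform(preds, MeanOverTasks()).shape)   # (2, 1, 1): matches the new assert
```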
