ffn: update for as per deepgo2 mlp architecture

aditya0by0 · aditya0by0 · commit 191c979ab514 · 2025-01-26T23:27:37.000+01:00
diff --git a/chebai/models/ffn.py b/chebai/models/ffn.py
@@ -1,28 +1,36 @@
-from typing import Dict, Any, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
-from chebai.models import ChebaiBaseNet
 import torch
-from torch import Tensor
+from torch import Tensor, nn
+
+from chebai.models import ChebaiBaseNet
 
 
 class FFN(ChebaiBaseNet):
+    # Reference: https://github.com/bio-ontology-research-group/deepgo2/blob/main/deepgo/models.py#L121-L139
 
     NAME = "FFN"
 
     def __init__(
         self,
-        input_size: int = 1000,
-        num_hidden_layers: int = 3,
-        hidden_size: int = 128,
+        input_size: int,
+        hidden_layers: List[int] = [
+            1024,
+        ],
         **kwargs
     ):
         super().__init__(**kwargs)
 
-        self.layers = torch.nn.ModuleList()
-        self.layers.append(torch.nn.Linear(input_size, hidden_size))
-        for _ in range(num_hidden_layers):
-            self.layers.append(torch.nn.Linear(hidden_size, hidden_size))
-        self.layers.append(torch.nn.Linear(hidden_size, self.out_dim))
+        layers = []
+        current_layer_input_size = input_size
+        for hidden_dim in hidden_layers:
+            layers.append(MLPBlock(current_layer_input_size, hidden_dim))
+            layers.append(Residual(MLPBlock(current_layer_input_size, hidden_dim)))
+            current_layer_input_size = hidden_dim
+
+        layers.append(torch.nn.Linear(current_layer_input_size, self.out_dim))
+        layers.append(nn.Sigmoid())
+        self.model = nn.Sequential(*layers)
 
     def _get_prediction_and_labels(self, data, labels, model_output):
         d = model_output["logits"]
@@ -56,6 +64,90 @@ def _process_for_loss(
 
     def forward(self, data, **kwargs):
         x = data["features"]
-        for layer in self.layers:
-            x = torch.relu(layer(x))
-        return {"logits": x}
+        return {"logits": self.model(x)}
+
+
+class Residual(nn.Module):
+    """
+    A residual layer that adds the output of a function to its input.
+
+    Args:
+        fn (nn.Module): The function to be applied to the input.
+
+    References:
+        https://github.com/bio-ontology-research-group/deepgo2/blob/main/deepgo/base.py#L6-L35
+    """
+
+    def __init__(self, fn):
+        """
+        Initialize the Residual layer with a given function.
+
+        Args:
+            fn (nn.Module): The function to be applied to the input.
+        """
+        super().__init__()
+        self.fn = fn
+
+    def forward(self, x):
+        """
+        Forward pass of the Residual layer.
+
+        Args:
+            x: Input tensor.
+
+        Returns:
+            torch.Tensor: The input tensor added to the result of applying the function `fn` to it.
+        """
+        return x + self.fn(x)
+
+
+class MLPBlock(nn.Module):
+    """
+    A basic Multi-Layer Perceptron (MLP) block with one fully connected layer.
+
+    Args:
+        in_features (int): The number of input features.
+        output_size (int): The number of output features.
+        bias (boolean): Add bias to the linear layer
+        layer_norm (boolean): Apply layer normalization
+        dropout (float): The dropout value
+        activation (nn.Module): The activation function to be applied after each fully connected layer.
+
+    References:
+        https://github.com/bio-ontology-research-group/deepgo2/blob/main/deepgo/base.py#L38-L73
+
+    Example:
+    ```python
+    # Create an MLP block with 2 hidden layers and ReLU activation
+    mlp_block = MLPBlock(input_size=64, output_size=10, activation=nn.ReLU())
+
+    # Apply the MLP block to an input tensor
+    input_tensor = torch.randn(32, 64)
+    output = mlp_block(input_tensor)
+    ```
+    """
+
+    def __init__(
+        self,
+        in_features,
+        out_features,
+        bias=True,
+        layer_norm=True,
+        dropout=0.1,
+        activation=nn.ReLU,
+    ):
+        super().__init__()
+        self.linear = nn.Linear(in_features, out_features, bias)
+        self.activation = activation()
+        self.layer_norm: Optional[nn.LayerNorm] = (
+            nn.LayerNorm(out_features) if layer_norm else None
+        )
+        self.dropout: Optional[nn.Dropout] = nn.Dropout(dropout) if dropout else None
+
+    def forward(self, x):
+        x = self.activation(self.linear(x))
+        if self.layer_norm:
+            x = self.layer_norm(x)
+        if self.dropout:
+            x = self.dropout(x)
+        return x
diff --git a/configs/model/ffn.yml b/configs/model/ffn.yml
@@ -2,6 +2,4 @@ class_path: chebai.models.ffn.FFN
 init_args:
   optimizer_kwargs:
     lr: 1e-3
-  hidden_size: 128
-  num_hidden_layers: 3
   input_size: 2560