import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from blocks import ConvBlock, FCBlock
import numpy as np

RBP_COUNT = 279      # size of the cell-type (RBP) profile vector
FIX_SEQ_LEN = 4000   # fixed input sequence length


class APAData(Dataset):
    """
    Dataset class for the APA-Net model.

    Args:
        seqs (ndarray): Sequence table; column 0 holds the sequence index and
            column 3 the one-hot encoded sequence matrix.
        df (ndarray): Sample table; column 1 holds the sequence index, column 2
            the cell type name, and column 3 the regression label.
        ct (DataFrame): Cell type profiles, one column per cell type name.
        device (str): Device to use (e.g., 'cuda' or 'cpu').
    """

    def __init__(self, seqs, df, ct, device):
        self.device = device
        self.reg_label = torch.from_numpy(np.array(df[:, 3].tolist(), dtype=np.float32)).to(device)
        self.seq_idx = torch.from_numpy(np.array(df[:, 1].tolist(), dtype=np.int32)).to(device)
        self.oneH_seqs = torch.from_numpy(np.array(list(seqs[:, 3]), dtype=np.int8)).to(device)
        self.oneH_seq_indexes = torch.from_numpy(np.array(seqs[:, 0], dtype=np.int32)).to(device)
        self.celltypes = df[:, 2]
        self.ct_profiles = ct

    def __len__(self):
        return self.reg_label.shape[0]

    def __getitem__(self, idx):
        seq_idx = self.seq_idx[idx]
        # Look up the one-hot sequence by its index; cast to float32 on the
        # dataset's device instead of hard-coding a CUDA tensor type.
        seq = self.oneH_seqs[torch.where(self.oneH_seq_indexes == seq_idx)].squeeze().float()
        reg_label = self.reg_label[idx]
        celltype_name = self.celltypes[idx]
        celltype = torch.from_numpy(self.ct_profiles[celltype_name].values.astype(np.float32)).to(self.device)
        return (seq, celltype, celltype_name, reg_label)
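

# Usage sketch (illustrative, not part of the original module). The input
# layouts are inferred from the indexing in APAData above; load_apa_inputs is
# a hypothetical loading helper, not a function defined in this repository.
#
#     seqs, df, ct = load_apa_inputs(...)
#     dataset = APAData(seqs, df, ct, device="cuda")
#     loader = DataLoader(dataset, batch_size=64, shuffle=True)
#     seq, celltype, celltype_name, reg_label = next(iter(loader))
#     # seq: (batch, 4, FIX_SEQ_LEN) one-hot sequences
#     # celltype: (batch, RBP_COUNT) cell type profile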


class APANET(nn.Module):
    """
    APANET is the deep neural network behind APA-Net.
    It consists of a convolutional block, a multi-head attention block,
    and two fully connected blocks.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.device = config['device']
        self._build_model()

    def _build_model(self):
        # Convolutional block
        self.conv_block_1 = ConvBlock(
            in_channel=4,
            out_channel=self.config['conv1kc'],
            cnvks=self.config['conv1ks'],
            cnvst=self.config['conv1st'],
            poolks=self.config['pool1ks'],
            poolst=self.config['pool1st'],
            pdropout=self.config['cnvpdrop1'],
            activation_t="ELU",
        )
        # Calculate the sequence length after convolution and pooling
        cnv1_len = self._get_conv1d_out_length(
            FIX_SEQ_LEN,
            self.config['conv1ks'],
            self.config['conv1st'],
            self.config['pool1ks'],
            self.config['pool1st'],
        )

        # Multi-head attention block (embed_dim must match the conv output channels)
        self.attention = nn.MultiheadAttention(
            embed_dim=self.config['conv1kc'],
            num_heads=self.config['Matt_heads'],
            dropout=self.config['Matt_drop'],
        )

        # Fully connected blocks
        fc1_L1 = cnv1_len * self.config['conv1kc']  # flattened conv/attention output size
        self.fc1 = FCBlock(
            layer_dims=[fc1_L1, *self.config['fc1_dims']],
            dropouts=self.config['fc1_dropouts'],
            dropout=True,
        )

        # The second FC block takes the fc1 output concatenated with the cell type profile
        fc2_L1 = self.config['fc1_dims'][-1] + RBP_COUNT
        self.fc2 = FCBlock(
            layer_dims=[fc2_L1, *self.config['fc2_dims']],
            dropouts=self.config['fc2_dropouts'],
            dropout=True,
        )

    def _get_conv1d_out_length(self, l_in, kernel, stride, pool_kernel, pool_stride):
        """Output length of a Conv1d layer (padding=kernel//2, dilation=1) followed by pooling."""
        length_after_conv = (l_in + 2 * (kernel // 2) - (kernel - 1) - 1) // stride + 1
        return (length_after_conv - pool_kernel) // pool_stride + 1
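
    # Worked example (illustrative values only, not defaults from the original
    # config): with FIX_SEQ_LEN = 4000, conv1ks = 11, conv1st = 1, pool1ks = 20
    # and pool1st = 20, the conv length is (4000 + 10 - 10 - 1) // 1 + 1 = 4000
    # and pooling gives (4000 - 20) // 20 + 1 = 200, so the flattened fc1 input
    # is 200 * conv1kc.
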
    def forward(self, seq, celltype):
        # Convolutional forward: (batch, 4, seq_len) -> (batch, channels, conv_len)
        x_conv = self.conv_block_1(seq)
        # nn.MultiheadAttention expects (seq_len, batch, embed_dim) by default
        x = x_conv.permute(2, 0, 1)
        x, _ = self.attention(x, x, x)
        x = x.permute(1, 2, 0)            # back to (batch, channels, conv_len)
        x = x + x_conv                    # residual connection around attention
        x = torch.flatten(x, 1)           # flatten for the FC layers
        x = self.fc1(x)                   # FC block 1
        x = torch.cat((x, celltype), 1)   # concatenate with the cell type profile
        x = self.fc2(x)                   # FC block 2
        return x

    def compile(self):
        """Move the model to its device and set up the optimizer and loss function."""
        self.to(self.device)
        # Note: only 'Adam' (mapped to AdamW) and 'mse' are handled here; other
        # config values leave the optimizer/loss unset.
        if self.config['opt'] == "Adam":
            self.optimizer = optim.AdamW(
                self.parameters(),
                weight_decay=self.config['adam_weight_decay'],
                lr=self.config['lr'],
            )
        if self.config['loss'] == "mse":
            self.loss_fn = nn.MSELoss()

    def save_model(self, filename):
        torch.save(self.state_dict(), filename)

    def load_model(self, filename):
        # map_location keeps checkpoints loadable when the saving and loading devices differ
        self.load_state_dict(torch.load(filename, map_location=self.device))
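

if __name__ == "__main__":
    # Minimal smoke-test sketch. The config values below are illustrative
    # assumptions (not the published APA-Net hyperparameters); only the key
    # names are taken from _build_model() and compile() above.
    config = {
        "device": "cuda" if torch.cuda.is_available() else "cpu",
        "conv1kc": 128,        # conv output channels / attention embed_dim
        "conv1ks": 11,         # conv kernel size
        "conv1st": 1,          # conv stride
        "pool1ks": 20,         # pooling kernel size
        "pool1st": 20,         # pooling stride
        "cnvpdrop1": 0.2,
        "Matt_heads": 8,       # must divide conv1kc
        "Matt_drop": 0.1,
        "fc1_dims": [1024, 256],
        "fc1_dropouts": [0.3, 0.3],
        "fc2_dims": [64, 1],   # final dim 1 for the regression target
        "fc2_dropouts": [0.3, 0.0],
        "opt": "Adam",
        "adam_weight_decay": 1e-4,
        "lr": 1e-3,
        "loss": "mse",
    }

    model = APANET(config)
    model.compile()

    # Dummy batch: one-hot sequences and cell-type (RBP) profiles.
    seq = torch.randn(2, 4, FIX_SEQ_LEN, device=config["device"])
    celltype = torch.randn(2, RBP_COUNT, device=config["device"])
    out = model(seq, celltype)
    print(out.shape)  # with the config above this should be torch.Size([2, 1])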