Skip to content

Commit 7a5a983

Browse files
committed
deviating from paper, allow for atom and bond embeddings
1 parent d66c56c commit 7a5a983

File tree

3 files changed

+101
-1
lines changed

3 files changed

+101
-1
lines changed

alphafold3_pytorch/alphafold3.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2847,6 +2847,8 @@ def __init__(
28472847
dim_single = 384,
28482848
dim_pairwise = 128,
28492849
dim_token = 768,
2850+
num_atom_embeds: int | None = None,
2851+
num_atompair_embeds: int | None = None,
28502852
distance_bins: List[float] = torch.linspace(3, 20, 38).float().tolist(),
28512853
ignore_index = -1,
28522854
num_dist_bins: int | None = None,
@@ -2925,6 +2927,20 @@ def __init__(
29252927
):
29262928
super().__init__()
29272929

2930+
# optional atom and atom bond embeddings
2931+
2932+
has_atom_embeds = exists(num_atom_embeds)
2933+
has_atompair_embeds = exists(num_atompair_embeds)
2934+
2935+
if has_atom_embeds:
2936+
self.atom_embeds = nn.Embedding(num_atom_embeds, dim_atom)
2937+
2938+
if has_atompair_embeds:
2939+
self.atompair_embeds = nn.Embedding(num_atompair_embeds, dim_atompair)
2940+
2941+
self.has_atom_embeds = has_atom_embeds
2942+
self.has_atompair_embeds = has_atompair_embeds
2943+
29282944
# atoms per window
29292945

29302946
self.atoms_per_window = atoms_per_window
@@ -3143,6 +3159,8 @@ def forward(
31433159
atompair_inputs: Float['b m m dapi'] | Float['b nw w1 w2 dapi'],
31443160
additional_molecule_feats: Float[f'b n {ADDITIONAL_MOLECULE_FEATS}'],
31453161
molecule_atom_lens: Int['b n'],
3162+
atom_ids: Int['b m'] | None = None,
3163+
atompair_ids: Int['b m m'] | Int['b nw w1 w2'] | None = None,
31463164
atom_mask: Bool['b m'] | None = None,
31473165
token_bond: Bool['b n n'] | None = None,
31483166
msa: Float['b s n d'] | None = None,
@@ -3217,6 +3235,23 @@ def forward(
32173235
molecule_atom_lens = molecule_atom_lens
32183236
)
32193237

3238+
# handle maybe atom and atompair embeddings
3239+
3240+
assert not (exists(atom_ids) ^ self.has_atom_embeds), 'you either set `num_atom_embeds` and did not pass in `atom_ids` or vice versa'
3241+
assert not (exists(atompair_ids) ^ self.has_atompair_embeds), 'you either set `num_atompair_embeds` and did not pass in `atompair_ids` or vice versa'
3242+
3243+
if self.has_atom_embeds:
3244+
atom_embeds = self.atom_embeds(atom_ids)
3245+
atom_feats = atom_feats + atom_embeds
3246+
3247+
if self.has_atompair_embeds:
3248+
atompair_embeds = self.atompair_embeds(atompair_ids)
3249+
3250+
if atompair_embeds.ndim == 4:
3251+
atompair_embeds = full_pairwise_repr_to_windowed(atompair_embeds, window_size = self.atoms_per_window)
3252+
3253+
atompair_feats = atompair_feats + atompair_embeds
3254+
32203255
# relative positional encoding
32213256

32223257
relative_position_encoding = self.relative_position_encoding(

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "alphafold3-pytorch"
3-
version = "0.1.42"
3+
version = "0.1.43"
44
description = "Alphafold 3 - Pytorch"
55
authors = [
66
{ name = "Phil Wang", email = "[email protected]" }

tests/test_af3.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,71 @@ def test_alphafold3_force_return_loss():
635635

636636
assert loss == 0.
637637

638+
def test_alphafold3_with_atom_and_bond_embeddings():
    """Exercise the optional atom / atompair embedding path end to end.

    Constructs an Alphafold3 with `num_atom_embeds` / `num_atompair_embeds`
    set, passes the matching `atom_ids` / `atompair_ids`, and runs one
    training forward pass, checking that a scalar loss is returned.
    """
    alphafold3 = Alphafold3(
        num_atom_embeds = 7,
        num_atompair_embeds = 3,
        dim_atom_inputs = 77,
        dim_template_feats = 44
    )

    # mock inputs

    seq_len = 16

    molecule_atom_lens = torch.randint(1, 3, (2, seq_len))
    # `.amax()` yields a 0-dim tensor; convert to a plain int so it can be
    # used unambiguously as a shape argument below
    atom_seq_len = int(molecule_atom_lens.sum(dim = -1).amax())

    # ids must lie within the embedding ranges declared above (7 and 3)
    atom_ids = torch.randint(0, 7, (2, atom_seq_len))
    atompair_ids = torch.randint(0, 3, (2, atom_seq_len, atom_seq_len))

    atom_inputs = torch.randn(2, atom_seq_len, 77)
    atompair_inputs = torch.randn(2, atom_seq_len, atom_seq_len, 5)

    additional_molecule_feats = torch.randn(2, seq_len, 10)

    template_feats = torch.randn(2, 2, seq_len, seq_len, 44)
    template_mask = torch.ones((2, 2)).bool()

    msa = torch.randn(2, 7, seq_len, 64)
    msa_mask = torch.ones((2, 7)).bool()

    # required for training, but omitted on inference

    atom_pos = torch.randn(2, atom_seq_len, 3)
    molecule_atom_indices = molecule_atom_lens - 1 # last atom, as an example

    distance_labels = torch.randint(0, 37, (2, seq_len, seq_len))
    pae_labels = torch.randint(0, 64, (2, seq_len, seq_len))
    pde_labels = torch.randint(0, 64, (2, seq_len, seq_len))
    plddt_labels = torch.randint(0, 50, (2, seq_len))
    resolved_labels = torch.randint(0, 2, (2, seq_len))

    # train

    loss = alphafold3(
        num_recycling_steps = 2,
        atom_ids = atom_ids,
        atompair_ids = atompair_ids,
        atom_inputs = atom_inputs,
        atompair_inputs = atompair_inputs,
        molecule_atom_lens = molecule_atom_lens,
        additional_molecule_feats = additional_molecule_feats,
        msa = msa,
        msa_mask = msa_mask,
        templates = template_feats,
        template_mask = template_mask,
        atom_pos = atom_pos,
        molecule_atom_indices = molecule_atom_indices,
        distance_labels = distance_labels,
        pae_labels = pae_labels,
        pde_labels = pde_labels,
        plddt_labels = plddt_labels,
        resolved_labels = resolved_labels
    )

    # a training forward pass returns a scalar loss tensor, which has exactly
    # one element; the original `loss.numel() == 0` asserts an *empty* tensor
    # and could never hold
    assert loss.numel() == 1
702+
638703
# test creation from config
639704

640705
def test_alphafold3_config():

Comments (0)