Commit 291f715

committed: complete the function necessary to broadcast token features back to atom features for the packed representation
1 parent 57f6da2 commit 291f715

File tree: 2 files changed, +68 -3 lines changed

alphafold3_pytorch/alphafold3.py

Lines changed: 59 additions & 0 deletions
@@ -89,6 +89,20 @@ def inner(t, *args, **kwargs):
 
 # packed atom representation functions
 
+@typecheck
+def lens_to_mask(
+    lens: Int['b n'] | Int[' b']
+) -> Bool['b m']:
+
+    device = lens.device
+
+    if lens.ndim == 2:
+        lens = reduce(lens, 'b m -> b', 'sum')
+
+    max_len = lens.amax()
+    arange = torch.arange(max_len, device = device)
+    return einx.less('m, b -> b m', arange, lens)
+
 @typecheck
 def mean_pool_with_lens(
     feats: Float['b m d'],
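A quick illustration of the new lens_to_mask helper (an editor's sketch, not part of the commit): it turns per-entry lengths into a boolean mask over the packed atom axis. The plain-PyTorch comparison below does the same thing as the einx.less call.

import torch

lens = torch.tensor([2, 4, 3])            # total atoms per batch entry
arange = torch.arange(lens.amax())        # tensor([0, 1, 2, 3])
mask = arange[None, :] < lens[:, None]    # same comparison as einx.less('m, b -> b m', arange, lens)
# tensor([[ True,  True, False, False],
#         [ True,  True,  True,  True],
#         [ True,  True,  True, False]])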
@@ -115,6 +129,51 @@ def mean_pool_with_lens(
     avg = einx.where('b n, b n d, -> b n d', mask, avg, 0.)
     return avg
 
+@typecheck
+def repeat_consecutive_with_lens(
+    feats: Float['b n d'],
+    lens: Int['b n'],
+    max_length: int | None = None,
+    return_mask = False
+) -> Float['b m d'] | Tuple[Float['b m d'], Bool['b m']]:
+
+    device = feats.device
+
+    # derive arange from the max length
+
+    total_lens = reduce(lens, 'b n -> b', 'sum')
+
+    if not exists(max_length):
+        max_length = total_lens.amax()
+
+    arange = torch.arange(max_length, device = device)
+
+    # get packed atom mask from the total lengths
+
+    mask = lens_to_mask(total_lens)
+
+    lens = F.pad(lens, (1, 0), value = 0)
+    cumsum_lens = lens.cumsum(dim = -1)
+    left_index, right_index = cumsum_lens[:, :-1], cumsum_lens[:, 1:]
+
+    # derive the mask for consecutives per feat
+
+    left_mask = einx.greater_equal('m, b n -> b n m', arange, left_index)
+    right_mask = einx.less('m, b n -> b n m', arange, right_index)
+
+    consecutive_mask = left_mask & right_mask
+
+    # now broadcast and sum for consecutive features
+
+    feats = einx.multiply('b n d, b n m -> b n m d', feats, consecutive_mask.float())
+    feats = reduce(feats, 'b n m d -> b m d', 'sum')
+
+    if not return_mask:
+        return feats
+
+    mask = mask[:, :max_length]
+    return feats, mask
+
 # linear and outer sum
 # for single repr -> pairwise pattern throughout this architecture
 
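For reference (a sketch, not part of the commit): for a single batch entry, the masked broadcast-and-sum above reduces to torch.repeat_interleave along the token axis, which is exactly what the new test below checks.

import torch

feats = torch.tensor([[[1.], [2.], [4.]]])    # token features, shape (b = 1, n = 3, d = 1)
lens  = torch.tensor([[3, 4, 2]])             # atoms per token

out = torch.repeat_interleave(feats[0], lens[0], dim = 0)[None]
# tensor([[[1.], [1.], [1.], [2.], [2.], [2.], [2.], [4.], [4.]]]), shape (1, 9, 1)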

tests/test_af3.py

Lines changed: 9 additions & 3 deletions
@@ -2,8 +2,6 @@
 os.environ['TYPECHECK'] = 'True'
 
 import torch
-from torch.nn.utils.rnn import pad_sequence
-
 import pytest
 
 from alphafold3_pytorch import (
@@ -27,7 +25,8 @@
 )
 
 from alphafold3_pytorch.alphafold3 import (
-    mean_pool_with_lens
+    mean_pool_with_lens,
+    repeat_consecutive_with_lens
 )
 
 def test_mean_pool_with_lens():
def test_mean_pool_with_lens():
@@ -37,6 +36,13 @@ def test_mean_pool_with_lens():
 
     assert torch.allclose(pooled, torch.tensor([[[1.], [2.], [1.]]]))
 
+def test_repeat_consecutive_with_lens():
+    seq = torch.tensor([[[1.], [2.], [4.]]])
+    lens = torch.tensor([[3, 4, 2]]).long()
+    repeated = repeat_consecutive_with_lens(seq, lens)
+
+    assert torch.allclose(repeated, torch.tensor([[[1.], [1.], [1.], [2.], [2.], [2.], [2.], [4.], [4.]]]))
+
 def test_smooth_lddt_loss():
     pred_coords = torch.randn(2, 100, 3)
     true_coords = torch.randn(2, 100, 3)
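A hedged usage note (not part of the commit): since every atom of a token receives an identical copy of that token's features, mean-pooling the broadcast result should recover the original token features. The sketch below assumes mean_pool_with_lens(feats, lens) takes packed atom features plus per-token atom counts, as the existing test suggests.

import torch
from alphafold3_pytorch.alphafold3 import mean_pool_with_lens, repeat_consecutive_with_lens

tokens = torch.randn(1, 3, 8)
lens = torch.tensor([[3, 4, 2]]).long()

atoms = repeat_consecutive_with_lens(tokens, lens)   # (1, 9, 8) packed atom features
pooled = mean_pool_with_lens(atoms, lens)            # back to (1, 3, 8)
assert torch.allclose(pooled, tokens, atol = 1e-6)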
